kugelaudio 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -8
- package/dist/index.d.mts +175 -17
- package/dist/index.d.ts +175 -17
- package/dist/index.js +287 -13
- package/dist/index.mjs +294 -13
- package/package.json +5 -1
- package/src/client.ts +354 -17
- package/src/index.ts +6 -2
- package/src/types.ts +83 -12
- package/src/websocket.ts +44 -0
package/README.md
CHANGED
|
@@ -31,7 +31,7 @@ const client = new KugelAudio({ apiKey: 'your_api_key' });
|
|
|
31
31
|
// Generate speech
|
|
32
32
|
const audio = await client.tts.generate({
|
|
33
33
|
text: 'Hello, world!',
|
|
34
|
-
|
|
34
|
+
modelId: 'kugel-1-turbo',
|
|
35
35
|
});
|
|
36
36
|
|
|
37
37
|
// Create a playable blob (browser)
|
|
@@ -144,12 +144,11 @@ Generate complete audio and receive it all at once:
|
|
|
144
144
|
```typescript
|
|
145
145
|
const audio = await client.tts.generate({
|
|
146
146
|
text: 'Hello, this is a test of the KugelAudio text-to-speech system.',
|
|
147
|
-
|
|
147
|
+
modelId: 'kugel-1-turbo', // 'kugel-1-turbo' (fast) or 'kugel-1' (quality)
|
|
148
148
|
voiceId: 123, // Optional: specific voice ID
|
|
149
149
|
cfgScale: 2.0, // Guidance scale (1.0-5.0)
|
|
150
150
|
maxNewTokens: 2048, // Maximum tokens to generate
|
|
151
151
|
sampleRate: 24000, // Output sample rate
|
|
152
|
-
speakerPrefix: true, // Add speaker prefix for better quality
|
|
153
152
|
normalize: true, // Enable text normalization (see below)
|
|
154
153
|
language: 'en', // Language for normalization
|
|
155
154
|
});
|
|
@@ -171,7 +170,7 @@ import { createWavBlob } from 'kugelaudio';
|
|
|
171
170
|
|
|
172
171
|
const audio = await client.tts.generate({
|
|
173
172
|
text: 'Hello, world!',
|
|
174
|
-
|
|
173
|
+
modelId: 'kugel-1-turbo',
|
|
175
174
|
});
|
|
176
175
|
|
|
177
176
|
// Create WAV blob for playback
|
|
@@ -200,7 +199,7 @@ Receive audio chunks as they are generated for lower latency:
|
|
|
200
199
|
await client.tts.stream(
|
|
201
200
|
{
|
|
202
201
|
text: 'Hello, this is streaming audio.',
|
|
203
|
-
|
|
202
|
+
modelId: 'kugel-1-turbo',
|
|
204
203
|
},
|
|
205
204
|
{
|
|
206
205
|
onOpen: () => {
|
|
@@ -354,8 +353,7 @@ interface GenerateOptions {
|
|
|
354
353
|
cfgScale?: number; // Default: 2.0
|
|
355
354
|
maxNewTokens?: number; // Default: 2048
|
|
356
355
|
sampleRate?: number; // Default: 24000
|
|
357
|
-
|
|
358
|
-
normalize?: boolean; // Default: false - Enable text normalization
|
|
356
|
+
normalize?: boolean; // Default: true - Enable text normalization
|
|
359
357
|
language?: string; // ISO 639-1 code for normalization (e.g., 'en', 'de')
|
|
360
358
|
}
|
|
361
359
|
```
|
|
@@ -520,7 +518,7 @@ async function main() {
|
|
|
520
518
|
await client.tts.stream(
|
|
521
519
|
{
|
|
522
520
|
text: 'Welcome to KugelAudio. This is an example of high-quality text-to-speech synthesis.',
|
|
523
|
-
|
|
521
|
+
modelId: 'kugel-1-turbo',
|
|
524
522
|
},
|
|
525
523
|
{
|
|
526
524
|
onChunk: (chunk) => {
|
package/dist/index.d.mts
CHANGED
|
@@ -15,7 +15,7 @@ interface Model {
|
|
|
15
15
|
/**
|
|
16
16
|
* Voice category types.
|
|
17
17
|
*/
|
|
18
|
-
type VoiceCategory = 'premade' | 'cloned' | 'designed';
|
|
18
|
+
type VoiceCategory = 'premade' | 'cloned' | 'designed' | 'conversational' | 'narrative' | 'narrative_story' | 'characters';
|
|
19
19
|
/**
|
|
20
20
|
* Voice sex types.
|
|
21
21
|
*/
|
|
@@ -48,7 +48,7 @@ interface GenerateOptions {
|
|
|
48
48
|
/** Text to synthesize */
|
|
49
49
|
text: string;
|
|
50
50
|
/** Model to use: 'kugel-1-turbo' (1.5B, fast) or 'kugel-1' (7B, premium). Default: 'kugel-1-turbo' */
|
|
51
|
-
|
|
51
|
+
modelId?: string;
|
|
52
52
|
/** Voice ID to use */
|
|
53
53
|
voiceId?: number;
|
|
54
54
|
/** CFG scale for generation (default: 2.0) */
|
|
@@ -57,21 +57,18 @@ interface GenerateOptions {
|
|
|
57
57
|
maxNewTokens?: number;
|
|
58
58
|
/** Output sample rate (default: 24000) */
|
|
59
59
|
sampleRate?: number;
|
|
60
|
-
/** Whether to add speaker prefix (default: true) */
|
|
61
|
-
speakerPrefix?: boolean;
|
|
62
60
|
/**
|
|
63
61
|
* Enable text normalization (converts numbers, dates, etc. to spoken words).
|
|
64
62
|
* When true, text will be normalized before TTS generation.
|
|
65
|
-
* Default: false
|
|
63
|
+
* Default: true
|
|
66
64
|
*
|
|
67
|
-
* ⚠️
|
|
68
|
-
*
|
|
69
|
-
* the language parameter when using normalization.
|
|
65
|
+
* ⚠️ For best performance, always specify the language parameter when using
|
|
66
|
+
* normalization. Without it, language auto-detection adds ~150ms latency.
|
|
70
67
|
*/
|
|
71
68
|
normalize?: boolean;
|
|
72
69
|
/**
|
|
73
70
|
* ISO 639-1 language code for text normalization (e.g., 'de', 'en', 'fr').
|
|
74
|
-
* If not provided and normalize is true, language will be auto-detected
|
|
71
|
+
* If not provided and normalize is true (default), language will be auto-detected
|
|
75
72
|
* (adds ~150ms latency).
|
|
76
73
|
*
|
|
77
74
|
* Supported: de, en, fr, es, it, pt, nl, pl, sv, da, no, fi, cs, hu, ro,
|
|
@@ -91,12 +88,20 @@ interface StreamConfig {
|
|
|
91
88
|
maxNewTokens?: number;
|
|
92
89
|
/** Output sample rate */
|
|
93
90
|
sampleRate?: number;
|
|
94
|
-
/** Whether to add speaker prefix */
|
|
95
|
-
speakerPrefix?: boolean;
|
|
96
91
|
/** Auto-flush timeout in milliseconds */
|
|
97
92
|
flushTimeoutMs?: number;
|
|
98
93
|
/** Maximum buffer length */
|
|
99
94
|
maxBufferLength?: number;
|
|
95
|
+
/**
|
|
96
|
+
* Enable text normalization (converts numbers, dates, etc. to spoken words).
|
|
97
|
+
* Default: true
|
|
98
|
+
*/
|
|
99
|
+
normalize?: boolean;
|
|
100
|
+
/**
|
|
101
|
+
* ISO 639-1 language code for text normalization (e.g., 'de', 'en', 'fr').
|
|
102
|
+
* Specify to avoid ~150ms auto-detection latency.
|
|
103
|
+
*/
|
|
104
|
+
language?: string;
|
|
100
105
|
}
|
|
101
106
|
/**
|
|
102
107
|
* Audio chunk from streaming TTS.
|
|
@@ -176,17 +181,75 @@ interface KugelAudioOptions {
|
|
|
176
181
|
isMasterKey?: boolean;
|
|
177
182
|
/** Whether apiKey is a JWT token (for user authentication). Takes precedence over isMasterKey. */
|
|
178
183
|
isToken?: boolean;
|
|
184
|
+
/** Organisation ID to bill usage against (required for token auth to enable usage recording). */
|
|
185
|
+
orgId?: number;
|
|
179
186
|
/** API base URL (default: https://api.kugelaudio.com) */
|
|
180
187
|
apiUrl?: string;
|
|
181
|
-
/** TTS server URL (default:
|
|
188
|
+
/** TTS server URL (default: same as apiUrl) */
|
|
182
189
|
ttsUrl?: string;
|
|
183
190
|
/** Request timeout in milliseconds (default: 60000) */
|
|
184
191
|
timeout?: number;
|
|
185
192
|
}
|
|
186
|
-
|
|
187
193
|
/**
|
|
188
|
-
*
|
|
194
|
+
* Multi-context session configuration.
|
|
195
|
+
*/
|
|
196
|
+
interface MultiContextConfig {
|
|
197
|
+
/** Default voice ID for new contexts */
|
|
198
|
+
defaultVoiceId?: number;
|
|
199
|
+
/** Output sample rate (default: 24000) */
|
|
200
|
+
sampleRate?: number;
|
|
201
|
+
/** CFG scale for generation (default: 2.0) */
|
|
202
|
+
cfgScale?: number;
|
|
203
|
+
/** Maximum tokens to generate (default: 2048) */
|
|
204
|
+
maxNewTokens?: number;
|
|
205
|
+
/** Enable text normalization (default: true) */
|
|
206
|
+
normalize?: boolean;
|
|
207
|
+
/** Seconds before context auto-closes (default: 20.0) */
|
|
208
|
+
inactivityTimeout?: number;
|
|
209
|
+
}
|
|
210
|
+
/**
|
|
211
|
+
* Voice settings for a specific context.
|
|
212
|
+
*/
|
|
213
|
+
interface ContextVoiceSettings {
|
|
214
|
+
/** Stability (0.0-1.0) */
|
|
215
|
+
stability?: number;
|
|
216
|
+
/** Similarity boost (0.0-1.0) */
|
|
217
|
+
similarityBoost?: number;
|
|
218
|
+
/** Style (0.0-1.0) */
|
|
219
|
+
style?: number;
|
|
220
|
+
/** Use speaker boost */
|
|
221
|
+
useSpeakerBoost?: boolean;
|
|
222
|
+
/** Speed multiplier */
|
|
223
|
+
speed?: number;
|
|
224
|
+
}
|
|
225
|
+
/**
|
|
226
|
+
* Audio chunk from multi-context streaming.
|
|
227
|
+
*/
|
|
228
|
+
interface MultiContextAudioChunk extends AudioChunk {
|
|
229
|
+
/** Context ID this audio belongs to */
|
|
230
|
+
contextId: string;
|
|
231
|
+
}
|
|
232
|
+
/**
|
|
233
|
+
* Event callbacks for multi-context streaming.
|
|
189
234
|
*/
|
|
235
|
+
interface MultiContextCallbacks {
|
|
236
|
+
/** Called when session is started */
|
|
237
|
+
onSessionStarted?: (sessionId: string) => void;
|
|
238
|
+
/** Called when a context is created */
|
|
239
|
+
onContextCreated?: (contextId: string) => void;
|
|
240
|
+
/** Called when an audio chunk is received */
|
|
241
|
+
onChunk?: (chunk: MultiContextAudioChunk) => void;
|
|
242
|
+
/** Called when a context finishes generating */
|
|
243
|
+
onContextFinal?: (contextId: string) => void;
|
|
244
|
+
/** Called when a context is closed */
|
|
245
|
+
onContextClosed?: (contextId: string) => void;
|
|
246
|
+
/** Called when a context times out */
|
|
247
|
+
onContextTimeout?: (contextId: string) => void;
|
|
248
|
+
/** Called when session is closed */
|
|
249
|
+
onSessionClosed?: (stats: Record<string, unknown>) => void;
|
|
250
|
+
/** Called on error */
|
|
251
|
+
onError?: (error: Error, contextId?: string) => void;
|
|
252
|
+
}
|
|
190
253
|
|
|
191
254
|
/**
|
|
192
255
|
* Models resource for listing TTS models.
|
|
@@ -290,6 +353,98 @@ declare class TTSResource {
|
|
|
290
353
|
*/
|
|
291
354
|
close(): void;
|
|
292
355
|
private parseError;
|
|
356
|
+
/**
|
|
357
|
+
* Create a multi-context session for concurrent TTS streams.
|
|
358
|
+
*
|
|
359
|
+
* Allows managing up to 5 independent audio generation contexts
|
|
360
|
+
* over a single WebSocket connection. Each context has its own
|
|
361
|
+
* text buffer, voice settings, and generation queue.
|
|
362
|
+
*
|
|
363
|
+
* @example
|
|
364
|
+
* ```typescript
|
|
365
|
+
* const session = client.tts.createMultiContextSession({
|
|
366
|
+
* defaultVoiceId: 123,
|
|
367
|
+
* });
|
|
368
|
+
*
|
|
369
|
+
* session.connect({
|
|
370
|
+
* onChunk: (chunk) => {
|
|
371
|
+
* console.log(`Audio from ${chunk.contextId}`);
|
|
372
|
+
* playAudio(chunk.audio);
|
|
373
|
+
* },
|
|
374
|
+
* onContextFinal: (contextId) => {
|
|
375
|
+
* console.log(`${contextId} finished`);
|
|
376
|
+
* },
|
|
377
|
+
* });
|
|
378
|
+
*
|
|
379
|
+
* // Create contexts with different voices
|
|
380
|
+
* session.createContext('narrator', { voiceId: 123 });
|
|
381
|
+
* session.createContext('character', { voiceId: 456 });
|
|
382
|
+
*
|
|
383
|
+
* // Send text to different speakers
|
|
384
|
+
* session.send('narrator', 'The story begins.', true);
|
|
385
|
+
* session.send('character', 'Hello!', true);
|
|
386
|
+
*
|
|
387
|
+
* // Close when done
|
|
388
|
+
* session.close();
|
|
389
|
+
* ```
|
|
390
|
+
*/
|
|
391
|
+
createMultiContextSession(config?: MultiContextConfig): MultiContextSession;
|
|
392
|
+
}
|
|
393
|
+
/**
|
|
394
|
+
* Multi-context WebSocket session for concurrent TTS streams.
|
|
395
|
+
*/
|
|
396
|
+
declare class MultiContextSession {
|
|
397
|
+
private client;
|
|
398
|
+
private ws;
|
|
399
|
+
private config;
|
|
400
|
+
private callbacks;
|
|
401
|
+
private contexts;
|
|
402
|
+
private _sessionId;
|
|
403
|
+
private isStarted;
|
|
404
|
+
constructor(client: KugelAudio, config?: MultiContextConfig);
|
|
405
|
+
/**
|
|
406
|
+
* Get the current session ID, or null if not connected.
|
|
407
|
+
*/
|
|
408
|
+
get sessionId(): string | null;
|
|
409
|
+
/**
|
|
410
|
+
* Connect to the multi-context WebSocket endpoint.
|
|
411
|
+
*/
|
|
412
|
+
connect(callbacks: MultiContextCallbacks): void;
|
|
413
|
+
/**
|
|
414
|
+
* Create a new context with optional voice settings.
|
|
415
|
+
*/
|
|
416
|
+
createContext(contextId: string, options?: {
|
|
417
|
+
voiceId?: number;
|
|
418
|
+
voiceSettings?: ContextVoiceSettings;
|
|
419
|
+
}): void;
|
|
420
|
+
/**
|
|
421
|
+
* Send text to a specific context.
|
|
422
|
+
*/
|
|
423
|
+
send(contextId: string, text: string, flush?: boolean): void;
|
|
424
|
+
/**
|
|
425
|
+
* Flush a context's buffer.
|
|
426
|
+
*/
|
|
427
|
+
flush(contextId: string): void;
|
|
428
|
+
/**
|
|
429
|
+
* Close a specific context.
|
|
430
|
+
*/
|
|
431
|
+
closeContext(contextId: string): void;
|
|
432
|
+
/**
|
|
433
|
+
* Send keep-alive to reset a context's inactivity timeout.
|
|
434
|
+
*/
|
|
435
|
+
keepAlive(contextId: string): void;
|
|
436
|
+
/**
|
|
437
|
+
* Close the session and all contexts.
|
|
438
|
+
*/
|
|
439
|
+
close(): void;
|
|
440
|
+
/**
|
|
441
|
+
* Get active context IDs.
|
|
442
|
+
*/
|
|
443
|
+
get activeContexts(): string[];
|
|
444
|
+
/**
|
|
445
|
+
* Check if connected.
|
|
446
|
+
*/
|
|
447
|
+
get isConnected(): boolean;
|
|
293
448
|
}
|
|
294
449
|
/**
|
|
295
450
|
* KugelAudio API client.
|
|
@@ -307,13 +462,13 @@ declare class TTSResource {
|
|
|
307
462
|
* // Generate audio with fast model (1.5B params)
|
|
308
463
|
* const audio = await client.tts.generate({
|
|
309
464
|
* text: 'Hello, world!',
|
|
310
|
-
*
|
|
465
|
+
* modelId: 'kugel-1-turbo',
|
|
311
466
|
* });
|
|
312
467
|
*
|
|
313
468
|
* // Generate audio with premium model (7B params)
|
|
314
469
|
* const audio = await client.tts.generate({
|
|
315
470
|
* text: 'Hello, world!',
|
|
316
|
-
*
|
|
471
|
+
* modelId: 'kugel-1',
|
|
317
472
|
* });
|
|
318
473
|
* ```
|
|
319
474
|
*/
|
|
@@ -321,6 +476,7 @@ declare class KugelAudio {
|
|
|
321
476
|
private _apiKey;
|
|
322
477
|
private _isMasterKey;
|
|
323
478
|
private _isToken;
|
|
479
|
+
private _orgId;
|
|
324
480
|
private _apiUrl;
|
|
325
481
|
private _ttsUrl;
|
|
326
482
|
private _timeout;
|
|
@@ -354,6 +510,8 @@ declare class KugelAudio {
|
|
|
354
510
|
get isMasterKey(): boolean;
|
|
355
511
|
/** Check if using JWT token authentication */
|
|
356
512
|
get isToken(): boolean;
|
|
513
|
+
/** Get organisation ID for billing */
|
|
514
|
+
get orgId(): number | undefined;
|
|
357
515
|
/** Get TTS URL */
|
|
358
516
|
get ttsUrl(): string;
|
|
359
517
|
/**
|
|
@@ -451,4 +609,4 @@ declare function createWavFile(audio: ArrayBuffer, sampleRate: number): ArrayBuf
|
|
|
451
609
|
*/
|
|
452
610
|
declare function createWavBlob(audio: ArrayBuffer, sampleRate: number): Blob;
|
|
453
611
|
|
|
454
|
-
export { type AudioChunk, type AudioResponse, AuthenticationError, ConnectionError, type GenerateOptions, type GenerationStats, InsufficientCreditsError, KugelAudio, KugelAudioError, type KugelAudioOptions, type Model, RateLimitError, type StreamCallbacks, type StreamConfig, ValidationError, type Voice, type VoiceAge, type VoiceCategory, type VoiceSex, base64ToArrayBuffer, createWavBlob, createWavFile, decodePCM16 };
|
|
612
|
+
export { type AudioChunk, type AudioResponse, AuthenticationError, ConnectionError, type ContextVoiceSettings, type GenerateOptions, type GenerationStats, InsufficientCreditsError, KugelAudio, KugelAudioError, type KugelAudioOptions, type Model, type MultiContextAudioChunk, type MultiContextCallbacks, type MultiContextConfig, RateLimitError, type StreamCallbacks, type StreamConfig, ValidationError, type Voice, type VoiceAge, type VoiceCategory, type VoiceSex, base64ToArrayBuffer, createWavBlob, createWavFile, decodePCM16 };
|
package/dist/index.d.ts
CHANGED
|
@@ -15,7 +15,7 @@ interface Model {
|
|
|
15
15
|
/**
|
|
16
16
|
* Voice category types.
|
|
17
17
|
*/
|
|
18
|
-
type VoiceCategory = 'premade' | 'cloned' | 'designed';
|
|
18
|
+
type VoiceCategory = 'premade' | 'cloned' | 'designed' | 'conversational' | 'narrative' | 'narrative_story' | 'characters';
|
|
19
19
|
/**
|
|
20
20
|
* Voice sex types.
|
|
21
21
|
*/
|
|
@@ -48,7 +48,7 @@ interface GenerateOptions {
|
|
|
48
48
|
/** Text to synthesize */
|
|
49
49
|
text: string;
|
|
50
50
|
/** Model to use: 'kugel-1-turbo' (1.5B, fast) or 'kugel-1' (7B, premium). Default: 'kugel-1-turbo' */
|
|
51
|
-
|
|
51
|
+
modelId?: string;
|
|
52
52
|
/** Voice ID to use */
|
|
53
53
|
voiceId?: number;
|
|
54
54
|
/** CFG scale for generation (default: 2.0) */
|
|
@@ -57,21 +57,18 @@ interface GenerateOptions {
|
|
|
57
57
|
maxNewTokens?: number;
|
|
58
58
|
/** Output sample rate (default: 24000) */
|
|
59
59
|
sampleRate?: number;
|
|
60
|
-
/** Whether to add speaker prefix (default: true) */
|
|
61
|
-
speakerPrefix?: boolean;
|
|
62
60
|
/**
|
|
63
61
|
* Enable text normalization (converts numbers, dates, etc. to spoken words).
|
|
64
62
|
* When true, text will be normalized before TTS generation.
|
|
65
|
-
* Default: false
|
|
63
|
+
* Default: true
|
|
66
64
|
*
|
|
67
|
-
* ⚠️
|
|
68
|
-
*
|
|
69
|
-
* the language parameter when using normalization.
|
|
65
|
+
* ⚠️ For best performance, always specify the language parameter when using
|
|
66
|
+
* normalization. Without it, language auto-detection adds ~150ms latency.
|
|
70
67
|
*/
|
|
71
68
|
normalize?: boolean;
|
|
72
69
|
/**
|
|
73
70
|
* ISO 639-1 language code for text normalization (e.g., 'de', 'en', 'fr').
|
|
74
|
-
* If not provided and normalize is true, language will be auto-detected
|
|
71
|
+
* If not provided and normalize is true (default), language will be auto-detected
|
|
75
72
|
* (adds ~150ms latency).
|
|
76
73
|
*
|
|
77
74
|
* Supported: de, en, fr, es, it, pt, nl, pl, sv, da, no, fi, cs, hu, ro,
|
|
@@ -91,12 +88,20 @@ interface StreamConfig {
|
|
|
91
88
|
maxNewTokens?: number;
|
|
92
89
|
/** Output sample rate */
|
|
93
90
|
sampleRate?: number;
|
|
94
|
-
/** Whether to add speaker prefix */
|
|
95
|
-
speakerPrefix?: boolean;
|
|
96
91
|
/** Auto-flush timeout in milliseconds */
|
|
97
92
|
flushTimeoutMs?: number;
|
|
98
93
|
/** Maximum buffer length */
|
|
99
94
|
maxBufferLength?: number;
|
|
95
|
+
/**
|
|
96
|
+
* Enable text normalization (converts numbers, dates, etc. to spoken words).
|
|
97
|
+
* Default: true
|
|
98
|
+
*/
|
|
99
|
+
normalize?: boolean;
|
|
100
|
+
/**
|
|
101
|
+
* ISO 639-1 language code for text normalization (e.g., 'de', 'en', 'fr').
|
|
102
|
+
* Specify to avoid ~150ms auto-detection latency.
|
|
103
|
+
*/
|
|
104
|
+
language?: string;
|
|
100
105
|
}
|
|
101
106
|
/**
|
|
102
107
|
* Audio chunk from streaming TTS.
|
|
@@ -176,17 +181,75 @@ interface KugelAudioOptions {
|
|
|
176
181
|
isMasterKey?: boolean;
|
|
177
182
|
/** Whether apiKey is a JWT token (for user authentication). Takes precedence over isMasterKey. */
|
|
178
183
|
isToken?: boolean;
|
|
184
|
+
/** Organisation ID to bill usage against (required for token auth to enable usage recording). */
|
|
185
|
+
orgId?: number;
|
|
179
186
|
/** API base URL (default: https://api.kugelaudio.com) */
|
|
180
187
|
apiUrl?: string;
|
|
181
|
-
/** TTS server URL (default:
|
|
188
|
+
/** TTS server URL (default: same as apiUrl) */
|
|
182
189
|
ttsUrl?: string;
|
|
183
190
|
/** Request timeout in milliseconds (default: 60000) */
|
|
184
191
|
timeout?: number;
|
|
185
192
|
}
|
|
186
|
-
|
|
187
193
|
/**
|
|
188
|
-
*
|
|
194
|
+
* Multi-context session configuration.
|
|
195
|
+
*/
|
|
196
|
+
interface MultiContextConfig {
|
|
197
|
+
/** Default voice ID for new contexts */
|
|
198
|
+
defaultVoiceId?: number;
|
|
199
|
+
/** Output sample rate (default: 24000) */
|
|
200
|
+
sampleRate?: number;
|
|
201
|
+
/** CFG scale for generation (default: 2.0) */
|
|
202
|
+
cfgScale?: number;
|
|
203
|
+
/** Maximum tokens to generate (default: 2048) */
|
|
204
|
+
maxNewTokens?: number;
|
|
205
|
+
/** Enable text normalization (default: true) */
|
|
206
|
+
normalize?: boolean;
|
|
207
|
+
/** Seconds before context auto-closes (default: 20.0) */
|
|
208
|
+
inactivityTimeout?: number;
|
|
209
|
+
}
|
|
210
|
+
/**
|
|
211
|
+
* Voice settings for a specific context.
|
|
212
|
+
*/
|
|
213
|
+
interface ContextVoiceSettings {
|
|
214
|
+
/** Stability (0.0-1.0) */
|
|
215
|
+
stability?: number;
|
|
216
|
+
/** Similarity boost (0.0-1.0) */
|
|
217
|
+
similarityBoost?: number;
|
|
218
|
+
/** Style (0.0-1.0) */
|
|
219
|
+
style?: number;
|
|
220
|
+
/** Use speaker boost */
|
|
221
|
+
useSpeakerBoost?: boolean;
|
|
222
|
+
/** Speed multiplier */
|
|
223
|
+
speed?: number;
|
|
224
|
+
}
|
|
225
|
+
/**
|
|
226
|
+
* Audio chunk from multi-context streaming.
|
|
227
|
+
*/
|
|
228
|
+
interface MultiContextAudioChunk extends AudioChunk {
|
|
229
|
+
/** Context ID this audio belongs to */
|
|
230
|
+
contextId: string;
|
|
231
|
+
}
|
|
232
|
+
/**
|
|
233
|
+
* Event callbacks for multi-context streaming.
|
|
189
234
|
*/
|
|
235
|
+
interface MultiContextCallbacks {
|
|
236
|
+
/** Called when session is started */
|
|
237
|
+
onSessionStarted?: (sessionId: string) => void;
|
|
238
|
+
/** Called when a context is created */
|
|
239
|
+
onContextCreated?: (contextId: string) => void;
|
|
240
|
+
/** Called when an audio chunk is received */
|
|
241
|
+
onChunk?: (chunk: MultiContextAudioChunk) => void;
|
|
242
|
+
/** Called when a context finishes generating */
|
|
243
|
+
onContextFinal?: (contextId: string) => void;
|
|
244
|
+
/** Called when a context is closed */
|
|
245
|
+
onContextClosed?: (contextId: string) => void;
|
|
246
|
+
/** Called when a context times out */
|
|
247
|
+
onContextTimeout?: (contextId: string) => void;
|
|
248
|
+
/** Called when session is closed */
|
|
249
|
+
onSessionClosed?: (stats: Record<string, unknown>) => void;
|
|
250
|
+
/** Called on error */
|
|
251
|
+
onError?: (error: Error, contextId?: string) => void;
|
|
252
|
+
}
|
|
190
253
|
|
|
191
254
|
/**
|
|
192
255
|
* Models resource for listing TTS models.
|
|
@@ -290,6 +353,98 @@ declare class TTSResource {
|
|
|
290
353
|
*/
|
|
291
354
|
close(): void;
|
|
292
355
|
private parseError;
|
|
356
|
+
/**
|
|
357
|
+
* Create a multi-context session for concurrent TTS streams.
|
|
358
|
+
*
|
|
359
|
+
* Allows managing up to 5 independent audio generation contexts
|
|
360
|
+
* over a single WebSocket connection. Each context has its own
|
|
361
|
+
* text buffer, voice settings, and generation queue.
|
|
362
|
+
*
|
|
363
|
+
* @example
|
|
364
|
+
* ```typescript
|
|
365
|
+
* const session = client.tts.createMultiContextSession({
|
|
366
|
+
* defaultVoiceId: 123,
|
|
367
|
+
* });
|
|
368
|
+
*
|
|
369
|
+
* session.connect({
|
|
370
|
+
* onChunk: (chunk) => {
|
|
371
|
+
* console.log(`Audio from ${chunk.contextId}`);
|
|
372
|
+
* playAudio(chunk.audio);
|
|
373
|
+
* },
|
|
374
|
+
* onContextFinal: (contextId) => {
|
|
375
|
+
* console.log(`${contextId} finished`);
|
|
376
|
+
* },
|
|
377
|
+
* });
|
|
378
|
+
*
|
|
379
|
+
* // Create contexts with different voices
|
|
380
|
+
* session.createContext('narrator', { voiceId: 123 });
|
|
381
|
+
* session.createContext('character', { voiceId: 456 });
|
|
382
|
+
*
|
|
383
|
+
* // Send text to different speakers
|
|
384
|
+
* session.send('narrator', 'The story begins.', true);
|
|
385
|
+
* session.send('character', 'Hello!', true);
|
|
386
|
+
*
|
|
387
|
+
* // Close when done
|
|
388
|
+
* session.close();
|
|
389
|
+
* ```
|
|
390
|
+
*/
|
|
391
|
+
createMultiContextSession(config?: MultiContextConfig): MultiContextSession;
|
|
392
|
+
}
|
|
393
|
+
/**
|
|
394
|
+
* Multi-context WebSocket session for concurrent TTS streams.
|
|
395
|
+
*/
|
|
396
|
+
declare class MultiContextSession {
|
|
397
|
+
private client;
|
|
398
|
+
private ws;
|
|
399
|
+
private config;
|
|
400
|
+
private callbacks;
|
|
401
|
+
private contexts;
|
|
402
|
+
private _sessionId;
|
|
403
|
+
private isStarted;
|
|
404
|
+
constructor(client: KugelAudio, config?: MultiContextConfig);
|
|
405
|
+
/**
|
|
406
|
+
* Get the current session ID, or null if not connected.
|
|
407
|
+
*/
|
|
408
|
+
get sessionId(): string | null;
|
|
409
|
+
/**
|
|
410
|
+
* Connect to the multi-context WebSocket endpoint.
|
|
411
|
+
*/
|
|
412
|
+
connect(callbacks: MultiContextCallbacks): void;
|
|
413
|
+
/**
|
|
414
|
+
* Create a new context with optional voice settings.
|
|
415
|
+
*/
|
|
416
|
+
createContext(contextId: string, options?: {
|
|
417
|
+
voiceId?: number;
|
|
418
|
+
voiceSettings?: ContextVoiceSettings;
|
|
419
|
+
}): void;
|
|
420
|
+
/**
|
|
421
|
+
* Send text to a specific context.
|
|
422
|
+
*/
|
|
423
|
+
send(contextId: string, text: string, flush?: boolean): void;
|
|
424
|
+
/**
|
|
425
|
+
* Flush a context's buffer.
|
|
426
|
+
*/
|
|
427
|
+
flush(contextId: string): void;
|
|
428
|
+
/**
|
|
429
|
+
* Close a specific context.
|
|
430
|
+
*/
|
|
431
|
+
closeContext(contextId: string): void;
|
|
432
|
+
/**
|
|
433
|
+
* Send keep-alive to reset a context's inactivity timeout.
|
|
434
|
+
*/
|
|
435
|
+
keepAlive(contextId: string): void;
|
|
436
|
+
/**
|
|
437
|
+
* Close the session and all contexts.
|
|
438
|
+
*/
|
|
439
|
+
close(): void;
|
|
440
|
+
/**
|
|
441
|
+
* Get active context IDs.
|
|
442
|
+
*/
|
|
443
|
+
get activeContexts(): string[];
|
|
444
|
+
/**
|
|
445
|
+
* Check if connected.
|
|
446
|
+
*/
|
|
447
|
+
get isConnected(): boolean;
|
|
293
448
|
}
|
|
294
449
|
/**
|
|
295
450
|
* KugelAudio API client.
|
|
@@ -307,13 +462,13 @@ declare class TTSResource {
|
|
|
307
462
|
* // Generate audio with fast model (1.5B params)
|
|
308
463
|
* const audio = await client.tts.generate({
|
|
309
464
|
* text: 'Hello, world!',
|
|
310
|
-
*
|
|
465
|
+
* modelId: 'kugel-1-turbo',
|
|
311
466
|
* });
|
|
312
467
|
*
|
|
313
468
|
* // Generate audio with premium model (7B params)
|
|
314
469
|
* const audio = await client.tts.generate({
|
|
315
470
|
* text: 'Hello, world!',
|
|
316
|
-
*
|
|
471
|
+
* modelId: 'kugel-1',
|
|
317
472
|
* });
|
|
318
473
|
* ```
|
|
319
474
|
*/
|
|
@@ -321,6 +476,7 @@ declare class KugelAudio {
|
|
|
321
476
|
private _apiKey;
|
|
322
477
|
private _isMasterKey;
|
|
323
478
|
private _isToken;
|
|
479
|
+
private _orgId;
|
|
324
480
|
private _apiUrl;
|
|
325
481
|
private _ttsUrl;
|
|
326
482
|
private _timeout;
|
|
@@ -354,6 +510,8 @@ declare class KugelAudio {
|
|
|
354
510
|
get isMasterKey(): boolean;
|
|
355
511
|
/** Check if using JWT token authentication */
|
|
356
512
|
get isToken(): boolean;
|
|
513
|
+
/** Get organisation ID for billing */
|
|
514
|
+
get orgId(): number | undefined;
|
|
357
515
|
/** Get TTS URL */
|
|
358
516
|
get ttsUrl(): string;
|
|
359
517
|
/**
|
|
@@ -451,4 +609,4 @@ declare function createWavFile(audio: ArrayBuffer, sampleRate: number): ArrayBuf
|
|
|
451
609
|
*/
|
|
452
610
|
declare function createWavBlob(audio: ArrayBuffer, sampleRate: number): Blob;
|
|
453
611
|
|
|
454
|
-
export { type AudioChunk, type AudioResponse, AuthenticationError, ConnectionError, type GenerateOptions, type GenerationStats, InsufficientCreditsError, KugelAudio, KugelAudioError, type KugelAudioOptions, type Model, RateLimitError, type StreamCallbacks, type StreamConfig, ValidationError, type Voice, type VoiceAge, type VoiceCategory, type VoiceSex, base64ToArrayBuffer, createWavBlob, createWavFile, decodePCM16 };
|
|
612
|
+
export { type AudioChunk, type AudioResponse, AuthenticationError, ConnectionError, type ContextVoiceSettings, type GenerateOptions, type GenerationStats, InsufficientCreditsError, KugelAudio, KugelAudioError, type KugelAudioOptions, type Model, type MultiContextAudioChunk, type MultiContextCallbacks, type MultiContextConfig, RateLimitError, type StreamCallbacks, type StreamConfig, ValidationError, type Voice, type VoiceAge, type VoiceCategory, type VoiceSex, base64ToArrayBuffer, createWavBlob, createWavFile, decodePCM16 };
|