kugelaudio 0.2.0 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -8
- package/dist/index.d.mts +207 -17
- package/dist/index.d.ts +207 -17
- package/dist/index.js +323 -16
- package/dist/index.mjs +330 -16
- package/package.json +5 -1
- package/src/client.ts +391 -18
- package/src/index.ts +8 -3
- package/src/types.ts +116 -12
- package/src/websocket.ts +44 -0
package/src/client.ts
CHANGED
|
@@ -16,12 +16,27 @@ import type {
|
|
|
16
16
|
KugelAudioOptions,
|
|
17
17
|
Model,
|
|
18
18
|
StreamCallbacks,
|
|
19
|
-
Voice
|
|
19
|
+
Voice,
|
|
20
|
+
WordTimestamp
|
|
20
21
|
} from './types';
|
|
21
22
|
import { base64ToArrayBuffer } from './utils';
|
|
23
|
+
import { getWebSocket } from './websocket';
|
|
22
24
|
|
|
23
25
|
const DEFAULT_API_URL = 'https://api.kugelaudio.com';
|
|
24
26
|
|
|
27
|
+
/**
|
|
28
|
+
* Create a new WebSocket instance.
|
|
29
|
+
* Lazily resolves the constructor to avoid top-level side-effects
|
|
30
|
+
* that break server-side bundlers (Turbopack/Webpack).
|
|
31
|
+
*/
|
|
32
|
+
function createWs(url: string): WebSocket {
|
|
33
|
+
const WS = getWebSocket();
|
|
34
|
+
return new WS(url);
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
/** WebSocket OPEN readyState constant. */
|
|
38
|
+
const WS_OPEN = 1;
|
|
39
|
+
|
|
25
40
|
/**
|
|
26
41
|
* Models resource for listing TTS models.
|
|
27
42
|
*/
|
|
@@ -111,6 +126,7 @@ class VoicesResource {
|
|
|
111
126
|
* TTS resource for text-to-speech generation.
|
|
112
127
|
*/
|
|
113
128
|
class TTSResource {
|
|
129
|
+
// Declared as WebSocket (not any); the instance may be a browser WebSocket or one from the ws package
|
|
114
130
|
private wsConnection: WebSocket | null = null;
|
|
115
131
|
private wsUrl: string | null = null;
|
|
116
132
|
private pendingRequests: Map<number, {
|
|
@@ -147,7 +163,7 @@ class TTSResource {
|
|
|
147
163
|
* Check if WebSocket connection is established and open.
|
|
148
164
|
*/
|
|
149
165
|
isConnected(): boolean {
|
|
150
|
-
return this.wsConnection !== null && this.wsConnection.readyState ===
|
|
166
|
+
return this.wsConnection !== null && this.wsConnection.readyState === WS_OPEN;
|
|
151
167
|
}
|
|
152
168
|
|
|
153
169
|
/**
|
|
@@ -157,11 +173,15 @@ class TTSResource {
|
|
|
157
173
|
async generate(options: GenerateOptions): Promise<AudioResponse> {
|
|
158
174
|
const chunks: ArrayBuffer[] = [];
|
|
159
175
|
let finalStats: GenerationStats | undefined;
|
|
176
|
+
const allTimestamps: WordTimestamp[] = [];
|
|
160
177
|
|
|
161
178
|
await this.stream(options, {
|
|
162
179
|
onChunk: (chunk) => {
|
|
163
180
|
chunks.push(base64ToArrayBuffer(chunk.audio));
|
|
164
181
|
},
|
|
182
|
+
onWordTimestamps: (timestamps) => {
|
|
183
|
+
allTimestamps.push(...timestamps);
|
|
184
|
+
},
|
|
165
185
|
onFinal: (stats) => {
|
|
166
186
|
finalStats = stats;
|
|
167
187
|
},
|
|
@@ -183,6 +203,7 @@ class TTSResource {
|
|
|
183
203
|
durationMs: finalStats ? finalStats.durationMs : 0,
|
|
184
204
|
generationMs: finalStats ? finalStats.generationMs : 0,
|
|
185
205
|
rtf: finalStats ? finalStats.rtf : 0,
|
|
206
|
+
wordTimestamps: allTimestamps,
|
|
186
207
|
};
|
|
187
208
|
}
|
|
188
209
|
|
|
@@ -202,7 +223,12 @@ class TTSResource {
|
|
|
202
223
|
} else {
|
|
203
224
|
authParam = 'api_key';
|
|
204
225
|
}
|
|
205
|
-
|
|
226
|
+
let url = `${wsUrl}/ws/tts?${authParam}=${this.client.apiKey}`;
|
|
227
|
+
// Append org_id whenever one is configured (regardless of auth mode) so usage is recorded against the org
|
|
228
|
+
if (this.client.orgId !== undefined) {
|
|
229
|
+
url += `&org_id=${this.client.orgId}`;
|
|
230
|
+
}
|
|
231
|
+
return url;
|
|
206
232
|
}
|
|
207
233
|
|
|
208
234
|
/**
|
|
@@ -216,7 +242,7 @@ class TTSResource {
|
|
|
216
242
|
if (
|
|
217
243
|
this.wsConnection &&
|
|
218
244
|
this.wsUrl === url &&
|
|
219
|
-
this.wsConnection.readyState ===
|
|
245
|
+
this.wsConnection.readyState === WS_OPEN
|
|
220
246
|
) {
|
|
221
247
|
return this.wsConnection;
|
|
222
248
|
}
|
|
@@ -233,7 +259,7 @@ class TTSResource {
|
|
|
233
259
|
|
|
234
260
|
// Create new connection
|
|
235
261
|
return new Promise((resolve, reject) => {
|
|
236
|
-
const ws =
|
|
262
|
+
const ws = createWs(url);
|
|
237
263
|
|
|
238
264
|
ws.onopen = () => {
|
|
239
265
|
this.wsConnection = ws;
|
|
@@ -252,9 +278,15 @@ class TTSResource {
|
|
|
252
278
|
* Setup message handler for pooled connection.
|
|
253
279
|
*/
|
|
254
280
|
private setupMessageHandler(ws: WebSocket): void {
|
|
255
|
-
ws.onmessage = (event) => {
|
|
281
|
+
ws.onmessage = (event: { data: unknown }) => {
|
|
256
282
|
try {
|
|
257
|
-
|
|
283
|
+
// Handle both browser (string) and Node.js (Buffer) message formats
|
|
284
|
+
const messageData = typeof event.data === 'string'
|
|
285
|
+
? event.data
|
|
286
|
+
: event.data instanceof Buffer
|
|
287
|
+
? event.data.toString()
|
|
288
|
+
: String(event.data);
|
|
289
|
+
const data = JSON.parse(messageData);
|
|
258
290
|
|
|
259
291
|
// Get the current pending request (we process one at a time)
|
|
260
292
|
const [requestId, pending] = [...this.pendingRequests.entries()][0] || [];
|
|
@@ -295,6 +327,20 @@ class TTSResource {
|
|
|
295
327
|
};
|
|
296
328
|
pending.callbacks.onChunk?.(chunk);
|
|
297
329
|
}
|
|
330
|
+
|
|
331
|
+
if (data.word_timestamps) {
|
|
332
|
+
const timestamps: WordTimestamp[] = data.word_timestamps.map(
|
|
333
|
+
(w: Record<string, unknown>) => ({
|
|
334
|
+
word: w.word as string,
|
|
335
|
+
startMs: w.start_ms as number,
|
|
336
|
+
endMs: w.end_ms as number,
|
|
337
|
+
charStart: w.char_start as number,
|
|
338
|
+
charEnd: w.char_end as number,
|
|
339
|
+
score: (w.score as number) ?? 1.0,
|
|
340
|
+
})
|
|
341
|
+
);
|
|
342
|
+
pending.callbacks.onWordTimestamps?.(timestamps);
|
|
343
|
+
}
|
|
298
344
|
} catch (e) {
|
|
299
345
|
console.error('Failed to parse WebSocket message:', e);
|
|
300
346
|
}
|
|
@@ -364,14 +410,14 @@ class TTSResource {
|
|
|
364
410
|
|
|
365
411
|
ws.send(JSON.stringify({
|
|
366
412
|
text: options.text,
|
|
367
|
-
|
|
413
|
+
model_id: options.modelId || 'kugel-1-turbo',
|
|
368
414
|
voice_id: options.voiceId,
|
|
369
415
|
cfg_scale: options.cfgScale ?? 2.0,
|
|
370
416
|
max_new_tokens: options.maxNewTokens ?? 2048,
|
|
371
417
|
sample_rate: options.sampleRate ?? 24000,
|
|
372
|
-
|
|
373
|
-
normalize: options.normalize ?? false,
|
|
418
|
+
normalize: options.normalize ?? true,
|
|
374
419
|
...(options.language && { language: options.language }),
|
|
420
|
+
...(options.wordTimestamps && { word_timestamps: true }),
|
|
375
421
|
}));
|
|
376
422
|
});
|
|
377
423
|
}
|
|
@@ -385,27 +431,33 @@ class TTSResource {
|
|
|
385
431
|
): Promise<void> {
|
|
386
432
|
return new Promise((resolve, reject) => {
|
|
387
433
|
const url = this.buildWsUrl();
|
|
388
|
-
const ws =
|
|
434
|
+
const ws = createWs(url);
|
|
389
435
|
|
|
390
436
|
ws.onopen = () => {
|
|
391
437
|
callbacks.onOpen?.();
|
|
392
438
|
// Send TTS request
|
|
393
439
|
ws.send(JSON.stringify({
|
|
394
440
|
text: options.text,
|
|
395
|
-
|
|
441
|
+
model_id: options.modelId || 'kugel-1-turbo',
|
|
396
442
|
voice_id: options.voiceId,
|
|
397
443
|
cfg_scale: options.cfgScale ?? 2.0,
|
|
398
444
|
max_new_tokens: options.maxNewTokens ?? 2048,
|
|
399
445
|
sample_rate: options.sampleRate ?? 24000,
|
|
400
|
-
|
|
401
|
-
normalize: options.normalize ?? false,
|
|
446
|
+
normalize: options.normalize ?? true,
|
|
402
447
|
...(options.language && { language: options.language }),
|
|
448
|
+
...(options.wordTimestamps && { word_timestamps: true }),
|
|
403
449
|
}));
|
|
404
450
|
};
|
|
405
451
|
|
|
406
|
-
ws.onmessage = (event) => {
|
|
452
|
+
ws.onmessage = (event: { data: unknown }) => {
|
|
407
453
|
try {
|
|
408
|
-
|
|
454
|
+
// Handle both browser (string) and Node.js (Buffer) message formats
|
|
455
|
+
const messageData = typeof event.data === 'string'
|
|
456
|
+
? event.data
|
|
457
|
+
: event.data instanceof Buffer
|
|
458
|
+
? event.data.toString()
|
|
459
|
+
: String(event.data);
|
|
460
|
+
const data = JSON.parse(messageData);
|
|
409
461
|
|
|
410
462
|
if (data.error) {
|
|
411
463
|
const error = this.parseError(data.error);
|
|
@@ -442,6 +494,20 @@ class TTSResource {
|
|
|
442
494
|
};
|
|
443
495
|
callbacks.onChunk?.(chunk);
|
|
444
496
|
}
|
|
497
|
+
|
|
498
|
+
if (data.word_timestamps) {
|
|
499
|
+
const timestamps: WordTimestamp[] = data.word_timestamps.map(
|
|
500
|
+
(w: Record<string, unknown>) => ({
|
|
501
|
+
word: w.word as string,
|
|
502
|
+
startMs: w.start_ms as number,
|
|
503
|
+
endMs: w.end_ms as number,
|
|
504
|
+
charStart: w.char_start as number,
|
|
505
|
+
charEnd: w.char_end as number,
|
|
506
|
+
score: (w.score as number) ?? 1.0,
|
|
507
|
+
})
|
|
508
|
+
);
|
|
509
|
+
callbacks.onWordTimestamps?.(timestamps);
|
|
510
|
+
}
|
|
445
511
|
} catch (e) {
|
|
446
512
|
console.error('Failed to parse WebSocket message:', e);
|
|
447
513
|
}
|
|
@@ -489,6 +555,306 @@ class TTSResource {
|
|
|
489
555
|
}
|
|
490
556
|
return new KugelAudioError(message);
|
|
491
557
|
}
|
|
558
|
+
|
|
559
|
+
/**
|
|
560
|
+
* Create a multi-context session for concurrent TTS streams.
|
|
561
|
+
*
|
|
562
|
+
* Allows managing up to 5 independent audio generation contexts
|
|
563
|
+
* over a single WebSocket connection. Each context has its own
|
|
564
|
+
* text buffer, voice settings, and generation queue.
|
|
565
|
+
*
|
|
566
|
+
* @example
|
|
567
|
+
* ```typescript
|
|
568
|
+
* const session = client.tts.createMultiContextSession({
|
|
569
|
+
* defaultVoiceId: 123,
|
|
570
|
+
* });
|
|
571
|
+
*
|
|
572
|
+
* session.connect({
|
|
573
|
+
* onChunk: (chunk) => {
|
|
574
|
+
* console.log(`Audio from ${chunk.contextId}`);
|
|
575
|
+
* playAudio(chunk.audio);
|
|
576
|
+
* },
|
|
577
|
+
* onContextFinal: (contextId) => {
|
|
578
|
+
* console.log(`${contextId} finished`);
|
|
579
|
+
* },
|
|
580
|
+
* });
|
|
581
|
+
*
|
|
582
|
+
* // Create contexts with different voices (only once the socket is open: createContext throws 'WebSocket not connected' before that)
|
|
583
|
+
* session.createContext('narrator', { voiceId: 123 });
|
|
584
|
+
* session.createContext('character', { voiceId: 456 });
|
|
585
|
+
*
|
|
586
|
+
* // Send text to different speakers
|
|
587
|
+
* session.send('narrator', 'The story begins.', true);
|
|
588
|
+
* session.send('character', 'Hello!', true);
|
|
589
|
+
*
|
|
590
|
+
* // Close when done
|
|
591
|
+
* session.close();
|
|
592
|
+
* ```
|
|
593
|
+
*/
|
|
594
|
+
createMultiContextSession(
|
|
595
|
+
config?: import('./types').MultiContextConfig
|
|
596
|
+
): MultiContextSession {
|
|
597
|
+
return new MultiContextSession(this.client, config);
|
|
598
|
+
}
|
|
599
|
+
}
|
|
600
|
+
|
|
601
|
+
/**
|
|
602
|
+
* Multi-context WebSocket session for concurrent TTS streams.
|
|
603
|
+
*/
|
|
604
|
+
class MultiContextSession {
|
|
605
|
+
private ws: WebSocket | null = null;
|
|
606
|
+
private config: import('./types').MultiContextConfig;
|
|
607
|
+
private callbacks: import('./types').MultiContextCallbacks = {};
|
|
608
|
+
private contexts: Set<string> = new Set();
|
|
609
|
+
private _sessionId: string | null = null;
|
|
610
|
+
private isStarted = false;
|
|
611
|
+
|
|
612
|
+
constructor(
|
|
613
|
+
private client: KugelAudio,
|
|
614
|
+
config?: import('./types').MultiContextConfig
|
|
615
|
+
) {
|
|
616
|
+
this.config = config || {};
|
|
617
|
+
}
|
|
618
|
+
|
|
619
|
+
/**
|
|
620
|
+
* Get the session ID reported by the server, or null if no session has started yet (the value is not cleared when the socket closes).
|
|
621
|
+
*/
|
|
622
|
+
get sessionId(): string | null {
|
|
623
|
+
return this._sessionId;
|
|
624
|
+
}
|
|
625
|
+
|
|
626
|
+
/**
|
|
627
|
+
* Connect to the multi-context WebSocket endpoint.
|
|
628
|
+
*/
|
|
629
|
+
connect(callbacks: import('./types').MultiContextCallbacks): void {
|
|
630
|
+
this.callbacks = callbacks;
|
|
631
|
+
|
|
632
|
+
const wsUrl = this.client.ttsUrl
|
|
633
|
+
.replace('https://', 'wss://')
|
|
634
|
+
.replace('http://', 'ws://');
|
|
635
|
+
|
|
636
|
+
let authParam: string;
|
|
637
|
+
if (this.client.isToken) {
|
|
638
|
+
authParam = 'token';
|
|
639
|
+
} else if (this.client.isMasterKey) {
|
|
640
|
+
authParam = 'master_key';
|
|
641
|
+
} else {
|
|
642
|
+
authParam = 'api_key';
|
|
643
|
+
}
|
|
644
|
+
|
|
645
|
+
const url = `${wsUrl}/ws/tts/multi?${authParam}=${this.client.apiKey}`;
|
|
646
|
+
this.ws = createWs(url);
|
|
647
|
+
|
|
648
|
+
this.ws.onopen = () => {
|
|
649
|
+
// Connection established, ready to create contexts
|
|
650
|
+
};
|
|
651
|
+
|
|
652
|
+
this.ws.onmessage = (event: { data: unknown }) => {
|
|
653
|
+
try {
|
|
654
|
+
// Handle both browser (string) and Node.js (Buffer) message formats
|
|
655
|
+
const messageData = typeof event.data === 'string'
|
|
656
|
+
? event.data
|
|
657
|
+
: event.data instanceof Buffer
|
|
658
|
+
? event.data.toString()
|
|
659
|
+
: String(event.data);
|
|
660
|
+
const data = JSON.parse(messageData);
|
|
661
|
+
|
|
662
|
+
if (data.error) {
|
|
663
|
+
this.callbacks.onError?.(
|
|
664
|
+
new KugelAudioError(data.error),
|
|
665
|
+
data.context_id
|
|
666
|
+
);
|
|
667
|
+
return;
|
|
668
|
+
}
|
|
669
|
+
|
|
670
|
+
if (data.session_started) {
|
|
671
|
+
this._sessionId = data.session_id;
|
|
672
|
+
this.isStarted = true;
|
|
673
|
+
this.callbacks.onSessionStarted?.(data.session_id);
|
|
674
|
+
}
|
|
675
|
+
|
|
676
|
+
if (data.context_created) {
|
|
677
|
+
this.contexts.add(data.context_id);
|
|
678
|
+
this.callbacks.onContextCreated?.(data.context_id);
|
|
679
|
+
}
|
|
680
|
+
|
|
681
|
+
if (data.audio) {
|
|
682
|
+
const chunk: import('./types').MultiContextAudioChunk = {
|
|
683
|
+
audio: data.audio,
|
|
684
|
+
encoding: 'pcm_s16le',
|
|
685
|
+
index: data.idx || 0,
|
|
686
|
+
sampleRate: data.sr || 24000,
|
|
687
|
+
samples: data.samples || 0,
|
|
688
|
+
contextId: data.context_id,
|
|
689
|
+
};
|
|
690
|
+
this.callbacks.onChunk?.(chunk);
|
|
691
|
+
}
|
|
692
|
+
|
|
693
|
+
if (data.is_final) {
|
|
694
|
+
this.callbacks.onContextFinal?.(data.context_id);
|
|
695
|
+
}
|
|
696
|
+
|
|
697
|
+
if (data.context_closed) {
|
|
698
|
+
this.contexts.delete(data.context_id);
|
|
699
|
+
this.callbacks.onContextClosed?.(data.context_id);
|
|
700
|
+
}
|
|
701
|
+
|
|
702
|
+
if (data.context_timeout) {
|
|
703
|
+
this.contexts.delete(data.context_id);
|
|
704
|
+
this.callbacks.onContextTimeout?.(data.context_id);
|
|
705
|
+
}
|
|
706
|
+
|
|
707
|
+
if (data.session_closed) {
|
|
708
|
+
this.callbacks.onSessionClosed?.(data);
|
|
709
|
+
}
|
|
710
|
+
} catch (e) {
|
|
711
|
+
console.error('Failed to parse WebSocket message:', e);
|
|
712
|
+
}
|
|
713
|
+
};
|
|
714
|
+
|
|
715
|
+
this.ws.onerror = () => {
|
|
716
|
+
this.callbacks.onError?.(new KugelAudioError('WebSocket connection error'));
|
|
717
|
+
};
|
|
718
|
+
|
|
719
|
+
this.ws.onclose = (event) => {
|
|
720
|
+
if (event.code === 4001) {
|
|
721
|
+
this.callbacks.onError?.(new AuthenticationError('Authentication failed'));
|
|
722
|
+
} else if (event.code === 4003) {
|
|
723
|
+
this.callbacks.onError?.(new InsufficientCreditsError('Insufficient credits'));
|
|
724
|
+
}
|
|
725
|
+
this.ws = null;
|
|
726
|
+
this.isStarted = false;
|
|
727
|
+
this.contexts.clear();
|
|
728
|
+
};
|
|
729
|
+
}
|
|
730
|
+
|
|
731
|
+
/**
|
|
732
|
+
* Create a new context with optional voice settings.
|
|
733
|
+
*/
|
|
734
|
+
createContext(
|
|
735
|
+
contextId: string,
|
|
736
|
+
options?: {
|
|
737
|
+
voiceId?: number;
|
|
738
|
+
voiceSettings?: import('./types').ContextVoiceSettings;
|
|
739
|
+
}
|
|
740
|
+
): void {
|
|
741
|
+
if (!this.ws || this.ws.readyState !== WS_OPEN) {
|
|
742
|
+
throw new KugelAudioError('WebSocket not connected');
|
|
743
|
+
}
|
|
744
|
+
|
|
745
|
+
const msg: Record<string, unknown> = {
|
|
746
|
+
text: ' ',
|
|
747
|
+
context_id: contextId,
|
|
748
|
+
};
|
|
749
|
+
|
|
750
|
+
// Include session config until the server has confirmed the session start (normally only the first context)
|
|
751
|
+
if (!this.isStarted) {
|
|
752
|
+
if (this.config.sampleRate) msg.sample_rate = this.config.sampleRate;
|
|
753
|
+
if (this.config.cfgScale) msg.cfg_scale = this.config.cfgScale;
|
|
754
|
+
if (this.config.maxNewTokens) msg.max_new_tokens = this.config.maxNewTokens;
|
|
755
|
+
if (this.config.normalize !== undefined) msg.normalize = this.config.normalize;
|
|
756
|
+
if (this.config.inactivityTimeout) msg.inactivity_timeout = this.config.inactivityTimeout;
|
|
757
|
+
}
|
|
758
|
+
|
|
759
|
+
// Per-context voice
|
|
760
|
+
const voiceId = options?.voiceId || this.config.defaultVoiceId;
|
|
761
|
+
if (voiceId) msg.voice_id = voiceId;
|
|
762
|
+
|
|
763
|
+
if (options?.voiceSettings) {
|
|
764
|
+
msg.voice_settings = {
|
|
765
|
+
stability: options.voiceSettings.stability,
|
|
766
|
+
similarity_boost: options.voiceSettings.similarityBoost,
|
|
767
|
+
style: options.voiceSettings.style,
|
|
768
|
+
use_speaker_boost: options.voiceSettings.useSpeakerBoost,
|
|
769
|
+
speed: options.voiceSettings.speed,
|
|
770
|
+
};
|
|
771
|
+
}
|
|
772
|
+
|
|
773
|
+
this.ws.send(JSON.stringify(msg));
|
|
774
|
+
}
|
|
775
|
+
|
|
776
|
+
/**
|
|
777
|
+
* Send text to a specific context.
|
|
778
|
+
*/
|
|
779
|
+
send(contextId: string, text: string, flush = false): void {
|
|
780
|
+
if (!this.ws || this.ws.readyState !== WS_OPEN) {
|
|
781
|
+
throw new KugelAudioError('WebSocket not connected');
|
|
782
|
+
}
|
|
783
|
+
|
|
784
|
+
// Auto-create the context only when it is unknown AND the session has not started yet
|
|
785
|
+
if (!this.contexts.has(contextId) && !this.isStarted) {
|
|
786
|
+
this.createContext(contextId);
|
|
787
|
+
}
|
|
788
|
+
|
|
789
|
+
this.ws.send(JSON.stringify({
|
|
790
|
+
text,
|
|
791
|
+
context_id: contextId,
|
|
792
|
+
flush,
|
|
793
|
+
}));
|
|
794
|
+
}
|
|
795
|
+
|
|
796
|
+
/**
|
|
797
|
+
* Flush a context's buffer.
|
|
798
|
+
*/
|
|
799
|
+
flush(contextId: string): void {
|
|
800
|
+
if (!this.ws || this.ws.readyState !== WS_OPEN) return;
|
|
801
|
+
|
|
802
|
+
this.ws.send(JSON.stringify({
|
|
803
|
+
flush: true,
|
|
804
|
+
context_id: contextId,
|
|
805
|
+
}));
|
|
806
|
+
}
|
|
807
|
+
|
|
808
|
+
/**
|
|
809
|
+
* Close a specific context.
|
|
810
|
+
*/
|
|
811
|
+
closeContext(contextId: string): void {
|
|
812
|
+
if (!this.ws || this.ws.readyState !== WS_OPEN) return;
|
|
813
|
+
|
|
814
|
+
this.ws.send(JSON.stringify({
|
|
815
|
+
close_context: true,
|
|
816
|
+
context_id: contextId,
|
|
817
|
+
}));
|
|
818
|
+
}
|
|
819
|
+
|
|
820
|
+
/**
|
|
821
|
+
* Send keep-alive to reset a context's inactivity timeout.
|
|
822
|
+
*/
|
|
823
|
+
keepAlive(contextId: string): void {
|
|
824
|
+
if (!this.ws || this.ws.readyState !== WS_OPEN) return;
|
|
825
|
+
|
|
826
|
+
this.ws.send(JSON.stringify({
|
|
827
|
+
text: '',
|
|
828
|
+
context_id: contextId,
|
|
829
|
+
}));
|
|
830
|
+
}
|
|
831
|
+
|
|
832
|
+
/**
|
|
833
|
+
* Close the session and all contexts.
|
|
834
|
+
*/
|
|
835
|
+
close(): void {
|
|
836
|
+
if (this.ws && this.ws.readyState === WS_OPEN) {
|
|
837
|
+
this.ws.send(JSON.stringify({ close_socket: true }));
|
|
838
|
+
this.ws.close();
|
|
839
|
+
}
|
|
840
|
+
this.ws = null;
|
|
841
|
+
this.isStarted = false;
|
|
842
|
+
this.contexts.clear();
|
|
843
|
+
}
|
|
844
|
+
|
|
845
|
+
/**
|
|
846
|
+
* Get active context IDs.
|
|
847
|
+
*/
|
|
848
|
+
get activeContexts(): string[] {
|
|
849
|
+
return Array.from(this.contexts);
|
|
850
|
+
}
|
|
851
|
+
|
|
852
|
+
/**
|
|
853
|
+
* Check if connected.
|
|
854
|
+
*/
|
|
855
|
+
get isConnected(): boolean {
|
|
856
|
+
return this.ws !== null && this.ws.readyState === WS_OPEN;
|
|
857
|
+
}
|
|
492
858
|
}
|
|
493
859
|
|
|
494
860
|
/**
|
|
@@ -507,13 +873,13 @@ class TTSResource {
|
|
|
507
873
|
* // Generate audio with fast model (1.5B params)
|
|
508
874
|
* const audio = await client.tts.generate({
|
|
509
875
|
* text: 'Hello, world!',
|
|
510
|
-
*
|
|
876
|
+
* modelId: 'kugel-1-turbo',
|
|
511
877
|
* });
|
|
512
878
|
*
|
|
513
879
|
* // Generate audio with premium model (7B params)
|
|
514
880
|
* const audio = await client.tts.generate({
|
|
515
881
|
* text: 'Hello, world!',
|
|
516
|
-
*
|
|
882
|
+
* modelId: 'kugel-1',
|
|
517
883
|
* });
|
|
518
884
|
* ```
|
|
519
885
|
*/
|
|
@@ -521,6 +887,7 @@ export class KugelAudio {
|
|
|
521
887
|
private _apiKey: string;
|
|
522
888
|
private _isMasterKey: boolean;
|
|
523
889
|
private _isToken: boolean;
|
|
890
|
+
private _orgId: number | undefined;
|
|
524
891
|
private _apiUrl: string;
|
|
525
892
|
private _ttsUrl: string;
|
|
526
893
|
private _timeout: number;
|
|
@@ -540,6 +907,7 @@ export class KugelAudio {
|
|
|
540
907
|
this._apiKey = options.apiKey;
|
|
541
908
|
this._isMasterKey = options.isMasterKey || false;
|
|
542
909
|
this._isToken = options.isToken || false;
|
|
910
|
+
this._orgId = options.orgId;
|
|
543
911
|
this._apiUrl = (options.apiUrl || DEFAULT_API_URL).replace(/\/$/, '');
|
|
544
912
|
// If ttsUrl not specified, use apiUrl (backend proxies to TTS server)
|
|
545
913
|
this._ttsUrl = (options.ttsUrl || this._apiUrl).replace(/\/$/, '');
|
|
@@ -587,6 +955,11 @@ export class KugelAudio {
|
|
|
587
955
|
return this._isToken;
|
|
588
956
|
}
|
|
589
957
|
|
|
958
|
+
/** Get organisation ID for billing */
|
|
959
|
+
get orgId(): number | undefined {
|
|
960
|
+
return this._orgId;
|
|
961
|
+
}
|
|
962
|
+
|
|
590
963
|
/** Get TTS URL */
|
|
591
964
|
get ttsUrl(): string {
|
|
592
965
|
return this._ttsUrl;
|
package/src/index.ts
CHANGED
|
@@ -18,13 +18,13 @@
|
|
|
18
18
|
* // Generate audio (non-streaming)
|
|
19
19
|
* const audio = await client.tts.generate({
|
|
20
20
|
* text: 'Hello, world!',
|
|
21
|
-
*
|
|
21
|
+
* modelId: 'kugel-1-turbo',
|
|
22
22
|
* voiceId: 123,
|
|
23
23
|
* });
|
|
24
24
|
*
|
|
25
25
|
* // Generate audio (streaming)
|
|
26
26
|
* await client.tts.stream(
|
|
27
|
-
* { text: 'Hello, world!',
|
|
27
|
+
* { text: 'Hello, world!', modelId: 'kugel-1-turbo' },
|
|
28
28
|
* {
|
|
29
29
|
* onChunk: (chunk) => {
|
|
30
30
|
* // Process audio chunk
|
|
@@ -46,16 +46,21 @@ export { KugelAudio } from './client';
|
|
|
46
46
|
export type {
|
|
47
47
|
AudioChunk,
|
|
48
48
|
AudioResponse,
|
|
49
|
+
ContextVoiceSettings,
|
|
49
50
|
GenerateOptions,
|
|
50
51
|
GenerationStats,
|
|
51
52
|
KugelAudioOptions,
|
|
52
53
|
Model,
|
|
54
|
+
MultiContextAudioChunk,
|
|
55
|
+
MultiContextCallbacks,
|
|
56
|
+
MultiContextConfig,
|
|
53
57
|
StreamCallbacks,
|
|
54
58
|
StreamConfig,
|
|
55
59
|
Voice,
|
|
56
60
|
VoiceAge,
|
|
57
61
|
VoiceCategory,
|
|
58
|
-
VoiceSex
|
|
62
|
+
VoiceSex,
|
|
63
|
+
WordTimestamp
|
|
59
64
|
} from './types';
|
|
60
65
|
|
|
61
66
|
// Errors
|