kugelaudio 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -8
- package/dist/index.d.mts +175 -17
- package/dist/index.d.ts +175 -17
- package/dist/index.js +287 -13
- package/dist/index.mjs +294 -13
- package/package.json +5 -1
- package/src/client.ts +354 -17
- package/src/index.ts +6 -2
- package/src/types.ts +83 -12
- package/src/websocket.ts +44 -0
package/src/client.ts
CHANGED
|
@@ -19,9 +19,23 @@ import type {
|
|
|
19
19
|
Voice
|
|
20
20
|
} from './types';
|
|
21
21
|
import { base64ToArrayBuffer } from './utils';
|
|
22
|
+
import { getWebSocket } from './websocket';
|
|
22
23
|
|
|
23
24
|
const DEFAULT_API_URL = 'https://api.kugelaudio.com';
|
|
24
25
|
|
|
26
|
+
/**
 * Open a WebSocket to `url`.
 *
 * The constructor is looked up through `getWebSocket()` on every call rather
 * than at module load, which keeps this module free of the top-level
 * side-effects that break server-side bundlers (Turbopack/Webpack).
 *
 * @param url - WebSocket endpoint to connect to.
 * @returns The newly constructed socket.
 */
function createWs(url: string): WebSocket {
  const SocketCtor = getWebSocket();
  const socket = new SocketCtor(url);
  return socket;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Numeric `readyState` value that means "open" for a WebSocket.
 * Sockets in this module are compared against it (`socket.readyState === WS_OPEN`)
 * instead of reading a constant off a WebSocket constructor.
 */
const WS_OPEN = 1;
|
|
38
|
+
|
|
25
39
|
/**
|
|
26
40
|
* Models resource for listing TTS models.
|
|
27
41
|
*/
|
|
@@ -111,6 +125,7 @@ class VoicesResource {
|
|
|
111
125
|
* TTS resource for text-to-speech generation.
|
|
112
126
|
*/
|
|
113
127
|
class TTSResource {
|
|
128
|
+
// Typed as the DOM WebSocket; at runtime the instance may be the browser WebSocket or the one from the ws package (see getWebSocket)
|
|
114
129
|
private wsConnection: WebSocket | null = null;
|
|
115
130
|
private wsUrl: string | null = null;
|
|
116
131
|
private pendingRequests: Map<number, {
|
|
@@ -147,7 +162,7 @@ class TTSResource {
|
|
|
147
162
|
* Check if WebSocket connection is established and open.
|
|
148
163
|
*/
|
|
149
164
|
isConnected(): boolean {
  const socket = this.wsConnection;
  // No pooled socket has been created yet (or it was discarded).
  if (socket === null) {
    return false;
  }
  // A socket object alone is not enough: it must currently be in the OPEN state.
  return socket.readyState === WS_OPEN;
}
|
|
152
167
|
|
|
153
168
|
/**
|
|
@@ -202,7 +217,12 @@ class TTSResource {
|
|
|
202
217
|
} else {
|
|
203
218
|
authParam = 'api_key';
|
|
204
219
|
}
|
|
205
|
-
|
|
220
|
+
let url = `${wsUrl}/ws/tts?${authParam}=${this.client.apiKey}`;
|
|
221
|
+
// Append org_id for token auth so usage is recorded against the org
|
|
222
|
+
if (this.client.orgId !== undefined) {
|
|
223
|
+
url += `&org_id=${this.client.orgId}`;
|
|
224
|
+
}
|
|
225
|
+
return url;
|
|
206
226
|
}
|
|
207
227
|
|
|
208
228
|
/**
|
|
@@ -216,7 +236,7 @@ class TTSResource {
|
|
|
216
236
|
if (
|
|
217
237
|
this.wsConnection &&
|
|
218
238
|
this.wsUrl === url &&
|
|
219
|
-
this.wsConnection.readyState ===
|
|
239
|
+
this.wsConnection.readyState === WS_OPEN
|
|
220
240
|
) {
|
|
221
241
|
return this.wsConnection;
|
|
222
242
|
}
|
|
@@ -233,7 +253,7 @@ class TTSResource {
|
|
|
233
253
|
|
|
234
254
|
// Create new connection
|
|
235
255
|
return new Promise((resolve, reject) => {
|
|
236
|
-
const ws =
|
|
256
|
+
const ws = createWs(url);
|
|
237
257
|
|
|
238
258
|
ws.onopen = () => {
|
|
239
259
|
this.wsConnection = ws;
|
|
@@ -252,9 +272,15 @@ class TTSResource {
|
|
|
252
272
|
* Setup message handler for pooled connection.
|
|
253
273
|
*/
|
|
254
274
|
private setupMessageHandler(ws: WebSocket): void {
|
|
255
|
-
ws.onmessage = (event) => {
|
|
275
|
+
ws.onmessage = (event: { data: unknown }) => {
|
|
256
276
|
try {
|
|
257
|
-
|
|
277
|
+
// Handle both browser (string) and Node.js (Buffer) message formats
|
|
278
|
+
const messageData = typeof event.data === 'string'
|
|
279
|
+
? event.data
|
|
280
|
+
: event.data instanceof Buffer
|
|
281
|
+
? event.data.toString()
|
|
282
|
+
: String(event.data);
|
|
283
|
+
const data = JSON.parse(messageData);
|
|
258
284
|
|
|
259
285
|
// Get the current pending request (we process one at a time)
|
|
260
286
|
const [requestId, pending] = [...this.pendingRequests.entries()][0] || [];
|
|
@@ -364,13 +390,12 @@ class TTSResource {
|
|
|
364
390
|
|
|
365
391
|
ws.send(JSON.stringify({
|
|
366
392
|
text: options.text,
|
|
367
|
-
|
|
393
|
+
model_id: options.modelId || 'kugel-1-turbo',
|
|
368
394
|
voice_id: options.voiceId,
|
|
369
395
|
cfg_scale: options.cfgScale ?? 2.0,
|
|
370
396
|
max_new_tokens: options.maxNewTokens ?? 2048,
|
|
371
397
|
sample_rate: options.sampleRate ?? 24000,
|
|
372
|
-
|
|
373
|
-
normalize: options.normalize ?? false,
|
|
398
|
+
normalize: options.normalize ?? true,
|
|
374
399
|
...(options.language && { language: options.language }),
|
|
375
400
|
}));
|
|
376
401
|
});
|
|
@@ -385,27 +410,32 @@ class TTSResource {
|
|
|
385
410
|
): Promise<void> {
|
|
386
411
|
return new Promise((resolve, reject) => {
|
|
387
412
|
const url = this.buildWsUrl();
|
|
388
|
-
const ws =
|
|
413
|
+
const ws = createWs(url);
|
|
389
414
|
|
|
390
415
|
ws.onopen = () => {
|
|
391
416
|
callbacks.onOpen?.();
|
|
392
417
|
// Send TTS request
|
|
393
418
|
ws.send(JSON.stringify({
|
|
394
419
|
text: options.text,
|
|
395
|
-
|
|
420
|
+
model_id: options.modelId || 'kugel-1-turbo',
|
|
396
421
|
voice_id: options.voiceId,
|
|
397
422
|
cfg_scale: options.cfgScale ?? 2.0,
|
|
398
423
|
max_new_tokens: options.maxNewTokens ?? 2048,
|
|
399
424
|
sample_rate: options.sampleRate ?? 24000,
|
|
400
|
-
|
|
401
|
-
normalize: options.normalize ?? false,
|
|
425
|
+
normalize: options.normalize ?? true,
|
|
402
426
|
...(options.language && { language: options.language }),
|
|
403
427
|
}));
|
|
404
428
|
};
|
|
405
429
|
|
|
406
|
-
ws.onmessage = (event) => {
|
|
430
|
+
ws.onmessage = (event: { data: unknown }) => {
|
|
407
431
|
try {
|
|
408
|
-
|
|
432
|
+
// Handle both browser (string) and Node.js (Buffer) message formats
|
|
433
|
+
const messageData = typeof event.data === 'string'
|
|
434
|
+
? event.data
|
|
435
|
+
: event.data instanceof Buffer
|
|
436
|
+
? event.data.toString()
|
|
437
|
+
: String(event.data);
|
|
438
|
+
const data = JSON.parse(messageData);
|
|
409
439
|
|
|
410
440
|
if (data.error) {
|
|
411
441
|
const error = this.parseError(data.error);
|
|
@@ -489,6 +519,306 @@ class TTSResource {
|
|
|
489
519
|
}
|
|
490
520
|
return new KugelAudioError(message);
|
|
491
521
|
}
|
|
522
|
+
|
|
523
|
+
/**
 * Build a multi-context session: several independent TTS streams that share
 * a single WebSocket connection.
 *
 * Up to 5 audio generation contexts can be managed per session. Every context
 * keeps its own text buffer, voice settings, and generation queue.
 *
 * The session returned here holds no socket yet; call `connect()` on it to
 * open one.
 * NOTE(review): `connect()` returns without waiting for the socket to open —
 * confirm that contexts may be created immediately afterwards, as the example
 * below does.
 *
 * @example
 * ```typescript
 * const session = client.tts.createMultiContextSession({
 *   defaultVoiceId: 123,
 * });
 *
 * session.connect({
 *   onChunk: (chunk) => {
 *     console.log(`Audio from ${chunk.contextId}`);
 *     playAudio(chunk.audio);
 *   },
 *   onContextFinal: (contextId) => {
 *     console.log(`${contextId} finished`);
 *   },
 * });
 *
 * // Create contexts with different voices
 * session.createContext('narrator', { voiceId: 123 });
 * session.createContext('character', { voiceId: 456 });
 *
 * // Send text to different speakers
 * session.send('narrator', 'The story begins.', true);
 * session.send('character', 'Hello!', true);
 *
 * // Close when done
 * session.close();
 * ```
 *
 * @param config - Optional session-level settings (default voice, sample rate, ...).
 * @returns A new, not yet connected, session bound to this client.
 */
createMultiContextSession(
  config?: import('./types').MultiContextConfig
): MultiContextSession {
  const session = new MultiContextSession(this.client, config);
  return session;
}
|
|
563
|
+
}
|
|
564
|
+
|
|
565
|
+
/** WebSocket CONNECTING readyState constant. */
const WS_CONNECTING = 0;

/**
 * Multi-context WebSocket session for concurrent TTS streams.
 *
 * A session owns at most one socket to the `/ws/tts/multi` endpoint and keeps
 * track of the context IDs the server has confirmed. Outgoing messages that
 * are requested while the socket is still connecting are queued and sent, in
 * order, as soon as it opens, so contexts can be created right after
 * `connect()` returns.
 */
class MultiContextSession {
  private ws: WebSocket | null = null;
  private config: import('./types').MultiContextConfig;
  private callbacks: import('./types').MultiContextCallbacks = {};
  private contexts: Set<string> = new Set();
  private _sessionId: string | null = null;
  private isStarted = false;
  /** Serialized frames requested while the socket was still connecting. */
  private outbox: string[] = [];

  /**
   * @param client - Client that supplies the TTS URL and credentials.
   * @param config - Optional session-level settings; defaults to `{}`.
   */
  constructor(
    private client: KugelAudio,
    config?: import('./types').MultiContextConfig
  ) {
    this.config = config ?? {};
  }

  /**
   * Get the current session ID, or null if not connected.
   */
  get sessionId(): string | null {
    return this._sessionId;
  }

  /**
   * Connect to the multi-context WebSocket endpoint.
   *
   * Returns immediately; the socket opens asynchronously. If this session
   * already holds a socket, that socket is closed first so it is not leaked.
   *
   * @param callbacks - Handlers for session, context, audio and error events.
   */
  connect(callbacks: import('./types').MultiContextCallbacks): void {
    if (this.ws) {
      this.close();
    }
    this.callbacks = callbacks;

    const ws = createWs(this.buildUrl());
    this.ws = ws;

    ws.onopen = () => {
      // The session may have been closed or reconnected while this socket was opening.
      if (this.ws !== ws) return;
      for (const frame of this.outbox.splice(0)) {
        ws.send(frame);
      }
    };

    ws.onmessage = (event: { data: unknown }) => {
      this.handleMessage(event.data, ws);
    };

    ws.onerror = () => {
      // Errors from a socket the session has already let go of (for example
      // the aborted handshake after close()) are not reported.
      if (this.ws !== ws) return;
      this.callbacks.onError?.(new KugelAudioError('WebSocket connection error'));
    };

    ws.onclose = (event) => {
      if (event.code === 4001) {
        this.callbacks.onError?.(new AuthenticationError('Authentication failed'));
      } else if (event.code === 4003) {
        this.callbacks.onError?.(new InsufficientCreditsError('Insufficient credits'));
      }
      // Only reset when the socket that closed is still the current one;
      // otherwise a late close event would wipe out a newer connection.
      if (this.ws === ws) {
        this.resetState();
      }
    };
  }

  /**
   * Create a new context with optional voice settings.
   *
   * Session-level settings from the config are included until the server has
   * confirmed that the session started.
   *
   * @param contextId - Identifier for the new context.
   * @param options - Per-context voice ID and voice settings.
   * @throws KugelAudioError if the session has no open or connecting socket.
   */
  createContext(
    contextId: string,
    options?: {
      voiceId?: number;
      voiceSettings?: import('./types').ContextVoiceSettings;
    }
  ): void {
    if (!this.isUsable()) {
      throw new KugelAudioError('WebSocket not connected');
    }

    const msg: Record<string, unknown> = {
      text: ' ',
      context_id: contextId,
    };

    // Include session config on first context
    if (!this.isStarted) {
      const sessionFields: Record<string, unknown> = {
        sample_rate: this.config.sampleRate,
        cfg_scale: this.config.cfgScale,
        max_new_tokens: this.config.maxNewTokens,
        normalize: this.config.normalize,
        inactivity_timeout: this.config.inactivityTimeout,
      };
      for (const [key, value] of Object.entries(sessionFields)) {
        // Skip only unset values: an explicit 0 or false is a real setting.
        if (value != null) msg[key] = value;
      }
    }

    // Per-context voice
    const voiceId = options?.voiceId ?? this.config.defaultVoiceId;
    if (voiceId != null) msg.voice_id = voiceId;

    if (options?.voiceSettings) {
      msg.voice_settings = {
        stability: options.voiceSettings.stability,
        similarity_boost: options.voiceSettings.similarityBoost,
        style: options.voiceSettings.style,
        use_speaker_boost: options.voiceSettings.useSpeakerBoost,
        speed: options.voiceSettings.speed,
      };
    }

    this.transmit(msg);
  }

  /**
   * Send text to a specific context.
   *
   * @param contextId - Context that should speak the text.
   * @param text - Text to append to the context's buffer.
   * @param flush - Sent as the message's `flush` flag (default: false).
   * @throws KugelAudioError if the session has no open or connecting socket.
   */
  send(contextId: string, text: string, flush = false): void {
    if (!this.isUsable()) {
      throw new KugelAudioError('WebSocket not connected');
    }

    // Auto-create context if needed
    if (!this.contexts.has(contextId) && !this.isStarted) {
      this.createContext(contextId);
    }

    this.transmit({
      text,
      context_id: contextId,
      flush,
    });
  }

  /**
   * Flush a context's buffer. Does nothing when the session is not connected.
   */
  flush(contextId: string): void {
    this.transmit({
      flush: true,
      context_id: contextId,
    });
  }

  /**
   * Close a specific context. Does nothing when the session is not connected.
   */
  closeContext(contextId: string): void {
    this.transmit({
      close_context: true,
      context_id: contextId,
    });
  }

  /**
   * Send keep-alive to reset a context's inactivity timeout.
   * Does nothing when the session is not connected.
   */
  keepAlive(contextId: string): void {
    this.transmit({
      text: '',
      context_id: contextId,
    });
  }

  /**
   * Close the session and all contexts.
   *
   * The socket is closed even when it is still connecting, and any frames
   * queued for it are discarded.
   */
  close(): void {
    const ws = this.ws;
    // Let go of the socket first so that its late events are treated as stale.
    this.resetState();
    if (!ws) return;

    if (ws.readyState === WS_OPEN) {
      ws.send(JSON.stringify({ close_socket: true }));
    }
    ws.close();
  }

  /**
   * Get active context IDs.
   */
  get activeContexts(): string[] {
    return Array.from(this.contexts);
  }

  /**
   * Check if connected.
   */
  get isConnected(): boolean {
    return this.ws !== null && this.ws.readyState === WS_OPEN;
  }

  /** Build the endpoint URL, including the credential and optional org ID. */
  private buildUrl(): string {
    const wsUrl = this.client.ttsUrl
      .replace('https://', 'wss://')
      .replace('http://', 'ws://');

    let authParam: string;
    if (this.client.isToken) {
      authParam = 'token';
    } else if (this.client.isMasterKey) {
      authParam = 'master_key';
    } else {
      authParam = 'api_key';
    }

    // Encode the credential so characters such as '+', '/' or '=' survive the query string.
    let url = `${wsUrl}/ws/tts/multi?${authParam}=${encodeURIComponent(this.client.apiKey)}`;
    // Same org_id parameter the single-stream URL carries for usage recording.
    if (this.client.orgId !== undefined) {
      url += `&org_id=${this.client.orgId}`;
    }
    return url;
  }

  /** Turn a WebSocket message payload (browser string or Node.js Buffer) into text. */
  private static decodeMessage(raw: unknown): string {
    if (typeof raw === 'string') return raw;
    // Buffer does not exist in browsers; referencing it unguarded would throw there.
    if (typeof Buffer !== 'undefined' && raw instanceof Buffer) return raw.toString();
    return String(raw);
  }

  /**
   * Parse one server message and dispatch it to the callbacks.
   * Session state is only modified for messages from the current socket.
   */
  private handleMessage(raw: unknown, source: WebSocket): void {
    const live = this.ws === source;
    try {
      const data = JSON.parse(MultiContextSession.decodeMessage(raw));

      if (data.error) {
        this.callbacks.onError?.(
          new KugelAudioError(data.error),
          data.context_id
        );
        return;
      }

      if (data.session_started) {
        if (live) {
          this._sessionId = data.session_id;
          this.isStarted = true;
        }
        this.callbacks.onSessionStarted?.(data.session_id);
      }

      if (data.context_created) {
        if (live) this.contexts.add(data.context_id);
        this.callbacks.onContextCreated?.(data.context_id);
      }

      if (data.audio) {
        const chunk: import('./types').MultiContextAudioChunk = {
          audio: data.audio,
          encoding: 'pcm_s16le',
          index: data.idx || 0,
          sampleRate: data.sr || 24000,
          samples: data.samples || 0,
          contextId: data.context_id,
        };
        this.callbacks.onChunk?.(chunk);
      }

      if (data.is_final) {
        this.callbacks.onContextFinal?.(data.context_id);
      }

      if (data.context_closed) {
        if (live) this.contexts.delete(data.context_id);
        this.callbacks.onContextClosed?.(data.context_id);
      }

      if (data.context_timeout) {
        if (live) this.contexts.delete(data.context_id);
        this.callbacks.onContextTimeout?.(data.context_id);
      }

      if (data.session_closed) {
        this.callbacks.onSessionClosed?.(data);
      }
    } catch (e) {
      console.error('Failed to parse WebSocket message:', e);
    }
  }

  /** True while the session holds a socket that is open or still connecting. */
  private isUsable(): boolean {
    const state = this.ws?.readyState;
    return state === WS_OPEN || state === WS_CONNECTING;
  }

  /**
   * Send a message now if the socket is open, or queue it if the socket is
   * still connecting.
   *
   * @returns false when there is no usable socket and the message was dropped.
   */
  private transmit(payload: Record<string, unknown>): boolean {
    const ws = this.ws;
    if (!ws) return false;

    const frame = JSON.stringify(payload);
    if (ws.readyState === WS_OPEN) {
      ws.send(frame);
      return true;
    }
    if (ws.readyState === WS_CONNECTING) {
      this.outbox.push(frame);
      return true;
    }
    return false;
  }

  /** Forget the socket and everything learned from it. */
  private resetState(): void {
    this.ws = null;
    this.isStarted = false;
    this._sessionId = null;
    this.contexts.clear();
    this.outbox = [];
  }
}
|
|
493
823
|
|
|
494
824
|
/**
|
|
@@ -507,13 +837,13 @@ class TTSResource {
|
|
|
507
837
|
* // Generate audio with fast model (1.5B params)
|
|
508
838
|
* const audio = await client.tts.generate({
|
|
509
839
|
* text: 'Hello, world!',
|
|
510
|
-
*
|
|
840
|
+
* modelId: 'kugel-1-turbo',
|
|
511
841
|
* });
|
|
512
842
|
*
|
|
513
843
|
* // Generate audio with premium model (7B params)
|
|
514
844
|
* const audio = await client.tts.generate({
|
|
515
845
|
* text: 'Hello, world!',
|
|
516
|
-
*
|
|
846
|
+
* modelId: 'kugel-1',
|
|
517
847
|
* });
|
|
518
848
|
* ```
|
|
519
849
|
*/
|
|
@@ -521,6 +851,7 @@ export class KugelAudio {
|
|
|
521
851
|
private _apiKey: string;
|
|
522
852
|
private _isMasterKey: boolean;
|
|
523
853
|
private _isToken: boolean;
|
|
854
|
+
private _orgId: number | undefined;
|
|
524
855
|
private _apiUrl: string;
|
|
525
856
|
private _ttsUrl: string;
|
|
526
857
|
private _timeout: number;
|
|
@@ -540,6 +871,7 @@ export class KugelAudio {
|
|
|
540
871
|
this._apiKey = options.apiKey;
|
|
541
872
|
this._isMasterKey = options.isMasterKey || false;
|
|
542
873
|
this._isToken = options.isToken || false;
|
|
874
|
+
this._orgId = options.orgId;
|
|
543
875
|
this._apiUrl = (options.apiUrl || DEFAULT_API_URL).replace(/\/$/, '');
|
|
544
876
|
// If ttsUrl not specified, use apiUrl (backend proxies to TTS server)
|
|
545
877
|
this._ttsUrl = (options.ttsUrl || this._apiUrl).replace(/\/$/, '');
|
|
@@ -587,6 +919,11 @@ export class KugelAudio {
|
|
|
587
919
|
return this._isToken;
|
|
588
920
|
}
|
|
589
921
|
|
|
922
|
+
/**
 * Get organisation ID for billing.
 * This is the `orgId` given in the constructor options, or `undefined` when
 * none was provided.
 */
get orgId(): number | undefined {
  return this._orgId;
}
|
|
926
|
+
|
|
590
927
|
/** Get TTS URL */
|
|
591
928
|
get ttsUrl(): string {
|
|
592
929
|
return this._ttsUrl;
|
package/src/index.ts
CHANGED
|
@@ -18,13 +18,13 @@
|
|
|
18
18
|
* // Generate audio (non-streaming)
|
|
19
19
|
* const audio = await client.tts.generate({
|
|
20
20
|
* text: 'Hello, world!',
|
|
21
|
-
*
|
|
21
|
+
* modelId: 'kugel-1-turbo',
|
|
22
22
|
* voiceId: 123,
|
|
23
23
|
* });
|
|
24
24
|
*
|
|
25
25
|
* // Generate audio (streaming)
|
|
26
26
|
* await client.tts.stream(
|
|
27
|
-
* { text: 'Hello, world!',
|
|
27
|
+
* { text: 'Hello, world!', modelId: 'kugel-1-turbo' },
|
|
28
28
|
* {
|
|
29
29
|
* onChunk: (chunk) => {
|
|
30
30
|
* // Process audio chunk
|
|
@@ -46,10 +46,14 @@ export { KugelAudio } from './client';
|
|
|
46
46
|
export type {
|
|
47
47
|
AudioChunk,
|
|
48
48
|
AudioResponse,
|
|
49
|
+
ContextVoiceSettings,
|
|
49
50
|
GenerateOptions,
|
|
50
51
|
GenerationStats,
|
|
51
52
|
KugelAudioOptions,
|
|
52
53
|
Model,
|
|
54
|
+
MultiContextAudioChunk,
|
|
55
|
+
MultiContextCallbacks,
|
|
56
|
+
MultiContextConfig,
|
|
53
57
|
StreamCallbacks,
|
|
54
58
|
StreamConfig,
|
|
55
59
|
Voice,
|
package/src/types.ts
CHANGED
|
@@ -17,7 +17,7 @@ export interface Model {
|
|
|
17
17
|
/**
 * Categories a voice can be filed under.
 */
export type VoiceCategory =
  | 'premade'
  | 'cloned'
  | 'designed'
  | 'conversational'
  | 'narrative'
  | 'narrative_story'
  | 'characters';
|
|
21
21
|
|
|
22
22
|
/**
|
|
23
23
|
* Voice sex types.
|
|
@@ -54,7 +54,7 @@ export interface GenerateOptions {
|
|
|
54
54
|
/** Text to synthesize */
|
|
55
55
|
text: string;
|
|
56
56
|
/** Model to use: 'kugel-1-turbo' (1.5B, fast) or 'kugel-1' (7B, premium). Default: 'kugel-1-turbo' */
|
|
57
|
-
|
|
57
|
+
modelId?: string;
|
|
58
58
|
/** Voice ID to use */
|
|
59
59
|
voiceId?: number;
|
|
60
60
|
/** CFG scale for generation (default: 2.0) */
|
|
@@ -63,21 +63,18 @@ export interface GenerateOptions {
|
|
|
63
63
|
maxNewTokens?: number;
|
|
64
64
|
/** Output sample rate (default: 24000) */
|
|
65
65
|
sampleRate?: number;
|
|
66
|
-
/** Whether to add speaker prefix (default: true) */
|
|
67
|
-
speakerPrefix?: boolean;
|
|
68
66
|
/**
|
|
69
67
|
* Enable text normalization (converts numbers, dates, etc. to spoken words).
|
|
70
68
|
* When true, text will be normalized before TTS generation.
|
|
71
|
-
* Default:
|
|
69
|
+
* Default: true
|
|
72
70
|
*
|
|
73
|
-
* ⚠️
|
|
74
|
-
*
|
|
75
|
-
* the language parameter when using normalization.
|
|
71
|
+
* ⚠️ For best performance, always specify the language parameter when using
|
|
72
|
+
* normalization. Without it, language auto-detection adds ~150ms latency.
|
|
76
73
|
*/
|
|
77
74
|
normalize?: boolean;
|
|
78
75
|
/**
|
|
79
76
|
* ISO 639-1 language code for text normalization (e.g., 'de', 'en', 'fr').
|
|
80
|
-
* If not provided and normalize is true, language will be auto-detected
|
|
77
|
+
* If not provided and normalize is true (default), language will be auto-detected
|
|
81
78
|
* (adds ~150ms latency).
|
|
82
79
|
*
|
|
83
80
|
* Supported: de, en, fr, es, it, pt, nl, pl, sv, da, no, fi, cs, hu, ro,
|
|
@@ -98,12 +95,20 @@ export interface StreamConfig {
|
|
|
98
95
|
maxNewTokens?: number;
|
|
99
96
|
/** Output sample rate */
|
|
100
97
|
sampleRate?: number;
|
|
101
|
-
/** Whether to add speaker prefix */
|
|
102
|
-
speakerPrefix?: boolean;
|
|
103
98
|
/** Auto-flush timeout in milliseconds */
|
|
104
99
|
flushTimeoutMs?: number;
|
|
105
100
|
/** Maximum buffer length */
|
|
106
101
|
maxBufferLength?: number;
|
|
102
|
+
/**
|
|
103
|
+
* Enable text normalization (converts numbers, dates, etc. to spoken words).
|
|
104
|
+
* Default: true
|
|
105
|
+
*/
|
|
106
|
+
normalize?: boolean;
|
|
107
|
+
/**
|
|
108
|
+
* ISO 639-1 language code for text normalization (e.g., 'de', 'en', 'fr').
|
|
109
|
+
* Specify to avoid ~150ms auto-detection latency.
|
|
110
|
+
*/
|
|
111
|
+
language?: string;
|
|
107
112
|
}
|
|
108
113
|
|
|
109
114
|
/**
|
|
@@ -188,9 +193,11 @@ export interface KugelAudioOptions {
|
|
|
188
193
|
isMasterKey?: boolean;
|
|
189
194
|
/** Whether apiKey is a JWT token (for user authentication). Takes precedence over isMasterKey. */
|
|
190
195
|
isToken?: boolean;
|
|
196
|
+
/** Organisation ID to bill usage against (required for token auth to enable usage recording). */
|
|
197
|
+
orgId?: number;
|
|
191
198
|
/** API base URL (default: https://api.kugelaudio.com) */
|
|
192
199
|
apiUrl?: string;
|
|
193
|
-
/** TTS server URL (default:
|
|
200
|
+
/** TTS server URL (default: same as apiUrl) */
|
|
194
201
|
ttsUrl?: string;
|
|
195
202
|
/** Request timeout in milliseconds (default: 60000) */
|
|
196
203
|
timeout?: number;
|
|
@@ -205,3 +212,67 @@ export interface ApiError {
|
|
|
205
212
|
statusCode?: number;
|
|
206
213
|
}
|
|
207
214
|
|
|
215
|
+
/**
 * Multi-context session configuration.
 *
 * Passed to `client.tts.createMultiContextSession()`. Apart from
 * `defaultVoiceId`, these are session-level values: the client includes them
 * only in context-creation messages sent before the server has confirmed that
 * the session started.
 */
export interface MultiContextConfig {
  /** Default voice ID for new contexts that are created without their own `voiceId` */
  defaultVoiceId?: number;
  /** Output sample rate (default: 24000) */
  sampleRate?: number;
  /** CFG scale for generation (default: 2.0) */
  cfgScale?: number;
  /** Maximum tokens to generate (default: 2048) */
  maxNewTokens?: number;
  /** Enable text normalization (default: true) */
  normalize?: boolean;
  /** Seconds before context auto-closes (default: 20.0) */
  inactivityTimeout?: number;
}
|
|
232
|
+
|
|
233
|
+
/**
 * Voice settings for a specific context.
 *
 * Supplied to `createContext()`, which sends them as the `voice_settings`
 * object of the context-creation message with snake_case keys
 * (`similarity_boost`, `use_speaker_boost`).
 */
export interface ContextVoiceSettings {
  /** Stability (0.0-1.0) */
  stability?: number;
  /** Similarity boost (0.0-1.0) */
  similarityBoost?: number;
  /** Style (0.0-1.0) */
  style?: number;
  /** Use speaker boost */
  useSpeakerBoost?: boolean;
  /** Speed multiplier */
  speed?: number;
}
|
|
248
|
+
|
|
249
|
+
/**
 * Audio chunk from multi-context streaming.
 *
 * Extends the single-stream `AudioChunk` with the context the audio was
 * generated for, so one `onChunk` handler can route audio per context.
 */
export interface MultiContextAudioChunk extends AudioChunk {
  /** Context ID this audio belongs to (the `context_id` of the server message) */
  contextId: string;
}
|
|
256
|
+
|
|
257
|
+
/**
 * Event callbacks for multi-context streaming.
 *
 * Every callback is optional. They are passed to the session's `connect()`
 * and invoked as the matching server messages arrive.
 */
export interface MultiContextCallbacks {
  /** Called when session is started, with the session ID reported by the server */
  onSessionStarted?: (sessionId: string) => void;
  /** Called when a context is created */
  onContextCreated?: (contextId: string) => void;
  /** Called when an audio chunk is received */
  onChunk?: (chunk: MultiContextAudioChunk) => void;
  /** Called when a context finishes generating (a message flagged `is_final` arrived) */
  onContextFinal?: (contextId: string) => void;
  /** Called when a context is closed */
  onContextClosed?: (contextId: string) => void;
  /** Called when a context times out */
  onContextTimeout?: (contextId: string) => void;
  /** Called when session is closed; `stats` is the parsed server message that announced it */
  onSessionClosed?: (stats: Record<string, unknown>) => void;
  /**
   * Called on error. `contextId` is set when the server's error message named
   * a context; it is undefined for connection-level errors.
   */
  onError?: (error: Error, contextId?: string) => void;
}
|
|
278
|
+
|