kugelaudio 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -8
- package/dist/index.d.mts +175 -17
- package/dist/index.d.ts +175 -17
- package/dist/index.js +287 -13
- package/dist/index.mjs +294 -13
- package/package.json +5 -1
- package/src/client.ts +354 -17
- package/src/index.ts +6 -2
- package/src/types.ts +83 -12
- package/src/websocket.ts +44 -0
package/dist/index.js
CHANGED
|
@@ -135,8 +135,35 @@ function createWavBlob(audio, sampleRate) {
|
|
|
135
135
|
return new Blob([wavBuffer], { type: "audio/wav" });
|
|
136
136
|
}
|
|
137
137
|
|
|
138
|
+
// src/websocket.ts
|
|
139
|
+
var _cachedWs = null;
|
|
140
|
+
function getWebSocket() {
|
|
141
|
+
if (_cachedWs) return _cachedWs;
|
|
142
|
+
if (typeof globalThis !== "undefined" && typeof globalThis.WebSocket !== "undefined") {
|
|
143
|
+
_cachedWs = globalThis.WebSocket;
|
|
144
|
+
return _cachedWs;
|
|
145
|
+
}
|
|
146
|
+
try {
|
|
147
|
+
const _require = typeof require !== "undefined" ? require : Function('return typeof require !== "undefined" ? require : undefined')();
|
|
148
|
+
if (_require) {
|
|
149
|
+
const ws = _require("ws");
|
|
150
|
+
_cachedWs = ws.default || ws;
|
|
151
|
+
return _cachedWs;
|
|
152
|
+
}
|
|
153
|
+
} catch {
|
|
154
|
+
}
|
|
155
|
+
throw new Error(
|
|
156
|
+
'WebSocket not available. In Node.js, install the "ws" package: npm install ws'
|
|
157
|
+
);
|
|
158
|
+
}
|
|
159
|
+
|
|
138
160
|
// src/client.ts
|
|
139
161
|
var DEFAULT_API_URL = "https://api.kugelaudio.com";
|
|
162
|
+
/**
 * Open a socket to `url` using whichever WebSocket constructor
 * `getWebSocket()` resolves for this runtime.
 *
 * @param {string} url - Address handed to the constructor unchanged.
 * @returns {object} The newly constructed socket instance.
 */
function createWs(url) {
  return new (getWebSocket())(url);
}
|
|
166
|
+
// Value compared against a socket's `readyState` to decide whether it is open
// (the WebSocket API defines OPEN as 1).
var WS_OPEN = 1;
|
|
140
167
|
var ModelsResource = class {
|
|
141
168
|
constructor(client) {
|
|
142
169
|
this.client = client;
|
|
@@ -212,6 +239,7 @@ var VoicesResource = class {
|
|
|
212
239
|
var TTSResource = class {
|
|
213
240
|
constructor(client) {
|
|
214
241
|
this.client = client;
|
|
242
|
+
// Using any for WebSocket to support both browser WebSocket and ws package
|
|
215
243
|
this.wsConnection = null;
|
|
216
244
|
this.wsUrl = null;
|
|
217
245
|
this.pendingRequests = /* @__PURE__ */ new Map();
|
|
@@ -241,7 +269,7 @@ var TTSResource = class {
|
|
|
241
269
|
* Check if WebSocket connection is established and open.
|
|
242
270
|
*/
|
|
243
271
|
isConnected() {
|
|
244
|
-
return this.wsConnection !== null && this.wsConnection.readyState ===
|
|
272
|
+
return this.wsConnection !== null && this.wsConnection.readyState === WS_OPEN;
|
|
245
273
|
}
|
|
246
274
|
/**
|
|
247
275
|
* Generate audio from text with streaming via WebSocket.
|
|
@@ -287,7 +315,11 @@ var TTSResource = class {
|
|
|
287
315
|
} else {
|
|
288
316
|
authParam = "api_key";
|
|
289
317
|
}
|
|
290
|
-
|
|
318
|
+
let url = `${wsUrl}/ws/tts?${authParam}=${this.client.apiKey}`;
|
|
319
|
+
if (this.client.orgId !== void 0) {
|
|
320
|
+
url += `&org_id=${this.client.orgId}`;
|
|
321
|
+
}
|
|
322
|
+
return url;
|
|
291
323
|
}
|
|
292
324
|
/**
|
|
293
325
|
* Get or create a WebSocket connection for connection pooling.
|
|
@@ -295,7 +327,7 @@ var TTSResource = class {
|
|
|
295
327
|
*/
|
|
296
328
|
async getConnection() {
|
|
297
329
|
const url = this.buildWsUrl();
|
|
298
|
-
if (this.wsConnection && this.wsUrl === url && this.wsConnection.readyState ===
|
|
330
|
+
if (this.wsConnection && this.wsUrl === url && this.wsConnection.readyState === WS_OPEN) {
|
|
299
331
|
return this.wsConnection;
|
|
300
332
|
}
|
|
301
333
|
if (this.wsConnection) {
|
|
@@ -306,7 +338,7 @@ var TTSResource = class {
|
|
|
306
338
|
this.wsConnection = null;
|
|
307
339
|
}
|
|
308
340
|
return new Promise((resolve, reject) => {
|
|
309
|
-
const ws =
|
|
341
|
+
const ws = createWs(url);
|
|
310
342
|
ws.onopen = () => {
|
|
311
343
|
this.wsConnection = ws;
|
|
312
344
|
this.wsUrl = url;
|
|
@@ -324,7 +356,8 @@ var TTSResource = class {
|
|
|
324
356
|
setupMessageHandler(ws) {
|
|
325
357
|
ws.onmessage = (event) => {
|
|
326
358
|
try {
|
|
327
|
-
const
|
|
359
|
+
const messageData = typeof event.data === "string" ? event.data : event.data instanceof Buffer ? event.data.toString() : String(event.data);
|
|
360
|
+
const data = JSON.parse(messageData);
|
|
328
361
|
const [requestId, pending] = [...this.pendingRequests.entries()][0] || [];
|
|
329
362
|
if (!pending) return;
|
|
330
363
|
if (data.error) {
|
|
@@ -411,13 +444,12 @@ var TTSResource = class {
|
|
|
411
444
|
callbacks.onOpen?.();
|
|
412
445
|
ws.send(JSON.stringify({
|
|
413
446
|
text: options.text,
|
|
414
|
-
|
|
447
|
+
model_id: options.modelId || "kugel-1-turbo",
|
|
415
448
|
voice_id: options.voiceId,
|
|
416
449
|
cfg_scale: options.cfgScale ?? 2,
|
|
417
450
|
max_new_tokens: options.maxNewTokens ?? 2048,
|
|
418
451
|
sample_rate: options.sampleRate ?? 24e3,
|
|
419
|
-
|
|
420
|
-
normalize: options.normalize ?? false,
|
|
452
|
+
normalize: options.normalize ?? true,
|
|
421
453
|
...options.language && { language: options.language }
|
|
422
454
|
}));
|
|
423
455
|
});
|
|
@@ -428,24 +460,24 @@ var TTSResource = class {
|
|
|
428
460
|
streamWithoutPooling(options, callbacks) {
|
|
429
461
|
return new Promise((resolve, reject) => {
|
|
430
462
|
const url = this.buildWsUrl();
|
|
431
|
-
const ws =
|
|
463
|
+
const ws = createWs(url);
|
|
432
464
|
ws.onopen = () => {
|
|
433
465
|
callbacks.onOpen?.();
|
|
434
466
|
ws.send(JSON.stringify({
|
|
435
467
|
text: options.text,
|
|
436
|
-
|
|
468
|
+
model_id: options.modelId || "kugel-1-turbo",
|
|
437
469
|
voice_id: options.voiceId,
|
|
438
470
|
cfg_scale: options.cfgScale ?? 2,
|
|
439
471
|
max_new_tokens: options.maxNewTokens ?? 2048,
|
|
440
472
|
sample_rate: options.sampleRate ?? 24e3,
|
|
441
|
-
|
|
442
|
-
normalize: options.normalize ?? false,
|
|
473
|
+
normalize: options.normalize ?? true,
|
|
443
474
|
...options.language && { language: options.language }
|
|
444
475
|
}));
|
|
445
476
|
};
|
|
446
477
|
ws.onmessage = (event) => {
|
|
447
478
|
try {
|
|
448
|
-
const
|
|
479
|
+
const messageData = typeof event.data === "string" ? event.data : event.data instanceof Buffer ? event.data.toString() : String(event.data);
|
|
480
|
+
const data = JSON.parse(messageData);
|
|
449
481
|
if (data.error) {
|
|
450
482
|
const error = this.parseError(data.error);
|
|
451
483
|
callbacks.onError?.(error);
|
|
@@ -521,6 +553,243 @@ var TTSResource = class {
|
|
|
521
553
|
}
|
|
522
554
|
return new KugelAudioError(message);
|
|
523
555
|
}
|
|
556
|
+
/**
|
|
557
|
+
* Create a multi-context session for concurrent TTS streams.
|
|
558
|
+
*
|
|
559
|
+
* Allows managing up to 5 independent audio generation contexts
|
|
560
|
+
* over a single WebSocket connection. Each context has its own
|
|
561
|
+
* text buffer, voice settings, and generation queue.
|
|
562
|
+
*
|
|
563
|
+
* @example
|
|
564
|
+
* ```typescript
|
|
565
|
+
* const session = client.tts.createMultiContextSession({
|
|
566
|
+
* defaultVoiceId: 123,
|
|
567
|
+
* });
|
|
568
|
+
*
|
|
569
|
+
* session.connect({
|
|
570
|
+
* onChunk: (chunk) => {
|
|
571
|
+
* console.log(`Audio from ${chunk.contextId}`);
|
|
572
|
+
* playAudio(chunk.audio);
|
|
573
|
+
* },
|
|
574
|
+
* onContextFinal: (contextId) => {
|
|
575
|
+
* console.log(`${contextId} finished`);
|
|
576
|
+
* },
|
|
577
|
+
* });
|
|
578
|
+
*
|
|
579
|
+
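* // Create contexts with different voices (only once the socket is open: createContext throws "WebSocket not connected" while session.isConnected is false)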
|
|
580
|
+
* session.createContext('narrator', { voiceId: 123 });
|
|
581
|
+
* session.createContext('character', { voiceId: 456 });
|
|
582
|
+
*
|
|
583
|
+
* // Send text to different speakers
|
|
584
|
+
* session.send('narrator', 'The story begins.', true);
|
|
585
|
+
* session.send('character', 'Hello!', true);
|
|
586
|
+
*
|
|
587
|
+
* // Close when done
|
|
588
|
+
* session.close();
|
|
589
|
+
* ```
|
|
590
|
+
*/
|
|
591
|
+
createMultiContextSession(config) {
|
|
592
|
+
return new MultiContextSession(this.client, config);
|
|
593
|
+
}
|
|
594
|
+
};
|
|
595
|
+
/**
 * Session that drives several TTS contexts over one WebSocket.
 *
 * Outgoing messages are JSON objects tagged with `context_id`; incoming
 * messages are dispatched to the callbacks passed to `connect()`.
 */
var MultiContextSession = class {
  /**
   * @param {object} client - Client supplying `ttsUrl`, `apiKey`, `isToken`
   *   and `isMasterKey`.
   * @param {object} [config] - Session defaults (`sampleRate`, `cfgScale`,
   *   `maxNewTokens`, `normalize`, `inactivityTimeout`, `defaultVoiceId`).
   */
  constructor(client, config) {
    this.client = client;
    this.ws = null;
    this.callbacks = {};
    this.contexts = /* @__PURE__ */ new Set();
    this._sessionId = null;
    this.isStarted = false;
    this.config = config || {};
  }
  /**
   * Session ID from the most recent `session_started` message, or null if
   * none has been received yet. It is not cleared when the socket closes.
   */
  get sessionId() {
    return this._sessionId;
  }
  /**
   * Connect to the multi-context WebSocket endpoint.
   *
   * Returns immediately; the socket opens asynchronously. `createContext()`
   * and `send()` throw until it is open, so wait for the optional `onOpen`
   * callback (or poll `isConnected`) before using them.
   *
   * @param {object} [callbacks] - Optional handlers: `onOpen`, `onError`,
   *   `onSessionStarted`, `onContextCreated`, `onChunk`, `onContextFinal`,
   *   `onContextClosed`, `onContextTimeout`, `onSessionClosed`.
   */
  connect(callbacks) {
    // Default to an empty object so later `this.callbacks.x?.()` cannot throw.
    this.callbacks = callbacks || {};
    const wsUrl = this.client.ttsUrl.replace("https://", "wss://").replace("http://", "ws://");
    let authParam;
    if (this.client.isToken) {
      authParam = "token";
    } else if (this.client.isMasterKey) {
      authParam = "master_key";
    } else {
      authParam = "api_key";
    }
    // Encode the credential so characters such as "+", "&" or "=" survive the query string.
    const url = `${wsUrl}/ws/tts/multi?${authParam}=${encodeURIComponent(this.client.apiKey)}`;
    // Keep a local handle: the handlers below must refer to THIS socket even
    // if `this.ws` is later replaced by another connect() call.
    const ws = createWs(url);
    this.ws = ws;
    ws.onopen = () => {
      // Readiness signal for callers.
      this.callbacks.onOpen?.();
    };
    ws.onmessage = (event) => {
      try {
        // `Buffer` is not defined in every runtime; guard before using instanceof.
        const isBuffer = typeof Buffer !== "undefined" && event.data instanceof Buffer;
        const messageData = typeof event.data === "string" ? event.data : isBuffer ? event.data.toString() : String(event.data);
        const data = JSON.parse(messageData);
        if (data.error) {
          this.callbacks.onError?.(
            new KugelAudioError(data.error),
            data.context_id
          );
          return;
        }
        if (data.session_started) {
          this._sessionId = data.session_id;
          this.isStarted = true;
          this.callbacks.onSessionStarted?.(data.session_id);
        }
        if (data.context_created) {
          this.contexts.add(data.context_id);
          this.callbacks.onContextCreated?.(data.context_id);
        }
        if (data.audio) {
          const chunk = {
            audio: data.audio,
            encoding: "pcm_s16le",
            index: data.idx || 0,
            sampleRate: data.sr || 24e3,
            samples: data.samples || 0,
            contextId: data.context_id
          };
          this.callbacks.onChunk?.(chunk);
        }
        if (data.is_final) {
          this.callbacks.onContextFinal?.(data.context_id);
        }
        if (data.context_closed) {
          this.contexts.delete(data.context_id);
          this.callbacks.onContextClosed?.(data.context_id);
        }
        if (data.context_timeout) {
          this.contexts.delete(data.context_id);
          this.callbacks.onContextTimeout?.(data.context_id);
        }
        if (data.session_closed) {
          this.callbacks.onSessionClosed?.(data);
        }
      } catch (e) {
        console.error("Failed to parse WebSocket message:", e);
      }
    };
    ws.onerror = () => {
      this.callbacks.onError?.(new KugelAudioError("WebSocket connection error"));
    };
    ws.onclose = (event) => {
      if (event.code === 4001) {
        this.callbacks.onError?.(new AuthenticationError("Authentication failed"));
      } else if (event.code === 4003) {
        this.callbacks.onError?.(new InsufficientCreditsError("Insufficient credits"));
      }
      // A close event from a socket that has since been replaced must not
      // wipe the state of the newer connection.
      if (this.ws !== null && this.ws !== ws) return;
      this.ws = null;
      this.isStarted = false;
      this.contexts.clear();
    };
  }
  /**
   * Create a new context with optional voice settings.
   *
   * Until a `session_started` message has been received, the session-level
   * config is sent along with the message.
   *
   * @param {string} contextId - Identifier for the new context.
   * @param {object} [options] - Optional `voiceId` and `voiceSettings`.
   * @throws {KugelAudioError} If the socket is not open.
   */
  createContext(contextId, options) {
    if (!this.ws || this.ws.readyState !== WS_OPEN) {
      throw new KugelAudioError("WebSocket not connected");
    }
    const msg = {
      text: " ",
      context_id: contextId
    };
    if (!this.isStarted) {
      const { sampleRate, cfgScale, maxNewTokens, normalize, inactivityTimeout } = this.config;
      // Nullish checks (not truthiness) so an explicit 0 is still forwarded.
      if (sampleRate != null) msg.sample_rate = sampleRate;
      if (cfgScale != null) msg.cfg_scale = cfgScale;
      if (maxNewTokens != null) msg.max_new_tokens = maxNewTokens;
      if (normalize !== void 0) msg.normalize = normalize;
      if (inactivityTimeout != null) msg.inactivity_timeout = inactivityTimeout;
    }
    const voiceId = options?.voiceId || this.config.defaultVoiceId;
    if (voiceId) msg.voice_id = voiceId;
    if (options?.voiceSettings) {
      msg.voice_settings = {
        stability: options.voiceSettings.stability,
        similarity_boost: options.voiceSettings.similarityBoost,
        style: options.voiceSettings.style,
        use_speaker_boost: options.voiceSettings.useSpeakerBoost,
        speed: options.voiceSettings.speed
      };
    }
    this.ws.send(JSON.stringify(msg));
  }
  /**
   * Send text to a specific context.
   *
   * If the context is unknown and no `session_started` message has arrived
   * yet, a create-context message is sent first.
   *
   * @param {string} contextId - Target context.
   * @param {string} text - Text to send.
   * @param {boolean} [flush=false] - Forwarded as the message's `flush` field.
   * @throws {KugelAudioError} If the socket is not open.
   */
  send(contextId, text, flush = false) {
    if (!this.ws || this.ws.readyState !== WS_OPEN) {
      throw new KugelAudioError("WebSocket not connected");
    }
    if (!this.contexts.has(contextId) && !this.isStarted) {
      this.createContext(contextId);
    }
    this.ws.send(JSON.stringify({
      text,
      context_id: contextId,
      flush
    }));
  }
  /**
   * Flush a context's buffer. Does nothing when the socket is not open.
   */
  flush(contextId) {
    if (!this.ws || this.ws.readyState !== WS_OPEN) return;
    this.ws.send(JSON.stringify({
      flush: true,
      context_id: contextId
    }));
  }
  /**
   * Close a specific context. Does nothing when the socket is not open.
   */
  closeContext(contextId) {
    if (!this.ws || this.ws.readyState !== WS_OPEN) return;
    this.ws.send(JSON.stringify({
      close_context: true,
      context_id: contextId
    }));
  }
  /**
   * Send keep-alive (an empty-text message) to reset a context's inactivity
   * timeout. Does nothing when the socket is not open.
   */
  keepAlive(contextId) {
    if (!this.ws || this.ws.readyState !== WS_OPEN) return;
    this.ws.send(JSON.stringify({
      text: "",
      context_id: contextId
    }));
  }
  /**
   * Close the session and all contexts, then reset local state.
   */
  close() {
    if (this.ws && this.ws.readyState === WS_OPEN) {
      this.ws.send(JSON.stringify({ close_socket: true }));
      this.ws.close();
    }
    this.ws = null;
    this.isStarted = false;
    this.contexts.clear();
  }
  /**
   * Get active context IDs as a new array.
   */
  get activeContexts() {
    return Array.from(this.contexts);
  }
  /**
   * Check whether the session currently has an open socket.
   */
  get isConnected() {
    return this.ws !== null && this.ws.readyState === WS_OPEN;
  }
};
|
|
525
794
|
var KugelAudio = class _KugelAudio {
|
|
526
795
|
constructor(options) {
|
|
@@ -530,6 +799,7 @@ var KugelAudio = class _KugelAudio {
|
|
|
530
799
|
this._apiKey = options.apiKey;
|
|
531
800
|
this._isMasterKey = options.isMasterKey || false;
|
|
532
801
|
this._isToken = options.isToken || false;
|
|
802
|
+
this._orgId = options.orgId;
|
|
533
803
|
this._apiUrl = (options.apiUrl || DEFAULT_API_URL).replace(/\/$/, "");
|
|
534
804
|
this._ttsUrl = (options.ttsUrl || this._apiUrl).replace(/\/$/, "");
|
|
535
805
|
this._timeout = options.timeout || 6e4;
|
|
@@ -570,6 +840,10 @@ var KugelAudio = class _KugelAudio {
|
|
|
570
840
|
get isToken() {
|
|
571
841
|
return this._isToken;
|
|
572
842
|
}
|
|
843
|
+
/** Get organisation ID for billing */
|
|
844
|
+
get orgId() {
|
|
845
|
+
return this._orgId;
|
|
846
|
+
}
|
|
573
847
|
/** Get TTS URL */
|
|
574
848
|
get ttsUrl() {
|
|
575
849
|
return this._ttsUrl;
|