kugelaudio 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -8
- package/dist/index.d.mts +175 -17
- package/dist/index.d.ts +175 -17
- package/dist/index.js +287 -13
- package/dist/index.mjs +294 -13
- package/package.json +5 -1
- package/src/client.ts +354 -17
- package/src/index.ts +6 -2
- package/src/types.ts +83 -12
- package/src/websocket.ts +44 -0
package/dist/index.mjs
CHANGED
|
@@ -1,3 +1,10 @@
|
|
|
1
|
+
// __require: CommonJS-style `require` shim for this ESM bundle (appears to be bundler-generated).
// - If a `require` binding exists when this statement runs, __require is that `require` itself.
// - Otherwise, if `Proxy` exists, __require is a Proxy around the fallback function below; each
//   property read is forwarded to `require` when one is defined at read time, else to the fallback.
// - Otherwise __require is the bare fallback function.
// Calling the fallback delegates to `require` (forwarding `this` and all arguments) when one is
// defined at call time; if not, it throws Error('Dynamic require of "<id>" is not supported').
var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
|
|
2
|
+
get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
|
|
3
|
+
}) : x)(function(x) {
|
|
4
|
+
if (typeof require !== "undefined") return require.apply(this, arguments);
|
|
5
|
+
throw Error('Dynamic require of "' + x + '" is not supported');
|
|
6
|
+
});
|
|
7
|
+
|
|
1
8
|
// src/errors.ts
|
|
2
9
|
var KugelAudioError = class _KugelAudioError extends Error {
|
|
3
10
|
constructor(message, statusCode) {
|
|
@@ -99,8 +106,35 @@ function createWavBlob(audio, sampleRate) {
|
|
|
99
106
|
return new Blob([wavBuffer], { type: "audio/wav" });
|
|
100
107
|
}
|
|
101
108
|
|
|
109
|
+
// src/websocket.ts
|
|
110
|
+
// WebSocket constructor resolved by getWebSocket(); stays null until the first successful lookup.
var _cachedWs = null;

/**
 * Resolve a WebSocket constructor for the current runtime and memoize it.
 *
 * Lookup order:
 *   1. `globalThis.WebSocket`, when defined.
 *   2. The "ws" package, loaded through the bundle's `__require` shim when present,
 *      otherwise through an in-scope `require` when one exists.
 *
 * Once a constructor has been found it is returned for every later call, even if
 * `globalThis.WebSocket` changes afterwards.
 *
 * @returns {Function} A WebSocket-compatible constructor.
 * @throws {Error} When no implementation can be resolved. If loading "ws" itself failed,
 *   the underlying error is attached as `cause` instead of being discarded, so a broken
 *   install can be told apart from a missing one.
 */
function getWebSocket() {
  if (_cachedWs) return _cachedWs;

  if (typeof globalThis !== "undefined" && typeof globalThis.WebSocket !== "undefined") {
    _cachedWs = globalThis.WebSocket;
    return _cachedWs;
  }

  let loadError;
  try {
    // `typeof` guards keep both lookups safe when the identifier is not declared at all,
    // without compiling code from a string at runtime.
    const _require = typeof __require !== "undefined"
      ? __require
      : typeof require !== "undefined" ? require : undefined;
    if (_require) {
      const ws = _require("ws");
      // Accept both an ES-module style `default` export and a plain CommonJS export.
      _cachedWs = ws.default || ws;
      return _cachedWs;
    }
  } catch (err) {
    // Remember why loading failed; it is reported as the `cause` of the error below.
    loadError = err;
  }

  throw new Error(
    'WebSocket not available. In Node.js, install the "ws" package: npm install ws',
    loadError === undefined ? undefined : { cause: loadError }
  );
}
|
|
130
|
+
|
|
102
131
|
// src/client.ts
|
|
103
132
|
var DEFAULT_API_URL = "https://api.kugelaudio.com";
|
|
133
|
+
/**
 * Open a WebSocket to `url` with whichever constructor getWebSocket() resolves.
 *
 * @param {string} url Address handed unchanged to the WebSocket constructor.
 * @returns {object} The newly constructed socket.
 */
function createWs(url) {
  const SocketCtor = getWebSocket();
  const socket = new SocketCtor(url);
  return socket;
}

// Value that a socket's `readyState` is compared against throughout this file
// to decide whether the connection is open.
var WS_OPEN = 1;
|
|
104
138
|
var ModelsResource = class {
|
|
105
139
|
constructor(client) {
|
|
106
140
|
this.client = client;
|
|
@@ -176,6 +210,7 @@ var VoicesResource = class {
|
|
|
176
210
|
var TTSResource = class {
|
|
177
211
|
constructor(client) {
|
|
178
212
|
this.client = client;
|
|
213
|
+
// Using any for WebSocket to support both browser WebSocket and ws package
|
|
179
214
|
this.wsConnection = null;
|
|
180
215
|
this.wsUrl = null;
|
|
181
216
|
this.pendingRequests = /* @__PURE__ */ new Map();
|
|
@@ -205,7 +240,7 @@ var TTSResource = class {
|
|
|
205
240
|
* Check if WebSocket connection is established and open.
|
|
206
241
|
*/
|
|
207
242
|
isConnected() {
|
|
208
|
-
return this.wsConnection !== null && this.wsConnection.readyState ===
|
|
243
|
+
return this.wsConnection !== null && this.wsConnection.readyState === WS_OPEN;
|
|
209
244
|
}
|
|
210
245
|
/**
|
|
211
246
|
* Generate audio from text with streaming via WebSocket.
|
|
@@ -251,7 +286,11 @@ var TTSResource = class {
|
|
|
251
286
|
} else {
|
|
252
287
|
authParam = "api_key";
|
|
253
288
|
}
|
|
254
|
-
|
|
289
|
+
let url = `${wsUrl}/ws/tts?${authParam}=${this.client.apiKey}`;
|
|
290
|
+
if (this.client.orgId !== void 0) {
|
|
291
|
+
url += `&org_id=${this.client.orgId}`;
|
|
292
|
+
}
|
|
293
|
+
return url;
|
|
255
294
|
}
|
|
256
295
|
/**
|
|
257
296
|
* Get or create a WebSocket connection for connection pooling.
|
|
@@ -259,7 +298,7 @@ var TTSResource = class {
|
|
|
259
298
|
*/
|
|
260
299
|
async getConnection() {
|
|
261
300
|
const url = this.buildWsUrl();
|
|
262
|
-
if (this.wsConnection && this.wsUrl === url && this.wsConnection.readyState ===
|
|
301
|
+
if (this.wsConnection && this.wsUrl === url && this.wsConnection.readyState === WS_OPEN) {
|
|
263
302
|
return this.wsConnection;
|
|
264
303
|
}
|
|
265
304
|
if (this.wsConnection) {
|
|
@@ -270,7 +309,7 @@ var TTSResource = class {
|
|
|
270
309
|
this.wsConnection = null;
|
|
271
310
|
}
|
|
272
311
|
return new Promise((resolve, reject) => {
|
|
273
|
-
const ws =
|
|
312
|
+
const ws = createWs(url);
|
|
274
313
|
ws.onopen = () => {
|
|
275
314
|
this.wsConnection = ws;
|
|
276
315
|
this.wsUrl = url;
|
|
@@ -288,7 +327,8 @@ var TTSResource = class {
|
|
|
288
327
|
setupMessageHandler(ws) {
|
|
289
328
|
ws.onmessage = (event) => {
|
|
290
329
|
try {
|
|
291
|
-
const
|
|
330
|
+
const messageData = typeof event.data === "string" ? event.data : event.data instanceof Buffer ? event.data.toString() : String(event.data);
|
|
331
|
+
const data = JSON.parse(messageData);
|
|
292
332
|
const [requestId, pending] = [...this.pendingRequests.entries()][0] || [];
|
|
293
333
|
if (!pending) return;
|
|
294
334
|
if (data.error) {
|
|
@@ -375,13 +415,12 @@ var TTSResource = class {
|
|
|
375
415
|
callbacks.onOpen?.();
|
|
376
416
|
ws.send(JSON.stringify({
|
|
377
417
|
text: options.text,
|
|
378
|
-
|
|
418
|
+
model_id: options.modelId || "kugel-1-turbo",
|
|
379
419
|
voice_id: options.voiceId,
|
|
380
420
|
cfg_scale: options.cfgScale ?? 2,
|
|
381
421
|
max_new_tokens: options.maxNewTokens ?? 2048,
|
|
382
422
|
sample_rate: options.sampleRate ?? 24e3,
|
|
383
|
-
|
|
384
|
-
normalize: options.normalize ?? false,
|
|
423
|
+
normalize: options.normalize ?? true,
|
|
385
424
|
...options.language && { language: options.language }
|
|
386
425
|
}));
|
|
387
426
|
});
|
|
@@ -392,24 +431,24 @@ var TTSResource = class {
|
|
|
392
431
|
streamWithoutPooling(options, callbacks) {
|
|
393
432
|
return new Promise((resolve, reject) => {
|
|
394
433
|
const url = this.buildWsUrl();
|
|
395
|
-
const ws =
|
|
434
|
+
const ws = createWs(url);
|
|
396
435
|
ws.onopen = () => {
|
|
397
436
|
callbacks.onOpen?.();
|
|
398
437
|
ws.send(JSON.stringify({
|
|
399
438
|
text: options.text,
|
|
400
|
-
|
|
439
|
+
model_id: options.modelId || "kugel-1-turbo",
|
|
401
440
|
voice_id: options.voiceId,
|
|
402
441
|
cfg_scale: options.cfgScale ?? 2,
|
|
403
442
|
max_new_tokens: options.maxNewTokens ?? 2048,
|
|
404
443
|
sample_rate: options.sampleRate ?? 24e3,
|
|
405
|
-
|
|
406
|
-
normalize: options.normalize ?? false,
|
|
444
|
+
normalize: options.normalize ?? true,
|
|
407
445
|
...options.language && { language: options.language }
|
|
408
446
|
}));
|
|
409
447
|
};
|
|
410
448
|
ws.onmessage = (event) => {
|
|
411
449
|
try {
|
|
412
|
-
const
|
|
450
|
+
const messageData = typeof event.data === "string" ? event.data : event.data instanceof Buffer ? event.data.toString() : String(event.data);
|
|
451
|
+
const data = JSON.parse(messageData);
|
|
413
452
|
if (data.error) {
|
|
414
453
|
const error = this.parseError(data.error);
|
|
415
454
|
callbacks.onError?.(error);
|
|
@@ -485,6 +524,243 @@ var TTSResource = class {
|
|
|
485
524
|
}
|
|
486
525
|
return new KugelAudioError(message);
|
|
487
526
|
}
|
|
527
|
+
/**
|
|
528
|
+
* Create a multi-context session for concurrent TTS streams.
|
|
529
|
+
*
|
|
530
|
+
* Allows managing up to 5 independent audio generation contexts
|
|
531
|
+
* over a single WebSocket connection. Each context has its own
|
|
532
|
+
* text buffer, voice settings, and generation queue.
|
|
533
|
+
*
|
|
534
|
+
* @example
|
|
535
|
+
* ```typescript
|
|
536
|
+
* const session = client.tts.createMultiContextSession({
|
|
537
|
+
* defaultVoiceId: 123,
|
|
538
|
+
* });
|
|
539
|
+
*
|
|
540
|
+
* session.connect({
|
|
541
|
+
* onChunk: (chunk) => {
|
|
542
|
+
* console.log(`Audio from ${chunk.contextId}`);
|
|
543
|
+
* playAudio(chunk.audio);
|
|
544
|
+
* },
|
|
545
|
+
* onContextFinal: (contextId) => {
|
|
546
|
+
* console.log(`${contextId} finished`);
|
|
547
|
+
* },
|
|
548
|
+
* });
|
|
549
|
+
*
|
|
550
|
+
* // Create contexts with different voices
|
|
551
|
+
* session.createContext('narrator', { voiceId: 123 });
|
|
552
|
+
* session.createContext('character', { voiceId: 456 });
|
|
553
|
+
*
|
|
554
|
+
* // Send text to different speakers
|
|
555
|
+
* session.send('narrator', 'The story begins.', true);
|
|
556
|
+
* session.send('character', 'Hello!', true);
|
|
557
|
+
*
|
|
558
|
+
* // Close when done
|
|
559
|
+
* session.close();
|
|
560
|
+
* ```
|
|
561
|
+
*/
|
|
562
|
+
createMultiContextSession(config) {
|
|
563
|
+
return new MultiContextSession(this.client, config);
|
|
564
|
+
}
|
|
565
|
+
};
|
|
566
|
+
/**
 * Multi-context TTS session: several independently addressed text/audio contexts
 * multiplexed over one WebSocket to `<ttsUrl>/ws/tts/multi`.
 *
 * Instance state:
 *  - `ws`         the tracked socket, or null while disconnected
 *  - `contexts`   ids the server has confirmed with a `context_created` message
 *  - `isStarted`  true once the server has sent `session_started`
 *  - `_sessionId` id reported with `session_started` (exposed through `sessionId`)
 */
var MultiContextSession = class {
  /**
   * @param client SDK client; `ttsUrl`, `apiKey`, `isToken` and `isMasterKey` are read by connect().
   * @param [config] Session defaults read by createContext(): `sampleRate`, `cfgScale`,
   *   `maxNewTokens`, `normalize`, `inactivityTimeout`, `defaultVoiceId`.
   */
  constructor(client, config) {
    this.client = client;
    this.ws = null;
    this.callbacks = {};
    this.contexts = /* @__PURE__ */ new Set();
    this._sessionId = null;
    this.isStarted = false;
    this.config = config || {};
  }

  /**
   * Session id reported by the server, or null before `session_started` has been received.
   */
  get sessionId() {
    return this._sessionId;
  }

  /**
   * Open the multi-context WebSocket and install the event handlers.
   *
   * Returns immediately; the socket is usually not open yet. `callbacks.onOpen` (optional)
   * fires once it is, after which createContext()/send() stop throwing.
   *
   * Calling connect() again replaces the tracked socket without closing the previous one.
   *
   * NOTE(review): unlike TTSResource.buildWsUrl, no `org_id` query parameter is appended
   * here — confirm whether the multi-context endpoint should receive it.
   *
   * @param [callbacks] Optional handlers: onOpen, onError, onSessionStarted, onContextCreated,
   *   onChunk, onContextFinal, onContextClosed, onContextTimeout, onSessionClosed.
   */
  connect(callbacks) {
    // Tolerate connect() without arguments: every handler below dereferences this.callbacks.
    this.callbacks = callbacks || {};

    const wsUrl = this.client.ttsUrl.replace("https://", "wss://").replace("http://", "ws://");
    let authParam = "api_key";
    if (this.client.isToken) {
      authParam = "token";
    } else if (this.client.isMasterKey) {
      authParam = "master_key";
    }
    // Encode the credential so characters such as "+", "/", "=" or "&" survive the query string.
    const url = `${wsUrl}/ws/tts/multi?${authParam}=${encodeURIComponent(this.client.apiKey)}`;

    // Keep a local reference so the handlers can tell whether they still belong to the
    // socket this session is tracking.
    const ws = createWs(url);
    this.ws = ws;

    ws.onopen = () => {
      this.callbacks.onOpen?.();
    };

    ws.onmessage = (event) => {
      try {
        const raw = event.data;
        let messageData;
        if (typeof raw === "string") {
          messageData = raw;
        } else if (typeof Buffer !== "undefined" && raw instanceof Buffer) {
          // `Buffer` is not defined in every runtime, hence the typeof guard.
          messageData = raw.toString();
        } else {
          messageData = String(raw);
        }
        const data = JSON.parse(messageData);

        if (data.error) {
          this.callbacks.onError?.(
            new KugelAudioError(data.error),
            data.context_id
          );
          return;
        }
        if (data.session_started) {
          this._sessionId = data.session_id;
          this.isStarted = true;
          this.callbacks.onSessionStarted?.(data.session_id);
        }
        if (data.context_created) {
          this.contexts.add(data.context_id);
          this.callbacks.onContextCreated?.(data.context_id);
        }
        if (data.audio) {
          const chunk = {
            audio: data.audio,
            encoding: "pcm_s16le",
            index: data.idx || 0,
            sampleRate: data.sr || 24e3,
            samples: data.samples || 0,
            contextId: data.context_id
          };
          this.callbacks.onChunk?.(chunk);
        }
        if (data.is_final) {
          this.callbacks.onContextFinal?.(data.context_id);
        }
        if (data.context_closed) {
          this.contexts.delete(data.context_id);
          this.callbacks.onContextClosed?.(data.context_id);
        }
        if (data.context_timeout) {
          this.contexts.delete(data.context_id);
          this.callbacks.onContextTimeout?.(data.context_id);
        }
        if (data.session_closed) {
          this.callbacks.onSessionClosed?.(data);
        }
      } catch (e) {
        // Also reached when a callback invoked above throws.
        console.error("Failed to parse WebSocket message:", e);
      }
    };

    ws.onerror = () => {
      this.callbacks.onError?.(new KugelAudioError("WebSocket connection error"));
    };

    ws.onclose = (event) => {
      if (event.code === 4001) {
        this.callbacks.onError?.(new AuthenticationError("Authentication failed"));
      } else if (event.code === 4003) {
        this.callbacks.onError?.(new InsufficientCreditsError("Insufficient credits"));
      }
      // Reset state only if this socket is still the tracked one. A late close event from
      // a socket that close()/connect() already replaced must not wipe the newer connection.
      if (this.ws === ws) {
        this.ws = null;
        this.isStarted = false;
        this.contexts.clear();
      }
    };
  }

  /**
   * Ask the server to create a context by sending a single-space text message for it.
   *
   * While the session has not started yet, the session-level settings from `config` are
   * attached to the message as well.
   *
   * @param contextId Identifier of the new context.
   * @param [options] `voiceId` (falls back to `config.defaultVoiceId`) and `voiceSettings`.
   * @throws {KugelAudioError} When the socket is missing or not open.
   */
  createContext(contextId, options) {
    if (!this.ws || this.ws.readyState !== WS_OPEN) {
      throw new KugelAudioError("WebSocket not connected");
    }
    const msg = {
      text: " ",
      context_id: contextId
    };
    if (!this.isStarted) {
      if (this.config.sampleRate) msg.sample_rate = this.config.sampleRate;
      // Null-check rather than truthiness so an explicit cfgScale of 0 is still sent.
      if (this.config.cfgScale != null) msg.cfg_scale = this.config.cfgScale;
      if (this.config.maxNewTokens) msg.max_new_tokens = this.config.maxNewTokens;
      if (this.config.normalize !== void 0) msg.normalize = this.config.normalize;
      if (this.config.inactivityTimeout) msg.inactivity_timeout = this.config.inactivityTimeout;
    }
    // `??` keeps an explicit voice id of 0 instead of silently falling back to the default.
    const voiceId = options?.voiceId ?? this.config.defaultVoiceId;
    if (voiceId != null) msg.voice_id = voiceId;
    if (options?.voiceSettings) {
      // Fields left undefined are omitted by JSON.stringify.
      msg.voice_settings = {
        stability: options.voiceSettings.stability,
        similarity_boost: options.voiceSettings.similarityBoost,
        style: options.voiceSettings.style,
        use_speaker_boost: options.voiceSettings.useSpeakerBoost,
        speed: options.voiceSettings.speed
      };
    }
    this.ws.send(JSON.stringify(msg));
  }

  /**
   * Send text to a context.
   *
   * If the context is unknown and the session has not started yet, createContext() is
   * called first so the opening message carries the session settings.
   *
   * @param contextId Target context.
   * @param text Text to append to the context's buffer.
   * @param [flush=false] Sent along as the message's `flush` field.
   * @throws {KugelAudioError} When the socket is missing or not open.
   */
  send(contextId, text, flush = false) {
    if (!this.ws || this.ws.readyState !== WS_OPEN) {
      throw new KugelAudioError("WebSocket not connected");
    }
    if (!this.contexts.has(contextId) && !this.isStarted) {
      this.createContext(contextId);
    }
    this.ws.send(JSON.stringify({
      text,
      context_id: contextId,
      flush
    }));
  }

  /**
   * Send a flush request for a context. Does nothing when the socket is not open.
   */
  flush(contextId) {
    if (!this.ws || this.ws.readyState !== WS_OPEN) return;
    this.ws.send(JSON.stringify({
      flush: true,
      context_id: contextId
    }));
  }

  /**
   * Send a close request for a context. Does nothing when the socket is not open.
   * The id leaves `activeContexts` only once the server confirms with `context_closed`.
   */
  closeContext(contextId) {
    if (!this.ws || this.ws.readyState !== WS_OPEN) return;
    this.ws.send(JSON.stringify({
      close_context: true,
      context_id: contextId
    }));
  }

  /**
   * Send an empty-text message for a context, intended to reset its inactivity timeout.
   * Does nothing when the socket is not open.
   */
  keepAlive(contextId) {
    if (!this.ws || this.ws.readyState !== WS_OPEN) return;
    this.ws.send(JSON.stringify({
      text: "",
      context_id: contextId
    }));
  }

  /**
   * Close the session: if the socket is open, send `close_socket` and close it; then
   * clear the local state regardless.
   */
  close() {
    if (this.ws && this.ws.readyState === WS_OPEN) {
      this.ws.send(JSON.stringify({ close_socket: true }));
      this.ws.close();
    }
    this.ws = null;
    this.isStarted = false;
    this.contexts.clear();
  }

  /**
   * Snapshot (new array) of the context ids currently confirmed by the server.
   */
  get activeContexts() {
    return Array.from(this.contexts);
  }

  /**
   * True while a socket is tracked and its readyState equals WS_OPEN.
   */
  get isConnected() {
    return this.ws !== null && this.ws.readyState === WS_OPEN;
  }
};
|
|
489
765
|
var KugelAudio = class _KugelAudio {
|
|
490
766
|
constructor(options) {
|
|
@@ -494,6 +770,7 @@ var KugelAudio = class _KugelAudio {
|
|
|
494
770
|
this._apiKey = options.apiKey;
|
|
495
771
|
this._isMasterKey = options.isMasterKey || false;
|
|
496
772
|
this._isToken = options.isToken || false;
|
|
773
|
+
this._orgId = options.orgId;
|
|
497
774
|
this._apiUrl = (options.apiUrl || DEFAULT_API_URL).replace(/\/$/, "");
|
|
498
775
|
this._ttsUrl = (options.ttsUrl || this._apiUrl).replace(/\/$/, "");
|
|
499
776
|
this._timeout = options.timeout || 6e4;
|
|
@@ -534,6 +811,10 @@ var KugelAudio = class _KugelAudio {
|
|
|
534
811
|
get isToken() {
|
|
535
812
|
return this._isToken;
|
|
536
813
|
}
|
|
814
|
+
/** Get organisation ID for billing */
|
|
815
|
+
get orgId() {
|
|
816
|
+
return this._orgId;
|
|
817
|
+
}
|
|
537
818
|
/** Get TTS URL */
|
|
538
819
|
get ttsUrl() {
|
|
539
820
|
return this._ttsUrl;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "kugelaudio",
|
|
3
|
-
"version": "0.2.
|
|
3
|
+
"version": "0.2.2",
|
|
4
4
|
"description": "Official JavaScript/TypeScript SDK for KugelAudio TTS API",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.mjs",
|
|
@@ -52,5 +52,9 @@
|
|
|
52
52
|
},
|
|
53
53
|
"engines": {
|
|
54
54
|
"node": ">=18.0.0"
|
|
55
|
+
},
|
|
56
|
+
"dependencies": {
|
|
57
|
+
"tsx": "^4.21.0",
|
|
58
|
+
"ws": "^8.18.0"
|
|
55
59
|
}
|
|
56
60
|
}
|