kugelaudio 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,3 +1,10 @@
1
+ var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
2
+ get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
3
+ }) : x)(function(x) {
4
+ if (typeof require !== "undefined") return require.apply(this, arguments);
5
+ throw Error('Dynamic require of "' + x + '" is not supported');
6
+ });
7
+
1
8
  // src/errors.ts
2
9
  var KugelAudioError = class _KugelAudioError extends Error {
3
10
  constructor(message, statusCode) {
@@ -99,8 +106,35 @@ function createWavBlob(audio, sampleRate) {
99
106
  return new Blob([wavBuffer], { type: "audio/wav" });
100
107
  }
101
108
 
109
+ // src/websocket.ts
// Cached WebSocket constructor: resolved once by getWebSocket() and reused on later calls.
var _cachedWs = null;

/**
 * Resolve the WebSocket constructor to use in the current runtime.
 *
 * Resolution order:
 *   1. a constructor resolved by an earlier call (cached in `_cachedWs`);
 *   2. the runtime's native `globalThis.WebSocket`;
 *   3. the "ws" package, loaded through the bundler's `__require` shim when it
 *      is in scope, otherwise through a `require` exposed on `globalThis`.
 *
 * @returns {Function} A WebSocket-compatible constructor.
 * @throws {Error} When no implementation can be found. If an attempt to load
 *   "ws" threw, that failure is attached as `error.cause` so the real reason
 *   (missing module, broken install, ...) is not lost.
 */
function getWebSocket() {
  if (_cachedWs) return _cachedWs;

  if (typeof globalThis !== "undefined" && typeof globalThis.WebSocket !== "undefined") {
    _cachedWs = globalThis.WebSocket;
    return _cachedWs;
  }

  let loadFailure;
  try {
    // Plain property lookup instead of the Function constructor: same global
    // `require` discovery, but it also works under a CSP that forbids eval.
    let _require;
    if (typeof __require !== "undefined") {
      _require = __require;
    } else if (typeof globalThis !== "undefined" && typeof globalThis.require !== "undefined") {
      _require = globalThis.require;
    }
    if (_require) {
      const ws = _require("ws");
      // Interop: prefer a `default` export when the module exposes one.
      _cachedWs = ws.default || ws;
      return _cachedWs;
    }
  } catch (err) {
    // Fall through to the user-facing error below, keeping the root cause.
    loadFailure = err;
  }

  const message = 'WebSocket not available. In Node.js, install the "ws" package: npm install ws';
  throw loadFailure === undefined
    ? new Error(message)
    : new Error(message, { cause: loadFailure });
}
130
+
102
131
  // src/client.ts
103
132
  var DEFAULT_API_URL = "https://api.kugelaudio.com";
/**
 * Open a WebSocket connection to `url`.
 *
 * The constructor is looked up through getWebSocket() on every call, so the
 * same code path serves both a native WebSocket and the "ws" package.
 *
 * @param {string} url - Target URL, passed unchanged to the constructor.
 * @returns {object} The newly constructed socket instance.
 */
function createWs(url) {
  const WebSocketImpl = getWebSocket();
  const socket = new WebSocketImpl(url);
  return socket;
}
137
+ var WS_OPEN = 1;
104
138
  var ModelsResource = class {
105
139
  constructor(client) {
106
140
  this.client = client;
@@ -176,6 +210,7 @@ var VoicesResource = class {
176
210
  var TTSResource = class {
177
211
  constructor(client) {
178
212
  this.client = client;
213
+ // Using any for WebSocket to support both browser WebSocket and ws package
179
214
  this.wsConnection = null;
180
215
  this.wsUrl = null;
181
216
  this.pendingRequests = /* @__PURE__ */ new Map();
@@ -205,7 +240,7 @@ var TTSResource = class {
205
240
  * Check if WebSocket connection is established and open.
206
241
  */
207
242
  isConnected() {
208
- return this.wsConnection !== null && this.wsConnection.readyState === WebSocket.OPEN;
243
+ return this.wsConnection !== null && this.wsConnection.readyState === WS_OPEN;
209
244
  }
210
245
  /**
211
246
  * Generate audio from text with streaming via WebSocket.
@@ -251,7 +286,11 @@ var TTSResource = class {
251
286
  } else {
252
287
  authParam = "api_key";
253
288
  }
254
- return `${wsUrl}/ws/tts?${authParam}=${this.client.apiKey}`;
289
+ let url = `${wsUrl}/ws/tts?${authParam}=${this.client.apiKey}`;
290
+ if (this.client.orgId !== void 0) {
291
+ url += `&org_id=${this.client.orgId}`;
292
+ }
293
+ return url;
255
294
  }
256
295
  /**
257
296
  * Get or create a WebSocket connection for connection pooling.
@@ -259,7 +298,7 @@ var TTSResource = class {
259
298
  */
260
299
  async getConnection() {
261
300
  const url = this.buildWsUrl();
262
- if (this.wsConnection && this.wsUrl === url && this.wsConnection.readyState === WebSocket.OPEN) {
301
+ if (this.wsConnection && this.wsUrl === url && this.wsConnection.readyState === WS_OPEN) {
263
302
  return this.wsConnection;
264
303
  }
265
304
  if (this.wsConnection) {
@@ -270,7 +309,7 @@ var TTSResource = class {
270
309
  this.wsConnection = null;
271
310
  }
272
311
  return new Promise((resolve, reject) => {
273
- const ws = new WebSocket(url);
312
+ const ws = createWs(url);
274
313
  ws.onopen = () => {
275
314
  this.wsConnection = ws;
276
315
  this.wsUrl = url;
@@ -288,7 +327,8 @@ var TTSResource = class {
288
327
  setupMessageHandler(ws) {
289
328
  ws.onmessage = (event) => {
290
329
  try {
291
- const data = JSON.parse(event.data);
330
+ const messageData = typeof event.data === "string" ? event.data : event.data instanceof Buffer ? event.data.toString() : String(event.data);
331
+ const data = JSON.parse(messageData);
292
332
  const [requestId, pending] = [...this.pendingRequests.entries()][0] || [];
293
333
  if (!pending) return;
294
334
  if (data.error) {
@@ -375,13 +415,12 @@ var TTSResource = class {
375
415
  callbacks.onOpen?.();
376
416
  ws.send(JSON.stringify({
377
417
  text: options.text,
378
- model: options.model || "kugel-1-turbo",
418
+ model_id: options.modelId || "kugel-1-turbo",
379
419
  voice_id: options.voiceId,
380
420
  cfg_scale: options.cfgScale ?? 2,
381
421
  max_new_tokens: options.maxNewTokens ?? 2048,
382
422
  sample_rate: options.sampleRate ?? 24e3,
383
- speaker_prefix: options.speakerPrefix ?? true,
384
- normalize: options.normalize ?? false,
423
+ normalize: options.normalize ?? true,
385
424
  ...options.language && { language: options.language }
386
425
  }));
387
426
  });
@@ -392,24 +431,24 @@ var TTSResource = class {
392
431
  streamWithoutPooling(options, callbacks) {
393
432
  return new Promise((resolve, reject) => {
394
433
  const url = this.buildWsUrl();
395
- const ws = new WebSocket(url);
434
+ const ws = createWs(url);
396
435
  ws.onopen = () => {
397
436
  callbacks.onOpen?.();
398
437
  ws.send(JSON.stringify({
399
438
  text: options.text,
400
- model: options.model || "kugel-1-turbo",
439
+ model_id: options.modelId || "kugel-1-turbo",
401
440
  voice_id: options.voiceId,
402
441
  cfg_scale: options.cfgScale ?? 2,
403
442
  max_new_tokens: options.maxNewTokens ?? 2048,
404
443
  sample_rate: options.sampleRate ?? 24e3,
405
- speaker_prefix: options.speakerPrefix ?? true,
406
- normalize: options.normalize ?? false,
444
+ normalize: options.normalize ?? true,
407
445
  ...options.language && { language: options.language }
408
446
  }));
409
447
  };
410
448
  ws.onmessage = (event) => {
411
449
  try {
412
- const data = JSON.parse(event.data);
450
+ const messageData = typeof event.data === "string" ? event.data : event.data instanceof Buffer ? event.data.toString() : String(event.data);
451
+ const data = JSON.parse(messageData);
413
452
  if (data.error) {
414
453
  const error = this.parseError(data.error);
415
454
  callbacks.onError?.(error);
@@ -485,6 +524,243 @@ var TTSResource = class {
485
524
  }
486
525
  return new KugelAudioError(message);
487
526
  }
527
+ /**
528
+ * Create a multi-context session for concurrent TTS streams.
529
+ *
530
+ * Allows managing up to 5 independent audio generation contexts
531
+ * over a single WebSocket connection. Each context has its own
532
+ * text buffer, voice settings, and generation queue.
533
+ *
534
+ * @example
535
+ * ```typescript
536
+ * const session = client.tts.createMultiContextSession({
537
+ * defaultVoiceId: 123,
538
+ * });
539
+ *
540
+ * session.connect({
541
+ * onChunk: (chunk) => {
542
+ * console.log(`Audio from ${chunk.contextId}`);
543
+ * playAudio(chunk.audio);
544
+ * },
545
+ * onContextFinal: (contextId) => {
546
+ * console.log(`${contextId} finished`);
547
+ * },
548
+ * });
549
+ *
550
+ * // Create contexts with different voices
551
+ * session.createContext('narrator', { voiceId: 123 });
552
+ * session.createContext('character', { voiceId: 456 });
553
+ *
554
+ * // Send text to different speakers
555
+ * session.send('narrator', 'The story begins.', true);
556
+ * session.send('character', 'Hello!', true);
557
+ *
558
+ * // Close when done
559
+ * session.close();
560
+ * ```
561
+ */
562
+ createMultiContextSession(config) {
563
+ return new MultiContextSession(this.client, config);
564
+ }
565
+ };
/**
 * Normalise the payload of a WebSocket `message` event to a string.
 *
 * Strings are returned as-is. ArrayBuffers and typed-array views (which
 * includes Node.js Buffers) are decoded as UTF-8 with TextDecoder, so no
 * reference to the Node-only `Buffer` global is needed. Anything else is
 * coerced with String().
 */
function decodeWsMessageData(raw) {
  if (typeof raw === "string") return raw;
  if (raw instanceof ArrayBuffer || ArrayBuffer.isView(raw)) {
    return new TextDecoder().decode(raw);
  }
  return String(raw);
}

/**
 * Session that multiplexes several TTS contexts over one WebSocket
 * (`/ws/tts/multi`).
 *
 * Lifecycle: `connect(callbacks)` opens the socket; `createContext()` /
 * `send()` write JSON messages; incoming JSON messages are translated into
 * the callbacks passed to `connect()`; `close()` ends the session.
 */
var MultiContextSession = class {
  /**
   * @param {object} client - Client exposing `ttsUrl`, `apiKey`, `isToken` and `isMasterKey`.
   * @param {object} [config] - Session defaults (`sampleRate`, `cfgScale`,
   *   `maxNewTokens`, `normalize`, `inactivityTimeout`, `defaultVoiceId`).
   */
  constructor(client, config) {
    this.client = client;
    this.ws = null;
    this.callbacks = {};
    this.contexts = /* @__PURE__ */ new Set();
    this._sessionId = null;
    this.isStarted = false;
    this.config = config || {};
  }

  /**
   * Get the current session ID, or null if the server has not announced one yet.
   */
  get sessionId() {
    return this._sessionId;
  }

  /**
   * Connect to the multi-context WebSocket endpoint.
   *
   * If a socket from an earlier `connect()` is still held, it is closed first
   * so it is not leaked.
   *
   * @param {object} [callbacks] - Optional handlers: `onError`, `onSessionStarted`,
   *   `onContextCreated`, `onChunk`, `onContextFinal`, `onContextClosed`,
   *   `onContextTimeout`, `onSessionClosed`.
   */
  connect(callbacks) {
    if (this.ws) this.close();
    // Handlers dereference this.callbacks, so it must always be an object.
    this.callbacks = callbacks || {};

    const wsUrl = this.client.ttsUrl.replace("https://", "wss://").replace("http://", "ws://");
    let authParam;
    if (this.client.isToken) {
      authParam = "token";
    } else if (this.client.isMasterKey) {
      authParam = "master_key";
    } else {
      authParam = "api_key";
    }
    // Percent-encode the credential: characters such as "+", "/", "=" or "&"
    // would otherwise be misread by query-string parsing.
    // NOTE(review): TTSResource.buildWsUrl also appends `org_id` when
    // client.orgId is set; this URL does not — confirm whether it should.
    const url = `${wsUrl}/ws/tts/multi?${authParam}=${encodeURIComponent(this.client.apiKey)}`;

    const ws = createWs(url);
    this.ws = ws;
    ws.onopen = () => {
    };
    ws.onmessage = (event) => {
      try {
        this._dispatch(JSON.parse(decodeWsMessageData(event.data)));
      } catch (e) {
        console.error("Failed to parse WebSocket message:", e);
      }
    };
    ws.onerror = () => {
      // Ignore sockets that were closed by close() or replaced by a newer connect().
      if (this.ws !== ws) return;
      this.callbacks.onError?.(new KugelAudioError("WebSocket connection error"));
    };
    ws.onclose = (event) => {
      // A stale socket must not report errors or reset the state of the
      // connection that replaced it.
      if (this.ws !== ws) return;
      if (event.code === 4001) {
        this.callbacks.onError?.(new AuthenticationError("Authentication failed"));
      } else if (event.code === 4003) {
        this.callbacks.onError?.(new InsufficientCreditsError("Insufficient credits"));
      }
      this.ws = null;
      this.isStarted = false;
      this.contexts.clear();
    };
  }

  /**
   * Translate one decoded server message into state updates and callbacks.
   * A single message may carry several flags; each is handled in turn,
   * except `error`, which short-circuits the rest.
   */
  _dispatch(data) {
    if (data.error) {
      this.callbacks.onError?.(
        new KugelAudioError(data.error),
        data.context_id
      );
      return;
    }
    if (data.session_started) {
      this._sessionId = data.session_id;
      this.isStarted = true;
      this.callbacks.onSessionStarted?.(data.session_id);
    }
    if (data.context_created) {
      this.contexts.add(data.context_id);
      this.callbacks.onContextCreated?.(data.context_id);
    }
    if (data.audio) {
      const chunk = {
        audio: data.audio,
        encoding: "pcm_s16le",
        index: data.idx || 0,
        sampleRate: data.sr || 24e3,
        samples: data.samples || 0,
        contextId: data.context_id
      };
      this.callbacks.onChunk?.(chunk);
    }
    if (data.is_final) {
      this.callbacks.onContextFinal?.(data.context_id);
    }
    if (data.context_closed) {
      this.contexts.delete(data.context_id);
      this.callbacks.onContextClosed?.(data.context_id);
    }
    if (data.context_timeout) {
      this.contexts.delete(data.context_id);
      this.callbacks.onContextTimeout?.(data.context_id);
    }
    if (data.session_closed) {
      this.callbacks.onSessionClosed?.(data);
    }
  }

  /**
   * Create a new context with optional voice settings.
   *
   * Until the server has confirmed the session (`session_started`), the
   * session-level settings from `config` are included in the message.
   *
   * @param {string} contextId - Identifier of the context to create.
   * @param {object} [options] - `voiceId` and/or `voiceSettings` for this context.
   * @throws {KugelAudioError} If the socket is not open.
   */
  createContext(contextId, options) {
    if (!this.ws || this.ws.readyState !== WS_OPEN) {
      throw new KugelAudioError("WebSocket not connected");
    }
    const msg = {
      text: " ",
      context_id: contextId
    };
    if (!this.isStarted) {
      // Explicit null/undefined checks so that a deliberate 0 (e.g. cfgScale: 0)
      // is forwarded instead of being silently dropped.
      const { sampleRate, cfgScale, maxNewTokens, normalize, inactivityTimeout } = this.config;
      if (sampleRate !== undefined && sampleRate !== null) msg.sample_rate = sampleRate;
      if (cfgScale !== undefined && cfgScale !== null) msg.cfg_scale = cfgScale;
      if (maxNewTokens !== undefined && maxNewTokens !== null) msg.max_new_tokens = maxNewTokens;
      if (normalize !== void 0) msg.normalize = normalize;
      if (inactivityTimeout !== undefined && inactivityTimeout !== null) {
        msg.inactivity_timeout = inactivityTimeout;
      }
    }
    // A falsy per-context voice id falls back to the session default.
    const voiceId = options?.voiceId || this.config.defaultVoiceId;
    if (voiceId) msg.voice_id = voiceId;
    if (options?.voiceSettings) {
      msg.voice_settings = {
        stability: options.voiceSettings.stability,
        similarity_boost: options.voiceSettings.similarityBoost,
        style: options.voiceSettings.style,
        use_speaker_boost: options.voiceSettings.useSpeakerBoost,
        speed: options.voiceSettings.speed
      };
    }
    this.ws.send(JSON.stringify(msg));
  }

  /**
   * Send text to a specific context.
   *
   * @param {string} contextId - Target context.
   * @param {string} text - Text to append to the context.
   * @param {boolean} [flush=false] - Forwarded as the message's `flush` field.
   * @throws {KugelAudioError} If the socket is not open.
   */
  send(contextId, text, flush = false) {
    if (!this.ws || this.ws.readyState !== WS_OPEN) {
      throw new KugelAudioError("WebSocket not connected");
    }
    // Before the session is confirmed, route the first message for an unknown
    // context through createContext() so the session settings are sent.
    if (!this.contexts.has(contextId) && !this.isStarted) {
      this.createContext(contextId);
    }
    this.ws.send(JSON.stringify({
      text,
      context_id: contextId,
      flush
    }));
  }

  /**
   * Flush a context's buffer. No-op when the socket is not open.
   */
  flush(contextId) {
    if (!this.ws || this.ws.readyState !== WS_OPEN) return;
    this.ws.send(JSON.stringify({
      flush: true,
      context_id: contextId
    }));
  }

  /**
   * Close a specific context. No-op when the socket is not open.
   */
  closeContext(contextId) {
    if (!this.ws || this.ws.readyState !== WS_OPEN) return;
    this.ws.send(JSON.stringify({
      close_context: true,
      context_id: contextId
    }));
  }

  /**
   * Send keep-alive to reset a context's inactivity timeout.
   * No-op when the socket is not open.
   */
  keepAlive(contextId) {
    if (!this.ws || this.ws.readyState !== WS_OPEN) return;
    this.ws.send(JSON.stringify({
      text: "",
      context_id: contextId
    }));
  }

  /**
   * Close the session and all contexts.
   *
   * An open socket is sent `close_socket` and then closed. A socket that is
   * still connecting is closed as well, so it cannot linger after the session
   * has been discarded.
   */
  close() {
    const ws = this.ws;
    // Detach first: handlers treat any socket other than this.ws as stale.
    this.ws = null;
    this.isStarted = false;
    this.contexts.clear();
    if (!ws) return;
    if (ws.readyState === WS_OPEN) {
      ws.send(JSON.stringify({ close_socket: true }));
      ws.close();
    } else if (ws.readyState === 0) {
      // readyState 0 is CONNECTING.
      ws.close();
    }
  }

  /**
   * Get active context IDs.
   */
  get activeContexts() {
    return Array.from(this.contexts);
  }

  /**
   * Check if connected.
   */
  get isConnected() {
    return this.ws !== null && this.ws.readyState === WS_OPEN;
  }
};
489
765
  var KugelAudio = class _KugelAudio {
490
766
  constructor(options) {
@@ -494,6 +770,7 @@ var KugelAudio = class _KugelAudio {
494
770
  this._apiKey = options.apiKey;
495
771
  this._isMasterKey = options.isMasterKey || false;
496
772
  this._isToken = options.isToken || false;
773
+ this._orgId = options.orgId;
497
774
  this._apiUrl = (options.apiUrl || DEFAULT_API_URL).replace(/\/$/, "");
498
775
  this._ttsUrl = (options.ttsUrl || this._apiUrl).replace(/\/$/, "");
499
776
  this._timeout = options.timeout || 6e4;
@@ -534,6 +811,10 @@ var KugelAudio = class _KugelAudio {
534
811
  get isToken() {
535
812
  return this._isToken;
536
813
  }
814
+ /** Get organisation ID for billing */
815
+ get orgId() {
816
+ return this._orgId;
817
+ }
537
818
  /** Get TTS URL */
538
819
  get ttsUrl() {
539
820
  return this._ttsUrl;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "kugelaudio",
3
- "version": "0.2.0",
3
+ "version": "0.2.2",
4
4
  "description": "Official JavaScript/TypeScript SDK for KugelAudio TTS API",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.mjs",
@@ -52,5 +52,9 @@
52
52
  },
53
53
  "engines": {
54
54
  "node": ">=18.0.0"
55
+ },
56
+ "dependencies": {
57
+ "tsx": "^4.21.0",
58
+ "ws": "^8.18.0"
55
59
  }
56
60
  }