kugelaudio 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -135,8 +135,35 @@ function createWavBlob(audio, sampleRate) {
135
135
  return new Blob([wavBuffer], { type: "audio/wav" });
136
136
  }
137
137
 
138
+ // src/websocket.ts
139
+ var _cachedWs = null;
140
+ function getWebSocket() {
141
+ if (_cachedWs) return _cachedWs;
142
+ if (typeof globalThis !== "undefined" && typeof globalThis.WebSocket !== "undefined") {
143
+ _cachedWs = globalThis.WebSocket;
144
+ return _cachedWs;
145
+ }
146
+ try {
147
+ const _require = typeof require !== "undefined" ? require : Function('return typeof require !== "undefined" ? require : undefined')();
148
+ if (_require) {
149
+ const ws = _require("ws");
150
+ _cachedWs = ws.default || ws;
151
+ return _cachedWs;
152
+ }
153
+ } catch {
154
+ }
155
+ throw new Error(
156
+ 'WebSocket not available. In Node.js, install the "ws" package: npm install ws'
157
+ );
158
+ }
159
+
138
160
  // src/client.ts
139
161
  var DEFAULT_API_URL = "https://api.kugelaudio.com";
162
/**
 * Open a WebSocket connection to `url` with whichever constructor
 * getWebSocket() resolves for this runtime.
 *
 * @param url Address handed to the WebSocket constructor.
 * @returns The newly constructed socket instance.
 */
function createWs(url) {
  return new (getWebSocket())(url);
}

// Value compared against a socket's `readyState` in place of `WebSocket.OPEN`.
var WS_OPEN = 1;
140
167
  var ModelsResource = class {
141
168
  constructor(client) {
142
169
  this.client = client;
@@ -212,6 +239,7 @@ var VoicesResource = class {
212
239
  var TTSResource = class {
213
240
  constructor(client) {
214
241
  this.client = client;
242
+ // Using any for WebSocket to support both browser WebSocket and ws package
215
243
  this.wsConnection = null;
216
244
  this.wsUrl = null;
217
245
  this.pendingRequests = /* @__PURE__ */ new Map();
@@ -241,7 +269,7 @@ var TTSResource = class {
241
269
  * Check if WebSocket connection is established and open.
242
270
  */
243
271
  isConnected() {
244
- return this.wsConnection !== null && this.wsConnection.readyState === WebSocket.OPEN;
272
+ return this.wsConnection !== null && this.wsConnection.readyState === WS_OPEN;
245
273
  }
246
274
  /**
247
275
  * Generate audio from text with streaming via WebSocket.
@@ -287,7 +315,11 @@ var TTSResource = class {
287
315
  } else {
288
316
  authParam = "api_key";
289
317
  }
290
- return `${wsUrl}/ws/tts?${authParam}=${this.client.apiKey}`;
318
+ let url = `${wsUrl}/ws/tts?${authParam}=${this.client.apiKey}`;
319
+ if (this.client.orgId !== void 0) {
320
+ url += `&org_id=${this.client.orgId}`;
321
+ }
322
+ return url;
291
323
  }
292
324
  /**
293
325
  * Get or create a WebSocket connection for connection pooling.
@@ -295,7 +327,7 @@ var TTSResource = class {
295
327
  */
296
328
  async getConnection() {
297
329
  const url = this.buildWsUrl();
298
- if (this.wsConnection && this.wsUrl === url && this.wsConnection.readyState === WebSocket.OPEN) {
330
+ if (this.wsConnection && this.wsUrl === url && this.wsConnection.readyState === WS_OPEN) {
299
331
  return this.wsConnection;
300
332
  }
301
333
  if (this.wsConnection) {
@@ -306,7 +338,7 @@ var TTSResource = class {
306
338
  this.wsConnection = null;
307
339
  }
308
340
  return new Promise((resolve, reject) => {
309
- const ws = new WebSocket(url);
341
+ const ws = createWs(url);
310
342
  ws.onopen = () => {
311
343
  this.wsConnection = ws;
312
344
  this.wsUrl = url;
@@ -324,7 +356,8 @@ var TTSResource = class {
324
356
  setupMessageHandler(ws) {
325
357
  ws.onmessage = (event) => {
326
358
  try {
327
- const data = JSON.parse(event.data);
359
+ const messageData = typeof event.data === "string" ? event.data : event.data instanceof Buffer ? event.data.toString() : String(event.data);
360
+ const data = JSON.parse(messageData);
328
361
  const [requestId, pending] = [...this.pendingRequests.entries()][0] || [];
329
362
  if (!pending) return;
330
363
  if (data.error) {
@@ -411,13 +444,12 @@ var TTSResource = class {
411
444
  callbacks.onOpen?.();
412
445
  ws.send(JSON.stringify({
413
446
  text: options.text,
414
- model: options.model || "kugel-1-turbo",
447
+ model_id: options.modelId || "kugel-1-turbo",
415
448
  voice_id: options.voiceId,
416
449
  cfg_scale: options.cfgScale ?? 2,
417
450
  max_new_tokens: options.maxNewTokens ?? 2048,
418
451
  sample_rate: options.sampleRate ?? 24e3,
419
- speaker_prefix: options.speakerPrefix ?? true,
420
- normalize: options.normalize ?? false,
452
+ normalize: options.normalize ?? true,
421
453
  ...options.language && { language: options.language }
422
454
  }));
423
455
  });
@@ -428,24 +460,24 @@ var TTSResource = class {
428
460
  streamWithoutPooling(options, callbacks) {
429
461
  return new Promise((resolve, reject) => {
430
462
  const url = this.buildWsUrl();
431
- const ws = new WebSocket(url);
463
+ const ws = createWs(url);
432
464
  ws.onopen = () => {
433
465
  callbacks.onOpen?.();
434
466
  ws.send(JSON.stringify({
435
467
  text: options.text,
436
- model: options.model || "kugel-1-turbo",
468
+ model_id: options.modelId || "kugel-1-turbo",
437
469
  voice_id: options.voiceId,
438
470
  cfg_scale: options.cfgScale ?? 2,
439
471
  max_new_tokens: options.maxNewTokens ?? 2048,
440
472
  sample_rate: options.sampleRate ?? 24e3,
441
- speaker_prefix: options.speakerPrefix ?? true,
442
- normalize: options.normalize ?? false,
473
+ normalize: options.normalize ?? true,
443
474
  ...options.language && { language: options.language }
444
475
  }));
445
476
  };
446
477
  ws.onmessage = (event) => {
447
478
  try {
448
- const data = JSON.parse(event.data);
479
+ const messageData = typeof event.data === "string" ? event.data : event.data instanceof Buffer ? event.data.toString() : String(event.data);
480
+ const data = JSON.parse(messageData);
449
481
  if (data.error) {
450
482
  const error = this.parseError(data.error);
451
483
  callbacks.onError?.(error);
@@ -521,6 +553,243 @@ var TTSResource = class {
521
553
  }
522
554
  return new KugelAudioError(message);
523
555
  }
556
+ /**
557
+ * Create a multi-context session for concurrent TTS streams.
558
+ *
559
+ * Allows managing up to 5 independent audio generation contexts
560
+ * over a single WebSocket connection. Each context has its own
561
+ * text buffer, voice settings, and generation queue.
+ *
+ * The returned session is not connected yet. `connect()` only creates the
+ * socket and returns immediately; until `session.isConnected` is true,
+ * `createContext()` and `send()` throw "WebSocket not connected", so the
+ * calls in the example below must run after the connection has opened.
562
+ *
563
+ * @example
564
+ * ```typescript
565
+ * const session = client.tts.createMultiContextSession({
566
+ * defaultVoiceId: 123,
567
+ * });
568
+ *
569
+ * session.connect({
570
+ * onChunk: (chunk) => {
571
+ * console.log(`Audio from ${chunk.contextId}`);
572
+ * playAudio(chunk.audio);
573
+ * },
574
+ * onContextFinal: (contextId) => {
575
+ * console.log(`${contextId} finished`);
576
+ * },
577
+ * });
578
+ *
579
+ * // Once session.isConnected is true: create contexts with different voices
580
+ * session.createContext('narrator', { voiceId: 123 });
581
+ * session.createContext('character', { voiceId: 456 });
582
+ *
583
+ * // Send text to different speakers (third argument is `flush`)
584
+ * session.send('narrator', 'The story begins.', true);
585
+ * session.send('character', 'Hello!', true);
586
+ *
587
+ * // Close when done
588
+ * session.close();
589
+ * ```
+ *
+ * @param config Optional session defaults; the session reads `sampleRate`,
+ * `cfgScale`, `maxNewTokens`, `normalize`, `inactivityTimeout` and
+ * `defaultVoiceId` from it. Treated as `{}` when omitted.
+ * @returns A new MultiContextSession bound to this resource's client.
590
+ */
591
+ createMultiContextSession(config) {
592
+ return new MultiContextSession(this.client, config);
593
+ }
594
+ };
595
/**
 * Session that drives several TTS contexts over one WebSocket connection to
 * the `/ws/tts/multi` endpoint.
 *
 * Lifecycle: construct, `connect(callbacks)`, wait for the socket to open
 * (`onOpen` callback or `isConnected`), then `createContext()` / `send()`,
 * and finally `close()`.
 */
var MultiContextSession = class {
  /**
   * @param client Client whose `ttsUrl`, `apiKey`, `isToken`, `isMasterKey`
   *   and `orgId` are read when connecting.
   * @param config Optional defaults (`sampleRate`, `cfgScale`, `maxNewTokens`,
   *   `normalize`, `inactivityTimeout`, `defaultVoiceId`); `{}` when omitted.
   */
  constructor(client, config) {
    this.client = client;
    this.ws = null;
    this.callbacks = {};
    this.contexts = /* @__PURE__ */ new Set();
    this._sessionId = null;
    this.isStarted = false;
    this.config = config || {};
  }
  /**
   * Get the current session ID, or null if not connected.
   */
  get sessionId() {
    return this._sessionId;
  }
  /**
   * Connect to the multi-context WebSocket endpoint.
   *
   * Returns immediately; the socket opens asynchronously. `callbacks.onOpen`
   * (optional) fires once it is open and contexts may be created. A socket
   * left over from an earlier `connect()` is closed first instead of leaked.
   *
   * @param callbacks Optional handlers: `onOpen`, `onSessionStarted`,
   *   `onContextCreated`, `onChunk`, `onContextFinal`, `onContextClosed`,
   *   `onContextTimeout`, `onSessionClosed`, `onError`.
   */
  connect(callbacks) {
    if (this.ws) {
      this.close();
    }
    this.callbacks = callbacks || {};
    const ws = createWs(this._buildUrl());
    this.ws = ws;
    ws.onopen = () => {
      if (this.ws === ws) {
        this.callbacks.onOpen?.();
      }
    };
    ws.onmessage = (event) => this._handleMessage(event);
    ws.onerror = () => {
      // A socket we already detached (close() / reconnect) may still report an
      // aborted handshake; that is not an error of the current session.
      if (this.ws !== ws) return;
      this.callbacks.onError?.(new KugelAudioError("WebSocket connection error"));
    };
    ws.onclose = (event) => {
      if (event.code === 4001) {
        this.callbacks.onError?.(new AuthenticationError("Authentication failed"));
      } else if (event.code === 4003) {
        this.callbacks.onError?.(new InsufficientCreditsError("Insufficient credits"));
      }
      // Only the current socket may reset the session; a stale socket closing
      // late must not wipe the state of a newer connection.
      if (this.ws === ws) {
        this._resetState();
      }
    };
  }
  /**
   * Build the endpoint URL: ws(s) scheme, auth query parameter chosen from the
   * client's key type, and `org_id` when the client has one (as the
   * single-stream TTS URL does). Values are percent-encoded so special
   * characters cannot corrupt the query string.
   */
  _buildUrl() {
    const wsUrl = this.client.ttsUrl.replace("https://", "wss://").replace("http://", "ws://");
    let authParam;
    if (this.client.isToken) {
      authParam = "token";
    } else if (this.client.isMasterKey) {
      authParam = "master_key";
    } else {
      authParam = "api_key";
    }
    let url = `${wsUrl}/ws/tts/multi?${authParam}=${encodeURIComponent(this.client.apiKey)}`;
    if (this.client.orgId !== void 0) {
      url += `&org_id=${encodeURIComponent(this.client.orgId)}`;
    }
    return url;
  }
  /**
   * Turn a message payload into text. `Buffer` is only consulted where it
   * exists, so non-string payloads do not raise a ReferenceError elsewhere.
   */
  _decodeMessage(raw) {
    if (typeof raw === "string") return raw;
    if (typeof Buffer !== "undefined" && raw instanceof Buffer) return raw.toString();
    return String(raw);
  }
  /**
   * Parse one server message and dispatch it to the registered callbacks,
   * keeping `sessionId`, `isStarted` and the active-context set in sync.
   */
  _handleMessage(event) {
    try {
      const data = JSON.parse(this._decodeMessage(event.data));
      if (data.error) {
        this.callbacks.onError?.(
          new KugelAudioError(data.error),
          data.context_id
        );
        return;
      }
      if (data.session_started) {
        this._sessionId = data.session_id;
        this.isStarted = true;
        this.callbacks.onSessionStarted?.(data.session_id);
      }
      if (data.context_created) {
        this.contexts.add(data.context_id);
        this.callbacks.onContextCreated?.(data.context_id);
      }
      if (data.audio) {
        const chunk = {
          audio: data.audio,
          encoding: "pcm_s16le",
          index: data.idx || 0,
          sampleRate: data.sr || 24e3,
          samples: data.samples || 0,
          contextId: data.context_id
        };
        this.callbacks.onChunk?.(chunk);
      }
      if (data.is_final) {
        this.callbacks.onContextFinal?.(data.context_id);
      }
      if (data.context_closed) {
        this.contexts.delete(data.context_id);
        this.callbacks.onContextClosed?.(data.context_id);
      }
      if (data.context_timeout) {
        this.contexts.delete(data.context_id);
        this.callbacks.onContextTimeout?.(data.context_id);
      }
      if (data.session_closed) {
        this.callbacks.onSessionClosed?.(data);
      }
    } catch (e) {
      console.error("Failed to parse WebSocket message:", e);
    }
  }
  /** Forget the socket and everything tied to it. */
  _resetState() {
    this.ws = null;
    this.isStarted = false;
    this._sessionId = null;
    this.contexts.clear();
  }
  /**
   * Create a new context with optional voice settings.
   *
   * Session-level config is attached only while the session has not started.
   * Nullish checks are used so an explicit `0` (e.g. `cfgScale: 0`,
   * `voiceId: 0`) is forwarded rather than silently dropped.
   *
   * @param contextId Identifier sent as `context_id`.
   * @param options Optional `voiceId` (falls back to `config.defaultVoiceId`)
   *   and `voiceSettings`.
   * @throws {KugelAudioError} When the socket is not open.
   */
  createContext(contextId, options) {
    if (!this.ws || this.ws.readyState !== WS_OPEN) {
      throw new KugelAudioError("WebSocket not connected");
    }
    const msg = {
      text: " ",
      context_id: contextId
    };
    if (!this.isStarted) {
      const { sampleRate, cfgScale, maxNewTokens, normalize, inactivityTimeout } = this.config;
      if (sampleRate != null) msg.sample_rate = sampleRate;
      if (cfgScale != null) msg.cfg_scale = cfgScale;
      if (maxNewTokens != null) msg.max_new_tokens = maxNewTokens;
      if (normalize !== void 0) msg.normalize = normalize;
      if (inactivityTimeout != null) msg.inactivity_timeout = inactivityTimeout;
    }
    const voiceId = options?.voiceId ?? this.config.defaultVoiceId;
    if (voiceId != null) msg.voice_id = voiceId;
    if (options?.voiceSettings) {
      msg.voice_settings = {
        stability: options.voiceSettings.stability,
        similarity_boost: options.voiceSettings.similarityBoost,
        style: options.voiceSettings.style,
        use_speaker_boost: options.voiceSettings.useSpeakerBoost,
        speed: options.voiceSettings.speed
      };
    }
    this.ws.send(JSON.stringify(msg));
  }
  /**
   * Send text to a specific context.
   *
   * @param contextId Target context.
   * @param text Text to append to the context.
   * @param flush Sent as the message's `flush` field; defaults to false.
   * @throws {KugelAudioError} When the socket is not open.
   */
  send(contextId, text, flush = false) {
    if (!this.ws || this.ws.readyState !== WS_OPEN) {
      throw new KugelAudioError("WebSocket not connected");
    }
    // Before the session has started, an unknown context is created first so
    // the session-level config travels with the very first message.
    if (!this.contexts.has(contextId) && !this.isStarted) {
      this.createContext(contextId);
    }
    this.ws.send(JSON.stringify({
      text,
      context_id: contextId,
      flush
    }));
  }
  /**
   * Flush a context's buffer. No-op when the socket is not open.
   */
  flush(contextId) {
    if (!this.ws || this.ws.readyState !== WS_OPEN) return;
    this.ws.send(JSON.stringify({
      flush: true,
      context_id: contextId
    }));
  }
  /**
   * Close a specific context. No-op when the socket is not open.
   */
  closeContext(contextId) {
    if (!this.ws || this.ws.readyState !== WS_OPEN) return;
    this.ws.send(JSON.stringify({
      close_context: true,
      context_id: contextId
    }));
  }
  /**
   * Send keep-alive to reset a context's inactivity timeout.
   * No-op when the socket is not open.
   */
  keepAlive(contextId) {
    if (!this.ws || this.ws.readyState !== WS_OPEN) return;
    this.ws.send(JSON.stringify({
      text: "",
      context_id: contextId
    }));
  }
  /**
   * Close the session and all contexts.
   *
   * An open socket is told `close_socket` and then closed; a socket that is
   * still connecting is closed as well instead of being abandoned.
   */
  close() {
    const ws = this.ws;
    // Detach before closing so the socket's own close/error events are
    // recognised as stale by the handlers installed in connect().
    this._resetState();
    if (!ws) return;
    if (ws.readyState === WS_OPEN) {
      try {
        ws.send(JSON.stringify({ close_socket: true }));
      } finally {
        ws.close();
      }
    } else if (ws.readyState === 0) {
      // readyState 0 is CONNECTING: nothing can be sent yet, but the pending
      // connection must still be torn down.
      ws.close();
    }
  }
  /**
   * Get active context IDs.
   */
  get activeContexts() {
    return Array.from(this.contexts);
  }
  /**
   * Check if connected.
   */
  get isConnected() {
    return this.ws !== null && this.ws.readyState === WS_OPEN;
  }
};
525
794
  var KugelAudio = class _KugelAudio {
526
795
  constructor(options) {
@@ -530,6 +799,7 @@ var KugelAudio = class _KugelAudio {
530
799
  this._apiKey = options.apiKey;
531
800
  this._isMasterKey = options.isMasterKey || false;
532
801
  this._isToken = options.isToken || false;
802
+ this._orgId = options.orgId;
533
803
  this._apiUrl = (options.apiUrl || DEFAULT_API_URL).replace(/\/$/, "");
534
804
  this._ttsUrl = (options.ttsUrl || this._apiUrl).replace(/\/$/, "");
535
805
  this._timeout = options.timeout || 6e4;
@@ -570,6 +840,10 @@ var KugelAudio = class _KugelAudio {
570
840
  get isToken() {
571
841
  return this._isToken;
572
842
  }
843
+ /**
+ * Get organisation ID for billing.
+ * Holds the value passed as `options.orgId` to the constructor (undefined
+ * when omitted); the TTS WebSocket URL gets an `org_id` parameter only when
+ * this is not undefined.
+ */
844
+ get orgId() {
845
+ return this._orgId;
846
+ }
573
847
  /** Get TTS URL */
574
848
  get ttsUrl() {
575
849
  return this._ttsUrl;