kugelaudio 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -31,7 +31,7 @@ const client = new KugelAudio({ apiKey: 'your_api_key' });
31
31
  // Generate speech
32
32
  const audio = await client.tts.generate({
33
33
  text: 'Hello, world!',
34
- model: 'kugel-one-turbo',
34
+ model: 'kugel-1-turbo',
35
35
  });
36
36
 
37
37
  // Create a playable blob (browser)
@@ -86,8 +86,8 @@ const client = new KugelAudio({
86
86
 
87
87
  | Model ID | Name | Parameters | Description |
88
88
  |----------|------|------------|-------------|
89
- | `kugel-one-turbo` | Kugel One Turbo | 1.5B | Fast, low-latency model for real-time applications |
90
- | `kugel-one` | Kugel One | 7B | Premium quality model for pre-recorded content |
89
+ | `kugel-1-turbo` | Kugel 1 Turbo | 1.5B | Fast, low-latency model for real-time applications |
90
+ | `kugel-1` | Kugel 1 | 7B | Premium quality model for pre-recorded content |
91
91
 
92
92
  ### List Available Models
93
93
 
@@ -144,7 +144,7 @@ Generate complete audio and receive it all at once:
144
144
  ```typescript
145
145
  const audio = await client.tts.generate({
146
146
  text: 'Hello, this is a test of the KugelAudio text-to-speech system.',
147
- model: 'kugel-one-turbo', // 'kugel-one-turbo' (fast) or 'kugel-one' (quality)
147
+ model: 'kugel-1-turbo', // 'kugel-1-turbo' (fast) or 'kugel-1' (quality)
148
148
  voiceId: 123, // Optional: specific voice ID
149
149
  cfgScale: 2.0, // Guidance scale (1.0-5.0)
150
150
  maxNewTokens: 2048, // Maximum tokens to generate
@@ -169,7 +169,7 @@ import { createWavBlob } from 'kugelaudio';
169
169
 
170
170
  const audio = await client.tts.generate({
171
171
  text: 'Hello, world!',
172
- model: 'kugel-one-turbo',
172
+ model: 'kugel-1-turbo',
173
173
  });
174
174
 
175
175
  // Create WAV blob for playback
@@ -198,7 +198,7 @@ Receive audio chunks as they are generated for lower latency:
198
198
  await client.tts.stream(
199
199
  {
200
200
  text: 'Hello, this is streaming audio.',
201
- model: 'kugel-one-turbo',
201
+ model: 'kugel-1-turbo',
202
202
  },
203
203
  {
204
204
  onOpen: () => {
@@ -300,7 +300,7 @@ interface KugelAudioOptions {
300
300
  ```typescript
301
301
  interface GenerateOptions {
302
302
  text: string; // Required: Text to synthesize
303
- model?: string; // Default: 'kugel-one-turbo'
303
+ model?: string; // Default: 'kugel-1-turbo'
304
304
  voiceId?: number; // Optional: Voice ID
305
305
  cfgScale?: number; // Default: 2.0
306
306
  maxNewTokens?: number; // Default: 2048
@@ -364,7 +364,7 @@ interface StreamCallbacks {
364
364
 
365
365
  ```typescript
366
366
  interface Model {
367
- id: string; // 'kugel-one-turbo' or 'kugel-one'
367
+ id: string; // 'kugel-1-turbo' or 'kugel-1'
368
368
  name: string; // Human-readable name
369
369
  description: string; // Model description
370
370
  parameters: string; // Parameter count ('1.5B', '7B')
@@ -467,7 +467,7 @@ async function main() {
467
467
  await client.tts.stream(
468
468
  {
469
469
  text: 'Welcome to KugelAudio. This is an example of high-quality text-to-speech synthesis.',
470
- model: 'kugel-one-turbo',
470
+ model: 'kugel-1-turbo',
471
471
  },
472
472
  {
473
473
  onChunk: (chunk) => {
package/dist/index.d.mts CHANGED
@@ -47,7 +47,7 @@ interface Voice {
47
47
  interface GenerateOptions {
48
48
  /** Text to synthesize */
49
49
  text: string;
50
- /** Model to use (default: 'kugel-one-turbo') */
50
+ /** Model to use: 'kugel-1-turbo' (1.5B, fast) or 'kugel-1' (7B, premium). Default: 'kugel-1-turbo' */
51
51
  model?: string;
52
52
  /** Voice ID to use */
53
53
  voiceId?: number;
@@ -200,16 +200,46 @@ declare class VoicesResource {
200
200
  */
201
201
  declare class TTSResource {
202
202
  private client;
203
+ private wsConnection;
204
+ private wsUrl;
205
+ private pendingRequests;
206
+ private requestCounter;
203
207
  constructor(client: KugelAudio);
204
208
  /**
205
209
  * Generate audio from text with streaming via WebSocket.
206
210
  * Returns complete audio after all chunks are received.
207
211
  */
208
212
  generate(options: GenerateOptions): Promise<AudioResponse>;
213
+ /**
214
+ * Get or create a WebSocket connection for connection pooling.
215
+ * This avoids the ~220ms connect overhead on each request.
216
+ */
217
+ private getConnection;
218
+ /**
219
+ * Setup message handler for pooled connection.
220
+ */
221
+ private setupMessageHandler;
209
222
  /**
210
223
  * Stream audio from text via WebSocket.
224
+ * Uses connection pooling for faster TTFA (~180ms vs ~400ms).
225
+ *
226
+ * @param options - Generation options
227
+ * @param callbacks - Stream callbacks
228
+ * @param reuseConnection - If true (default), reuse WebSocket connection
229
+ */
230
+ stream(options: GenerateOptions, callbacks: StreamCallbacks, reuseConnection?: boolean): Promise<void>;
231
+ /**
232
+ * Stream with connection pooling (fast path).
233
+ */
234
+ private streamWithPooling;
235
+ /**
236
+ * Stream without connection pooling (original behavior).
211
237
  */
212
- stream(options: GenerateOptions, callbacks: StreamCallbacks): Promise<void>;
238
+ private streamWithoutPooling;
239
+ /**
240
+ * Close the pooled WebSocket connection.
241
+ */
242
+ close(): void;
213
243
  private parseError;
214
244
  }
215
245
  /**
@@ -225,10 +255,16 @@ declare class TTSResource {
225
255
  * // List voices
226
256
  * const voices = await client.voices.list();
227
257
  *
228
- * // Generate audio
258
+ * // Generate audio with fast model (1.5B params)
259
+ * const audio = await client.tts.generate({
260
+ * text: 'Hello, world!',
261
+ * model: 'kugel-1-turbo',
262
+ * });
263
+ *
264
+ * // Generate audio with premium model (7B params)
229
265
  * const audio = await client.tts.generate({
230
266
  * text: 'Hello, world!',
231
- * model: 'kugel-one-turbo',
267
+ * model: 'kugel-1',
232
268
  * });
233
269
  * ```
234
270
  */
@@ -248,6 +284,11 @@ declare class KugelAudio {
248
284
  get apiKey(): string;
249
285
  /** Get TTS URL */
250
286
  get ttsUrl(): string;
287
+ /**
288
+ * Close the client and release resources.
289
+ * This closes any pooled WebSocket connections.
290
+ */
291
+ close(): void;
251
292
  /**
252
293
  * Make an HTTP request to the API.
253
294
  * @internal
package/dist/index.d.ts CHANGED
@@ -47,7 +47,7 @@ interface Voice {
47
47
  interface GenerateOptions {
48
48
  /** Text to synthesize */
49
49
  text: string;
50
- /** Model to use (default: 'kugel-one-turbo') */
50
+ /** Model to use: 'kugel-1-turbo' (1.5B, fast) or 'kugel-1' (7B, premium). Default: 'kugel-1-turbo' */
51
51
  model?: string;
52
52
  /** Voice ID to use */
53
53
  voiceId?: number;
@@ -200,16 +200,46 @@ declare class VoicesResource {
200
200
  */
201
201
  declare class TTSResource {
202
202
  private client;
203
+ private wsConnection;
204
+ private wsUrl;
205
+ private pendingRequests;
206
+ private requestCounter;
203
207
  constructor(client: KugelAudio);
204
208
  /**
205
209
  * Generate audio from text with streaming via WebSocket.
206
210
  * Returns complete audio after all chunks are received.
207
211
  */
208
212
  generate(options: GenerateOptions): Promise<AudioResponse>;
213
+ /**
214
+ * Get or create a WebSocket connection for connection pooling.
215
+ * This avoids the ~220ms connect overhead on each request.
216
+ */
217
+ private getConnection;
218
+ /**
219
+ * Setup message handler for pooled connection.
220
+ */
221
+ private setupMessageHandler;
209
222
  /**
210
223
  * Stream audio from text via WebSocket.
224
+ * Uses connection pooling for faster TTFA (~180ms vs ~400ms).
225
+ *
226
+ * @param options - Generation options
227
+ * @param callbacks - Stream callbacks
228
+ * @param reuseConnection - If true (default), reuse WebSocket connection
229
+ */
230
+ stream(options: GenerateOptions, callbacks: StreamCallbacks, reuseConnection?: boolean): Promise<void>;
231
+ /**
232
+ * Stream with connection pooling (fast path).
233
+ */
234
+ private streamWithPooling;
235
+ /**
236
+ * Stream without connection pooling (original behavior).
211
237
  */
212
- stream(options: GenerateOptions, callbacks: StreamCallbacks): Promise<void>;
238
+ private streamWithoutPooling;
239
+ /**
240
+ * Close the pooled WebSocket connection.
241
+ */
242
+ close(): void;
213
243
  private parseError;
214
244
  }
215
245
  /**
@@ -225,10 +255,16 @@ declare class TTSResource {
225
255
  * // List voices
226
256
  * const voices = await client.voices.list();
227
257
  *
228
- * // Generate audio
258
+ * // Generate audio with fast model (1.5B params)
259
+ * const audio = await client.tts.generate({
260
+ * text: 'Hello, world!',
261
+ * model: 'kugel-1-turbo',
262
+ * });
263
+ *
264
+ * // Generate audio with premium model (7B params)
229
265
  * const audio = await client.tts.generate({
230
266
  * text: 'Hello, world!',
231
- * model: 'kugel-one-turbo',
267
+ * model: 'kugel-1',
232
268
  * });
233
269
  * ```
234
270
  */
@@ -248,6 +284,11 @@ declare class KugelAudio {
248
284
  get apiKey(): string;
249
285
  /** Get TTS URL */
250
286
  get ttsUrl(): string;
287
+ /**
288
+ * Close the client and release resources.
289
+ * This closes any pooled WebSocket connections.
290
+ */
291
+ close(): void;
251
292
  /**
252
293
  * Make an HTTP request to the API.
253
294
  * @internal
package/dist/index.js CHANGED
@@ -212,6 +212,10 @@ var VoicesResource = class {
212
212
  var TTSResource = class {
213
213
  constructor(client) {
214
214
  this.client = client;
215
+ this.wsConnection = null;
216
+ this.wsUrl = null;
217
+ this.pendingRequests = /* @__PURE__ */ new Map();
218
+ this.requestCounter = 0;
215
219
  }
216
220
  /**
217
221
  * Generate audio from text with streaming via WebSocket.
@@ -244,10 +248,142 @@ var TTSResource = class {
244
248
  rtf: finalStats ? finalStats.rtf : 0
245
249
  };
246
250
  }
251
+ /**
252
+ * Get or create a WebSocket connection for connection pooling.
253
+ * This avoids the ~220ms connect overhead on each request.
254
+ */
255
+ async getConnection() {
256
+ const wsUrl = this.client.ttsUrl.replace("https://", "wss://").replace("http://", "ws://");
257
+ const url = `${wsUrl}/ws/tts?api_key=${this.client.apiKey}`;
258
+ if (this.wsConnection && this.wsUrl === url && this.wsConnection.readyState === WebSocket.OPEN) {
259
+ return this.wsConnection;
260
+ }
261
+ if (this.wsConnection) {
262
+ try {
263
+ this.wsConnection.close();
264
+ } catch {
265
+ }
266
+ this.wsConnection = null;
267
+ }
268
+ return new Promise((resolve, reject) => {
269
+ const ws = new WebSocket(url);
270
+ ws.onopen = () => {
271
+ this.wsConnection = ws;
272
+ this.wsUrl = url;
273
+ this.setupMessageHandler(ws);
274
+ resolve(ws);
275
+ };
276
+ ws.onerror = () => {
277
+ reject(new KugelAudioError("WebSocket connection error"));
278
+ };
279
+ });
280
+ }
281
+ /**
282
+ * Setup message handler for pooled connection.
283
+ */
284
+ setupMessageHandler(ws) {
285
+ ws.onmessage = (event) => {
286
+ try {
287
+ const data = JSON.parse(event.data);
288
+ const [requestId, pending] = [...this.pendingRequests.entries()][0] || [];
289
+ if (!pending) return;
290
+ if (data.error) {
291
+ const error = this.parseError(data.error);
292
+ pending.callbacks.onError?.(error);
293
+ this.pendingRequests.delete(requestId);
294
+ pending.reject(error);
295
+ return;
296
+ }
297
+ if (data.final) {
298
+ const stats = {
299
+ final: true,
300
+ chunks: data.chunks,
301
+ totalSamples: data.total_samples,
302
+ durationMs: data.dur_ms,
303
+ generationMs: data.gen_ms,
304
+ ttfaMs: data.ttfa_ms,
305
+ rtf: data.rtf,
306
+ error: data.error
307
+ };
308
+ pending.callbacks.onFinal?.(stats);
309
+ this.pendingRequests.delete(requestId);
310
+ pending.resolve();
311
+ return;
312
+ }
313
+ if (data.audio) {
314
+ const chunk = {
315
+ audio: data.audio,
316
+ encoding: data.enc || "pcm_s16le",
317
+ index: data.idx,
318
+ sampleRate: data.sr,
319
+ samples: data.samples
320
+ };
321
+ pending.callbacks.onChunk?.(chunk);
322
+ }
323
+ } catch (e) {
324
+ console.error("Failed to parse WebSocket message:", e);
325
+ }
326
+ };
327
+ ws.onclose = (event) => {
328
+ this.wsConnection = null;
329
+ this.wsUrl = null;
330
+ for (const [id, pending] of this.pendingRequests) {
331
+ pending.callbacks.onClose?.();
332
+ if (event.code === 4001) {
333
+ pending.reject(new AuthenticationError("Authentication failed"));
334
+ } else if (event.code === 4003) {
335
+ pending.reject(new InsufficientCreditsError("Insufficient credits"));
336
+ }
337
+ this.pendingRequests.delete(id);
338
+ }
339
+ };
340
+ ws.onerror = () => {
341
+ const error = new KugelAudioError("WebSocket connection error");
342
+ for (const [id, pending] of this.pendingRequests) {
343
+ pending.callbacks.onError?.(error);
344
+ pending.reject(error);
345
+ this.pendingRequests.delete(id);
346
+ }
347
+ };
348
+ }
247
349
  /**
248
350
  * Stream audio from text via WebSocket.
351
+ * Uses connection pooling for faster TTFA (~180ms vs ~400ms).
352
+ *
353
+ * @param options - Generation options
354
+ * @param callbacks - Stream callbacks
355
+ * @param reuseConnection - If true (default), reuse WebSocket connection
356
+ */
357
+ stream(options, callbacks, reuseConnection = true) {
358
+ if (reuseConnection) {
359
+ return this.streamWithPooling(options, callbacks);
360
+ }
361
+ return this.streamWithoutPooling(options, callbacks);
362
+ }
363
+ /**
364
+ * Stream with connection pooling (fast path).
365
+ */
366
+ async streamWithPooling(options, callbacks) {
367
+ const ws = await this.getConnection();
368
+ const requestId = ++this.requestCounter;
369
+ return new Promise((resolve, reject) => {
370
+ this.pendingRequests.set(requestId, { callbacks, resolve, reject });
371
+ callbacks.onOpen?.();
372
+ ws.send(JSON.stringify({
373
+ text: options.text,
374
+ model: options.model || "kugel-1-turbo",
375
+ voice_id: options.voiceId,
376
+ cfg_scale: options.cfgScale ?? 2,
377
+ max_new_tokens: options.maxNewTokens ?? 2048,
378
+ sample_rate: options.sampleRate ?? 24e3,
379
+ speaker_prefix: options.speakerPrefix ?? true
380
+ }));
381
+ });
382
+ }
383
+ /**
384
+ * Stream without connection pooling (original behavior).
249
385
  */
250
- stream(options, callbacks) {
386
+ streamWithoutPooling(options, callbacks) {
251
387
  return new Promise((resolve, reject) => {
252
388
  const wsUrl = this.client.ttsUrl.replace("https://", "wss://").replace("http://", "ws://");
253
389
  const url = `${wsUrl}/ws/tts?api_key=${this.client.apiKey}`;
@@ -256,7 +392,7 @@ var TTSResource = class {
256
392
  callbacks.onOpen?.();
257
393
  ws.send(JSON.stringify({
258
394
  text: options.text,
259
- model: options.model || "kugel-one-turbo",
395
+ model: options.model || "kugel-1-turbo",
260
396
  voice_id: options.voiceId,
261
397
  cfg_scale: options.cfgScale ?? 2,
262
398
  max_new_tokens: options.maxNewTokens ?? 2048,
@@ -319,6 +455,19 @@ var TTSResource = class {
319
455
  };
320
456
  });
321
457
  }
458
+ /**
459
+ * Close the pooled WebSocket connection.
460
+ */
461
+ close() {
462
+ if (this.wsConnection) {
463
+ try {
464
+ this.wsConnection.close();
465
+ } catch {
466
+ }
467
+ this.wsConnection = null;
468
+ this.wsUrl = null;
469
+ }
470
+ }
322
471
  parseError(message) {
323
472
  const lower = message.toLowerCase();
324
473
  if (lower.includes("auth") || lower.includes("unauthorized")) {
@@ -351,6 +500,13 @@ var KugelAudio = class {
351
500
  get ttsUrl() {
352
501
  return this._ttsUrl;
353
502
  }
503
+ /**
504
+ * Close the client and release resources.
505
+ * This closes any pooled WebSocket connections.
506
+ */
507
+ close() {
508
+ this.tts.close();
509
+ }
354
510
  /**
355
511
  * Make an HTTP request to the API.
356
512
  * @internal
package/dist/index.mjs CHANGED
@@ -176,6 +176,10 @@ var VoicesResource = class {
176
176
  var TTSResource = class {
177
177
  constructor(client) {
178
178
  this.client = client;
179
+ this.wsConnection = null;
180
+ this.wsUrl = null;
181
+ this.pendingRequests = /* @__PURE__ */ new Map();
182
+ this.requestCounter = 0;
179
183
  }
180
184
  /**
181
185
  * Generate audio from text with streaming via WebSocket.
@@ -208,10 +212,142 @@ var TTSResource = class {
208
212
  rtf: finalStats ? finalStats.rtf : 0
209
213
  };
210
214
  }
215
+ /**
216
+ * Get or create a WebSocket connection for connection pooling.
217
+ * This avoids the ~220ms connect overhead on each request.
218
+ */
219
+ async getConnection() {
220
+ const wsUrl = this.client.ttsUrl.replace("https://", "wss://").replace("http://", "ws://");
221
+ const url = `${wsUrl}/ws/tts?api_key=${this.client.apiKey}`;
222
+ if (this.wsConnection && this.wsUrl === url && this.wsConnection.readyState === WebSocket.OPEN) {
223
+ return this.wsConnection;
224
+ }
225
+ if (this.wsConnection) {
226
+ try {
227
+ this.wsConnection.close();
228
+ } catch {
229
+ }
230
+ this.wsConnection = null;
231
+ }
232
+ return new Promise((resolve, reject) => {
233
+ const ws = new WebSocket(url);
234
+ ws.onopen = () => {
235
+ this.wsConnection = ws;
236
+ this.wsUrl = url;
237
+ this.setupMessageHandler(ws);
238
+ resolve(ws);
239
+ };
240
+ ws.onerror = () => {
241
+ reject(new KugelAudioError("WebSocket connection error"));
242
+ };
243
+ });
244
+ }
245
+ /**
246
+ * Setup message handler for pooled connection.
247
+ */
248
+ setupMessageHandler(ws) {
249
+ ws.onmessage = (event) => {
250
+ try {
251
+ const data = JSON.parse(event.data);
252
+ const [requestId, pending] = [...this.pendingRequests.entries()][0] || [];
253
+ if (!pending) return;
254
+ if (data.error) {
255
+ const error = this.parseError(data.error);
256
+ pending.callbacks.onError?.(error);
257
+ this.pendingRequests.delete(requestId);
258
+ pending.reject(error);
259
+ return;
260
+ }
261
+ if (data.final) {
262
+ const stats = {
263
+ final: true,
264
+ chunks: data.chunks,
265
+ totalSamples: data.total_samples,
266
+ durationMs: data.dur_ms,
267
+ generationMs: data.gen_ms,
268
+ ttfaMs: data.ttfa_ms,
269
+ rtf: data.rtf,
270
+ error: data.error
271
+ };
272
+ pending.callbacks.onFinal?.(stats);
273
+ this.pendingRequests.delete(requestId);
274
+ pending.resolve();
275
+ return;
276
+ }
277
+ if (data.audio) {
278
+ const chunk = {
279
+ audio: data.audio,
280
+ encoding: data.enc || "pcm_s16le",
281
+ index: data.idx,
282
+ sampleRate: data.sr,
283
+ samples: data.samples
284
+ };
285
+ pending.callbacks.onChunk?.(chunk);
286
+ }
287
+ } catch (e) {
288
+ console.error("Failed to parse WebSocket message:", e);
289
+ }
290
+ };
291
+ ws.onclose = (event) => {
292
+ this.wsConnection = null;
293
+ this.wsUrl = null;
294
+ for (const [id, pending] of this.pendingRequests) {
295
+ pending.callbacks.onClose?.();
296
+ if (event.code === 4001) {
297
+ pending.reject(new AuthenticationError("Authentication failed"));
298
+ } else if (event.code === 4003) {
299
+ pending.reject(new InsufficientCreditsError("Insufficient credits"));
300
+ }
301
+ this.pendingRequests.delete(id);
302
+ }
303
+ };
304
+ ws.onerror = () => {
305
+ const error = new KugelAudioError("WebSocket connection error");
306
+ for (const [id, pending] of this.pendingRequests) {
307
+ pending.callbacks.onError?.(error);
308
+ pending.reject(error);
309
+ this.pendingRequests.delete(id);
310
+ }
311
+ };
312
+ }
211
313
  /**
212
314
  * Stream audio from text via WebSocket.
315
+ * Uses connection pooling for faster TTFA (~180ms vs ~400ms).
316
+ *
317
+ * @param options - Generation options
318
+ * @param callbacks - Stream callbacks
319
+ * @param reuseConnection - If true (default), reuse WebSocket connection
320
+ */
321
+ stream(options, callbacks, reuseConnection = true) {
322
+ if (reuseConnection) {
323
+ return this.streamWithPooling(options, callbacks);
324
+ }
325
+ return this.streamWithoutPooling(options, callbacks);
326
+ }
327
+ /**
328
+ * Stream with connection pooling (fast path).
329
+ */
330
+ async streamWithPooling(options, callbacks) {
331
+ const ws = await this.getConnection();
332
+ const requestId = ++this.requestCounter;
333
+ return new Promise((resolve, reject) => {
334
+ this.pendingRequests.set(requestId, { callbacks, resolve, reject });
335
+ callbacks.onOpen?.();
336
+ ws.send(JSON.stringify({
337
+ text: options.text,
338
+ model: options.model || "kugel-1-turbo",
339
+ voice_id: options.voiceId,
340
+ cfg_scale: options.cfgScale ?? 2,
341
+ max_new_tokens: options.maxNewTokens ?? 2048,
342
+ sample_rate: options.sampleRate ?? 24e3,
343
+ speaker_prefix: options.speakerPrefix ?? true
344
+ }));
345
+ });
346
+ }
347
+ /**
348
+ * Stream without connection pooling (original behavior).
213
349
  */
214
- stream(options, callbacks) {
350
+ streamWithoutPooling(options, callbacks) {
215
351
  return new Promise((resolve, reject) => {
216
352
  const wsUrl = this.client.ttsUrl.replace("https://", "wss://").replace("http://", "ws://");
217
353
  const url = `${wsUrl}/ws/tts?api_key=${this.client.apiKey}`;
@@ -220,7 +356,7 @@ var TTSResource = class {
220
356
  callbacks.onOpen?.();
221
357
  ws.send(JSON.stringify({
222
358
  text: options.text,
223
- model: options.model || "kugel-one-turbo",
359
+ model: options.model || "kugel-1-turbo",
224
360
  voice_id: options.voiceId,
225
361
  cfg_scale: options.cfgScale ?? 2,
226
362
  max_new_tokens: options.maxNewTokens ?? 2048,
@@ -283,6 +419,19 @@ var TTSResource = class {
283
419
  };
284
420
  });
285
421
  }
422
+ /**
423
+ * Close the pooled WebSocket connection.
424
+ */
425
+ close() {
426
+ if (this.wsConnection) {
427
+ try {
428
+ this.wsConnection.close();
429
+ } catch {
430
+ }
431
+ this.wsConnection = null;
432
+ this.wsUrl = null;
433
+ }
434
+ }
286
435
  parseError(message) {
287
436
  const lower = message.toLowerCase();
288
437
  if (lower.includes("auth") || lower.includes("unauthorized")) {
@@ -315,6 +464,13 @@ var KugelAudio = class {
315
464
  get ttsUrl() {
316
465
  return this._ttsUrl;
317
466
  }
467
+ /**
468
+ * Close the client and release resources.
469
+ * This closes any pooled WebSocket connections.
470
+ */
471
+ close() {
472
+ this.tts.close();
473
+ }
318
474
  /**
319
475
  * Make an HTTP request to the API.
320
476
  * @internal
package/package.json CHANGED
@@ -1,15 +1,15 @@
1
1
  {
2
2
  "name": "kugelaudio",
3
- "version": "0.1.1",
3
+ "version": "0.1.2",
4
4
  "description": "Official JavaScript/TypeScript SDK for KugelAudio TTS API",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.mjs",
7
7
  "types": "dist/index.d.ts",
8
8
  "exports": {
9
9
  ".": {
10
+ "types": "./dist/index.d.ts",
10
11
  "import": "./dist/index.mjs",
11
- "require": "./dist/index.js",
12
- "types": "./dist/index.d.ts"
12
+ "require": "./dist/index.js"
13
13
  }
14
14
  },
15
15
  "files": [
package/src/client.ts CHANGED
@@ -111,6 +111,15 @@ class VoicesResource {
111
111
  * TTS resource for text-to-speech generation.
112
112
  */
113
113
  class TTSResource {
114
+ private wsConnection: WebSocket | null = null;
115
+ private wsUrl: string | null = null;
116
+ private pendingRequests: Map<number, {
117
+ callbacks: StreamCallbacks;
118
+ resolve: () => void;
119
+ reject: (error: Error) => void;
120
+ }> = new Map();
121
+ private requestCounter = 0;
122
+
114
123
  constructor(private client: KugelAudio) {}
115
124
 
116
125
  /**
@@ -149,10 +158,185 @@ class TTSResource {
149
158
  };
150
159
  }
151
160
 
161
+ /**
162
+ * Get or create a WebSocket connection for connection pooling.
163
+ * This avoids the ~220ms connect overhead on each request.
164
+ */
165
+ private async getConnection(): Promise<WebSocket> {
166
+ const wsUrl = this.client.ttsUrl
167
+ .replace('https://', 'wss://')
168
+ .replace('http://', 'ws://');
169
+ const url = `${wsUrl}/ws/tts?api_key=${this.client.apiKey}`;
170
+
171
+ // Return existing connection if valid
172
+ if (
173
+ this.wsConnection &&
174
+ this.wsUrl === url &&
175
+ this.wsConnection.readyState === WebSocket.OPEN
176
+ ) {
177
+ return this.wsConnection;
178
+ }
179
+
180
+ // Close old connection if URL changed
181
+ if (this.wsConnection) {
182
+ try {
183
+ this.wsConnection.close();
184
+ } catch {
185
+ // Ignore close errors
186
+ }
187
+ this.wsConnection = null;
188
+ }
189
+
190
+ // Create new connection
191
+ return new Promise((resolve, reject) => {
192
+ const ws = new WebSocket(url);
193
+
194
+ ws.onopen = () => {
195
+ this.wsConnection = ws;
196
+ this.wsUrl = url;
197
+ this.setupMessageHandler(ws);
198
+ resolve(ws);
199
+ };
200
+
201
+ ws.onerror = () => {
202
+ reject(new KugelAudioError('WebSocket connection error'));
203
+ };
204
+ });
205
+ }
206
+
207
+ /**
208
+ * Setup message handler for pooled connection.
209
+ */
210
+ private setupMessageHandler(ws: WebSocket): void {
211
+ ws.onmessage = (event) => {
212
+ try {
213
+ const data = JSON.parse(event.data);
214
+
215
+ // Get the current pending request (we process one at a time)
216
+ const [requestId, pending] = [...this.pendingRequests.entries()][0] || [];
217
+ if (!pending) return;
218
+
219
+ if (data.error) {
220
+ const error = this.parseError(data.error);
221
+ pending.callbacks.onError?.(error);
222
+ this.pendingRequests.delete(requestId);
223
+ pending.reject(error);
224
+ return;
225
+ }
226
+
227
+ if (data.final) {
228
+ const stats: GenerationStats = {
229
+ final: true,
230
+ chunks: data.chunks,
231
+ totalSamples: data.total_samples,
232
+ durationMs: data.dur_ms,
233
+ generationMs: data.gen_ms,
234
+ ttfaMs: data.ttfa_ms,
235
+ rtf: data.rtf,
236
+ error: data.error,
237
+ };
238
+ pending.callbacks.onFinal?.(stats);
239
+ this.pendingRequests.delete(requestId);
240
+ pending.resolve();
241
+ return;
242
+ }
243
+
244
+ if (data.audio) {
245
+ const chunk: AudioChunk = {
246
+ audio: data.audio,
247
+ encoding: data.enc || 'pcm_s16le',
248
+ index: data.idx,
249
+ sampleRate: data.sr,
250
+ samples: data.samples,
251
+ };
252
+ pending.callbacks.onChunk?.(chunk);
253
+ }
254
+ } catch (e) {
255
+ console.error('Failed to parse WebSocket message:', e);
256
+ }
257
+ };
258
+
259
+ ws.onclose = (event) => {
260
+ // Clear connection pool
261
+ this.wsConnection = null;
262
+ this.wsUrl = null;
263
+
264
+ // Reject all pending requests
265
+ for (const [id, pending] of this.pendingRequests) {
266
+ pending.callbacks.onClose?.();
267
+ if (event.code === 4001) {
268
+ pending.reject(new AuthenticationError('Authentication failed'));
269
+ } else if (event.code === 4003) {
270
+ pending.reject(new InsufficientCreditsError('Insufficient credits'));
271
+ }
272
+ this.pendingRequests.delete(id);
273
+ }
274
+ };
275
+
276
+ ws.onerror = () => {
277
+ // Reject all pending requests
278
+ const error = new KugelAudioError('WebSocket connection error');
279
+ for (const [id, pending] of this.pendingRequests) {
280
+ pending.callbacks.onError?.(error);
281
+ pending.reject(error);
282
+ this.pendingRequests.delete(id);
283
+ }
284
+ };
285
+ }
286
+
152
287
  /**
153
288
  * Stream audio from text via WebSocket.
289
+ * Uses connection pooling for faster TTFA (~180ms vs ~400ms).
290
+ *
291
+ * @param options - Generation options
292
+ * @param callbacks - Stream callbacks
293
+ * @param reuseConnection - If true (default), reuse WebSocket connection
154
294
  */
155
- stream(options: GenerateOptions, callbacks: StreamCallbacks): Promise<void> {
295
+ stream(
296
+ options: GenerateOptions,
297
+ callbacks: StreamCallbacks,
298
+ reuseConnection = true
299
+ ): Promise<void> {
300
+ if (reuseConnection) {
301
+ return this.streamWithPooling(options, callbacks);
302
+ }
303
+ return this.streamWithoutPooling(options, callbacks);
304
+ }
305
+
306
+ /**
307
+ * Stream with connection pooling (fast path).
308
+ */
309
+ private async streamWithPooling(
310
+ options: GenerateOptions,
311
+ callbacks: StreamCallbacks
312
+ ): Promise<void> {
313
+ const ws = await this.getConnection();
314
+ const requestId = ++this.requestCounter;
315
+
316
+ return new Promise((resolve, reject) => {
317
+ this.pendingRequests.set(requestId, { callbacks, resolve, reject });
318
+
319
+ callbacks.onOpen?.();
320
+
321
+ ws.send(JSON.stringify({
322
+ text: options.text,
323
+ model: options.model || 'kugel-1-turbo',
324
+ voice_id: options.voiceId,
325
+ cfg_scale: options.cfgScale ?? 2.0,
326
+ max_new_tokens: options.maxNewTokens ?? 2048,
327
+ sample_rate: options.sampleRate ?? 24000,
328
+ speaker_prefix: options.speakerPrefix ?? true,
329
+ }));
330
+ });
331
+ }
332
+
333
+ /**
334
+ * Stream without connection pooling (original behavior).
335
+ */
336
+ private streamWithoutPooling(
337
+ options: GenerateOptions,
338
+ callbacks: StreamCallbacks
339
+ ): Promise<void> {
156
340
  return new Promise((resolve, reject) => {
157
341
  const wsUrl = this.client.ttsUrl
158
342
  .replace('https://', 'wss://')
@@ -166,7 +350,7 @@ class TTSResource {
166
350
  // Send TTS request
167
351
  ws.send(JSON.stringify({
168
352
  text: options.text,
169
- model: options.model || 'kugel-one-turbo',
353
+ model: options.model || 'kugel-1-turbo',
170
354
  voice_id: options.voiceId,
171
355
  cfg_scale: options.cfgScale ?? 2.0,
172
356
  max_new_tokens: options.maxNewTokens ?? 2048,
@@ -236,6 +420,21 @@ class TTSResource {
236
420
  });
237
421
  }
238
422
 
423
+ /**
424
+ * Close the pooled WebSocket connection.
425
+ */
426
+ close(): void {
427
+ if (this.wsConnection) {
428
+ try {
429
+ this.wsConnection.close();
430
+ } catch {
431
+ // Ignore close errors
432
+ }
433
+ this.wsConnection = null;
434
+ this.wsUrl = null;
435
+ }
436
+ }
437
+
239
438
  private parseError(message: string): Error {
240
439
  const lower = message.toLowerCase();
241
440
  if (lower.includes('auth') || lower.includes('unauthorized')) {
@@ -261,10 +460,16 @@ class TTSResource {
261
460
  * // List voices
262
461
  * const voices = await client.voices.list();
263
462
  *
264
- * // Generate audio
463
+ * // Generate audio with fast model (1.5B params)
265
464
  * const audio = await client.tts.generate({
266
465
  * text: 'Hello, world!',
267
- * model: 'kugel-one-turbo',
466
+ * model: 'kugel-1-turbo',
467
+ * });
468
+ *
469
+ * // Generate audio with premium model (7B params)
470
+ * const audio = await client.tts.generate({
471
+ * text: 'Hello, world!',
472
+ * model: 'kugel-1',
268
473
  * });
269
474
  * ```
270
475
  */
@@ -307,6 +512,14 @@ export class KugelAudio {
307
512
  return this._ttsUrl;
308
513
  }
309
514
 
515
+ /**
516
+ * Close the client and release resources.
517
+ * This closes any pooled WebSocket connections.
518
+ */
519
+ close(): void {
520
+ this.tts.close();
521
+ }
522
+
310
523
  /**
311
524
  * Make an HTTP request to the API.
312
525
  * @internal
package/src/index.ts CHANGED
@@ -18,13 +18,13 @@
18
18
  * // Generate audio (non-streaming)
19
19
  * const audio = await client.tts.generate({
20
20
  * text: 'Hello, world!',
21
- * model: 'kugel-one-turbo',
21
+ * model: 'kugel-1-turbo',
22
22
  * voiceId: 123,
23
23
  * });
24
24
  *
25
25
  * // Generate audio (streaming)
26
26
  * await client.tts.stream(
27
- * { text: 'Hello, world!', model: 'kugel-one-turbo' },
27
+ * { text: 'Hello, world!', model: 'kugel-1-turbo' },
28
28
  * {
29
29
  * onChunk: (chunk) => {
30
30
  * // Process audio chunk
package/src/types.ts CHANGED
@@ -53,7 +53,7 @@ export interface Voice {
53
53
  export interface GenerateOptions {
54
54
  /** Text to synthesize */
55
55
  text: string;
56
- /** Model to use (default: 'kugel-one-turbo') */
56
+ /** Model to use: 'kugel-1-turbo' (1.5B, fast) or 'kugel-1' (7B, premium). Default: 'kugel-1-turbo' */
57
57
  model?: string;
58
58
  /** Voice ID to use */
59
59
  voiceId?: number;