kugelaudio 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -9
- package/dist/index.d.mts +54 -4
- package/dist/index.d.ts +54 -4
- package/dist/index.js +171 -4
- package/dist/index.mjs +171 -4
- package/package.json +3 -3
- package/src/client.ts +233 -8
- package/src/index.ts +2 -2
- package/src/types.ts +3 -1
package/README.md
CHANGED
|
@@ -31,7 +31,7 @@ const client = new KugelAudio({ apiKey: 'your_api_key' });
|
|
|
31
31
|
// Generate speech
|
|
32
32
|
const audio = await client.tts.generate({
|
|
33
33
|
text: 'Hello, world!',
|
|
34
|
-
model: 'kugel-
|
|
34
|
+
model: 'kugel-1-turbo',
|
|
35
35
|
});
|
|
36
36
|
|
|
37
37
|
// Create a playable blob (browser)
|
|
@@ -86,8 +86,8 @@ const client = new KugelAudio({
|
|
|
86
86
|
|
|
87
87
|
| Model ID | Name | Parameters | Description |
|
|
88
88
|
|----------|------|------------|-------------|
|
|
89
|
-
| `kugel-
|
|
90
|
-
| `kugel-
|
|
89
|
+
| `kugel-1-turbo` | Kugel 1 Turbo | 1.5B | Fast, low-latency model for real-time applications |
|
|
90
|
+
| `kugel-1` | Kugel 1 | 7B | Premium quality model for pre-recorded content |
|
|
91
91
|
|
|
92
92
|
### List Available Models
|
|
93
93
|
|
|
@@ -144,7 +144,7 @@ Generate complete audio and receive it all at once:
|
|
|
144
144
|
```typescript
|
|
145
145
|
const audio = await client.tts.generate({
|
|
146
146
|
text: 'Hello, this is a test of the KugelAudio text-to-speech system.',
|
|
147
|
-
model: 'kugel-
|
|
147
|
+
model: 'kugel-1-turbo', // 'kugel-1-turbo' (fast) or 'kugel-1' (quality)
|
|
148
148
|
voiceId: 123, // Optional: specific voice ID
|
|
149
149
|
cfgScale: 2.0, // Guidance scale (1.0-5.0)
|
|
150
150
|
maxNewTokens: 2048, // Maximum tokens to generate
|
|
@@ -169,7 +169,7 @@ import { createWavBlob } from 'kugelaudio';
|
|
|
169
169
|
|
|
170
170
|
const audio = await client.tts.generate({
|
|
171
171
|
text: 'Hello, world!',
|
|
172
|
-
model: 'kugel-
|
|
172
|
+
model: 'kugel-1-turbo',
|
|
173
173
|
});
|
|
174
174
|
|
|
175
175
|
// Create WAV blob for playback
|
|
@@ -198,7 +198,7 @@ Receive audio chunks as they are generated for lower latency:
|
|
|
198
198
|
await client.tts.stream(
|
|
199
199
|
{
|
|
200
200
|
text: 'Hello, this is streaming audio.',
|
|
201
|
-
model: 'kugel-
|
|
201
|
+
model: 'kugel-1-turbo',
|
|
202
202
|
},
|
|
203
203
|
{
|
|
204
204
|
onOpen: () => {
|
|
@@ -300,7 +300,7 @@ interface KugelAudioOptions {
|
|
|
300
300
|
```typescript
|
|
301
301
|
interface GenerateOptions {
|
|
302
302
|
text: string; // Required: Text to synthesize
|
|
303
|
-
model?: string; // Default: 'kugel-
|
|
303
|
+
model?: string; // Default: 'kugel-1-turbo'
|
|
304
304
|
voiceId?: number; // Optional: Voice ID
|
|
305
305
|
cfgScale?: number; // Default: 2.0
|
|
306
306
|
maxNewTokens?: number; // Default: 2048
|
|
@@ -364,7 +364,7 @@ interface StreamCallbacks {
|
|
|
364
364
|
|
|
365
365
|
```typescript
|
|
366
366
|
interface Model {
|
|
367
|
-
id: string; // 'kugel-
|
|
367
|
+
id: string; // 'kugel-1-turbo' or 'kugel-1'
|
|
368
368
|
name: string; // Human-readable name
|
|
369
369
|
description: string; // Model description
|
|
370
370
|
parameters: string; // Parameter count ('1.5B', '7B')
|
|
@@ -467,7 +467,7 @@ async function main() {
|
|
|
467
467
|
await client.tts.stream(
|
|
468
468
|
{
|
|
469
469
|
text: 'Welcome to KugelAudio. This is an example of high-quality text-to-speech synthesis.',
|
|
470
|
-
model: 'kugel-
|
|
470
|
+
model: 'kugel-1-turbo',
|
|
471
471
|
},
|
|
472
472
|
{
|
|
473
473
|
onChunk: (chunk) => {
|
package/dist/index.d.mts
CHANGED
|
@@ -47,7 +47,7 @@ interface Voice {
|
|
|
47
47
|
interface GenerateOptions {
|
|
48
48
|
/** Text to synthesize */
|
|
49
49
|
text: string;
|
|
50
|
-
/** Model to use (
|
|
50
|
+
/** Model to use: 'kugel-1-turbo' (1.5B, fast) or 'kugel-1' (7B, premium). Default: 'kugel-1-turbo' */
|
|
51
51
|
model?: string;
|
|
52
52
|
/** Voice ID to use */
|
|
53
53
|
voiceId?: number;
|
|
@@ -153,6 +153,8 @@ interface StreamCallbacks {
|
|
|
153
153
|
interface KugelAudioOptions {
|
|
154
154
|
/** Your KugelAudio API key */
|
|
155
155
|
apiKey: string;
|
|
156
|
+
/** Whether apiKey is a master key (for internal/server-side use). Master keys bypass billing. */
|
|
157
|
+
isMasterKey?: boolean;
|
|
156
158
|
/** API base URL (default: https://api.kugelaudio.com) */
|
|
157
159
|
apiUrl?: string;
|
|
158
160
|
/** TTS server URL (default: https://eu.kugelaudio.com) */
|
|
@@ -200,16 +202,50 @@ declare class VoicesResource {
|
|
|
200
202
|
*/
|
|
201
203
|
declare class TTSResource {
|
|
202
204
|
private client;
|
|
205
|
+
private wsConnection;
|
|
206
|
+
private wsUrl;
|
|
207
|
+
private pendingRequests;
|
|
208
|
+
private requestCounter;
|
|
203
209
|
constructor(client: KugelAudio);
|
|
204
210
|
/**
|
|
205
211
|
* Generate audio from text with streaming via WebSocket.
|
|
206
212
|
* Returns complete audio after all chunks are received.
|
|
207
213
|
*/
|
|
208
214
|
generate(options: GenerateOptions): Promise<AudioResponse>;
|
|
215
|
+
/**
|
|
216
|
+
* Build the WebSocket URL with appropriate auth param.
|
|
217
|
+
*/
|
|
218
|
+
private buildWsUrl;
|
|
219
|
+
/**
|
|
220
|
+
* Get or create a WebSocket connection for connection pooling.
|
|
221
|
+
* This avoids the ~220ms connect overhead on each request.
|
|
222
|
+
*/
|
|
223
|
+
private getConnection;
|
|
224
|
+
/**
|
|
225
|
+
* Setup message handler for pooled connection.
|
|
226
|
+
*/
|
|
227
|
+
private setupMessageHandler;
|
|
209
228
|
/**
|
|
210
229
|
* Stream audio from text via WebSocket.
|
|
230
|
+
* Uses connection pooling for faster TTFA (~180ms vs ~400ms).
|
|
231
|
+
*
|
|
232
|
+
* @param options - Generation options
|
|
233
|
+
* @param callbacks - Stream callbacks
|
|
234
|
+
* @param reuseConnection - If true (default), reuse WebSocket connection
|
|
235
|
+
*/
|
|
236
|
+
stream(options: GenerateOptions, callbacks: StreamCallbacks, reuseConnection?: boolean): Promise<void>;
|
|
237
|
+
/**
|
|
238
|
+
* Stream with connection pooling (fast path).
|
|
239
|
+
*/
|
|
240
|
+
private streamWithPooling;
|
|
241
|
+
/**
|
|
242
|
+
* Stream without connection pooling (original behavior).
|
|
243
|
+
*/
|
|
244
|
+
private streamWithoutPooling;
|
|
245
|
+
/**
|
|
246
|
+
* Close the pooled WebSocket connection.
|
|
211
247
|
*/
|
|
212
|
-
|
|
248
|
+
close(): void;
|
|
213
249
|
private parseError;
|
|
214
250
|
}
|
|
215
251
|
/**
|
|
@@ -225,15 +261,22 @@ declare class TTSResource {
|
|
|
225
261
|
* // List voices
|
|
226
262
|
* const voices = await client.voices.list();
|
|
227
263
|
*
|
|
228
|
-
* // Generate audio
|
|
264
|
+
* // Generate audio with fast model (1.5B params)
|
|
229
265
|
* const audio = await client.tts.generate({
|
|
230
266
|
* text: 'Hello, world!',
|
|
231
|
-
* model: 'kugel-
|
|
267
|
+
* model: 'kugel-1-turbo',
|
|
268
|
+
* });
|
|
269
|
+
*
|
|
270
|
+
* // Generate audio with premium model (7B params)
|
|
271
|
+
* const audio = await client.tts.generate({
|
|
272
|
+
* text: 'Hello, world!',
|
|
273
|
+
* model: 'kugel-1',
|
|
232
274
|
* });
|
|
233
275
|
* ```
|
|
234
276
|
*/
|
|
235
277
|
declare class KugelAudio {
|
|
236
278
|
private _apiKey;
|
|
279
|
+
private _isMasterKey;
|
|
237
280
|
private _apiUrl;
|
|
238
281
|
private _ttsUrl;
|
|
239
282
|
private _timeout;
|
|
@@ -246,8 +289,15 @@ declare class KugelAudio {
|
|
|
246
289
|
constructor(options: KugelAudioOptions);
|
|
247
290
|
/** Get API key */
|
|
248
291
|
get apiKey(): string;
|
|
292
|
+
/** Check if using master key authentication */
|
|
293
|
+
get isMasterKey(): boolean;
|
|
249
294
|
/** Get TTS URL */
|
|
250
295
|
get ttsUrl(): string;
|
|
296
|
+
/**
|
|
297
|
+
* Close the client and release resources.
|
|
298
|
+
* This closes any pooled WebSocket connections.
|
|
299
|
+
*/
|
|
300
|
+
close(): void;
|
|
251
301
|
/**
|
|
252
302
|
* Make an HTTP request to the API.
|
|
253
303
|
* @internal
|
package/dist/index.d.ts
CHANGED
|
@@ -47,7 +47,7 @@ interface Voice {
|
|
|
47
47
|
interface GenerateOptions {
|
|
48
48
|
/** Text to synthesize */
|
|
49
49
|
text: string;
|
|
50
|
-
/** Model to use (
|
|
50
|
+
/** Model to use: 'kugel-1-turbo' (1.5B, fast) or 'kugel-1' (7B, premium). Default: 'kugel-1-turbo' */
|
|
51
51
|
model?: string;
|
|
52
52
|
/** Voice ID to use */
|
|
53
53
|
voiceId?: number;
|
|
@@ -153,6 +153,8 @@ interface StreamCallbacks {
|
|
|
153
153
|
interface KugelAudioOptions {
|
|
154
154
|
/** Your KugelAudio API key */
|
|
155
155
|
apiKey: string;
|
|
156
|
+
/** Whether apiKey is a master key (for internal/server-side use). Master keys bypass billing. */
|
|
157
|
+
isMasterKey?: boolean;
|
|
156
158
|
/** API base URL (default: https://api.kugelaudio.com) */
|
|
157
159
|
apiUrl?: string;
|
|
158
160
|
/** TTS server URL (default: https://eu.kugelaudio.com) */
|
|
@@ -200,16 +202,50 @@ declare class VoicesResource {
|
|
|
200
202
|
*/
|
|
201
203
|
declare class TTSResource {
|
|
202
204
|
private client;
|
|
205
|
+
private wsConnection;
|
|
206
|
+
private wsUrl;
|
|
207
|
+
private pendingRequests;
|
|
208
|
+
private requestCounter;
|
|
203
209
|
constructor(client: KugelAudio);
|
|
204
210
|
/**
|
|
205
211
|
* Generate audio from text with streaming via WebSocket.
|
|
206
212
|
* Returns complete audio after all chunks are received.
|
|
207
213
|
*/
|
|
208
214
|
generate(options: GenerateOptions): Promise<AudioResponse>;
|
|
215
|
+
/**
|
|
216
|
+
* Build the WebSocket URL with appropriate auth param.
|
|
217
|
+
*/
|
|
218
|
+
private buildWsUrl;
|
|
219
|
+
/**
|
|
220
|
+
* Get or create a WebSocket connection for connection pooling.
|
|
221
|
+
* This avoids the ~220ms connect overhead on each request.
|
|
222
|
+
*/
|
|
223
|
+
private getConnection;
|
|
224
|
+
/**
|
|
225
|
+
* Setup message handler for pooled connection.
|
|
226
|
+
*/
|
|
227
|
+
private setupMessageHandler;
|
|
209
228
|
/**
|
|
210
229
|
* Stream audio from text via WebSocket.
|
|
230
|
+
* Uses connection pooling for faster TTFA (~180ms vs ~400ms).
|
|
231
|
+
*
|
|
232
|
+
* @param options - Generation options
|
|
233
|
+
* @param callbacks - Stream callbacks
|
|
234
|
+
* @param reuseConnection - If true (default), reuse WebSocket connection
|
|
235
|
+
*/
|
|
236
|
+
stream(options: GenerateOptions, callbacks: StreamCallbacks, reuseConnection?: boolean): Promise<void>;
|
|
237
|
+
/**
|
|
238
|
+
* Stream with connection pooling (fast path).
|
|
239
|
+
*/
|
|
240
|
+
private streamWithPooling;
|
|
241
|
+
/**
|
|
242
|
+
* Stream without connection pooling (original behavior).
|
|
243
|
+
*/
|
|
244
|
+
private streamWithoutPooling;
|
|
245
|
+
/**
|
|
246
|
+
* Close the pooled WebSocket connection.
|
|
211
247
|
*/
|
|
212
|
-
|
|
248
|
+
close(): void;
|
|
213
249
|
private parseError;
|
|
214
250
|
}
|
|
215
251
|
/**
|
|
@@ -225,15 +261,22 @@ declare class TTSResource {
|
|
|
225
261
|
* // List voices
|
|
226
262
|
* const voices = await client.voices.list();
|
|
227
263
|
*
|
|
228
|
-
* // Generate audio
|
|
264
|
+
* // Generate audio with fast model (1.5B params)
|
|
229
265
|
* const audio = await client.tts.generate({
|
|
230
266
|
* text: 'Hello, world!',
|
|
231
|
-
* model: 'kugel-
|
|
267
|
+
* model: 'kugel-1-turbo',
|
|
268
|
+
* });
|
|
269
|
+
*
|
|
270
|
+
* // Generate audio with premium model (7B params)
|
|
271
|
+
* const audio = await client.tts.generate({
|
|
272
|
+
* text: 'Hello, world!',
|
|
273
|
+
* model: 'kugel-1',
|
|
232
274
|
* });
|
|
233
275
|
* ```
|
|
234
276
|
*/
|
|
235
277
|
declare class KugelAudio {
|
|
236
278
|
private _apiKey;
|
|
279
|
+
private _isMasterKey;
|
|
237
280
|
private _apiUrl;
|
|
238
281
|
private _ttsUrl;
|
|
239
282
|
private _timeout;
|
|
@@ -246,8 +289,15 @@ declare class KugelAudio {
|
|
|
246
289
|
constructor(options: KugelAudioOptions);
|
|
247
290
|
/** Get API key */
|
|
248
291
|
get apiKey(): string;
|
|
292
|
+
/** Check if using master key authentication */
|
|
293
|
+
get isMasterKey(): boolean;
|
|
249
294
|
/** Get TTS URL */
|
|
250
295
|
get ttsUrl(): string;
|
|
296
|
+
/**
|
|
297
|
+
* Close the client and release resources.
|
|
298
|
+
* This closes any pooled WebSocket connections.
|
|
299
|
+
*/
|
|
300
|
+
close(): void;
|
|
251
301
|
/**
|
|
252
302
|
* Make an HTTP request to the API.
|
|
253
303
|
* @internal
|
package/dist/index.js
CHANGED
|
@@ -212,6 +212,10 @@ var VoicesResource = class {
|
|
|
212
212
|
var TTSResource = class {
|
|
213
213
|
constructor(client) {
|
|
214
214
|
this.client = client;
|
|
215
|
+
this.wsConnection = null;
|
|
216
|
+
this.wsUrl = null;
|
|
217
|
+
this.pendingRequests = /* @__PURE__ */ new Map();
|
|
218
|
+
this.requestCounter = 0;
|
|
215
219
|
}
|
|
216
220
|
/**
|
|
217
221
|
* Generate audio from text with streaming via WebSocket.
|
|
@@ -244,19 +248,157 @@ var TTSResource = class {
|
|
|
244
248
|
rtf: finalStats ? finalStats.rtf : 0
|
|
245
249
|
};
|
|
246
250
|
}
|
|
251
|
+
/**
|
|
252
|
+
* Build the WebSocket URL with appropriate auth param.
|
|
253
|
+
*/
|
|
254
|
+
buildWsUrl() {
|
|
255
|
+
const wsUrl = this.client.ttsUrl.replace("https://", "wss://").replace("http://", "ws://");
|
|
256
|
+
const authParam = this.client.isMasterKey ? "master_key" : "api_key";
|
|
257
|
+
return `${wsUrl}/ws/tts?${authParam}=${this.client.apiKey}`;
|
|
258
|
+
}
|
|
259
|
+
/**
|
|
260
|
+
* Get or create a WebSocket connection for connection pooling.
|
|
261
|
+
* This avoids the ~220ms connect overhead on each request.
|
|
262
|
+
*/
|
|
263
|
+
async getConnection() {
|
|
264
|
+
const url = this.buildWsUrl();
|
|
265
|
+
if (this.wsConnection && this.wsUrl === url && this.wsConnection.readyState === WebSocket.OPEN) {
|
|
266
|
+
return this.wsConnection;
|
|
267
|
+
}
|
|
268
|
+
if (this.wsConnection) {
|
|
269
|
+
try {
|
|
270
|
+
this.wsConnection.close();
|
|
271
|
+
} catch {
|
|
272
|
+
}
|
|
273
|
+
this.wsConnection = null;
|
|
274
|
+
}
|
|
275
|
+
return new Promise((resolve, reject) => {
|
|
276
|
+
const ws = new WebSocket(url);
|
|
277
|
+
ws.onopen = () => {
|
|
278
|
+
this.wsConnection = ws;
|
|
279
|
+
this.wsUrl = url;
|
|
280
|
+
this.setupMessageHandler(ws);
|
|
281
|
+
resolve(ws);
|
|
282
|
+
};
|
|
283
|
+
ws.onerror = () => {
|
|
284
|
+
reject(new KugelAudioError("WebSocket connection error"));
|
|
285
|
+
};
|
|
286
|
+
});
|
|
287
|
+
}
|
|
288
|
+
/**
|
|
289
|
+
* Setup message handler for pooled connection.
|
|
290
|
+
*/
|
|
291
|
+
setupMessageHandler(ws) {
|
|
292
|
+
ws.onmessage = (event) => {
|
|
293
|
+
try {
|
|
294
|
+
const data = JSON.parse(event.data);
|
|
295
|
+
const [requestId, pending] = [...this.pendingRequests.entries()][0] || [];
|
|
296
|
+
if (!pending) return;
|
|
297
|
+
if (data.error) {
|
|
298
|
+
const error = this.parseError(data.error);
|
|
299
|
+
pending.callbacks.onError?.(error);
|
|
300
|
+
this.pendingRequests.delete(requestId);
|
|
301
|
+
pending.reject(error);
|
|
302
|
+
return;
|
|
303
|
+
}
|
|
304
|
+
if (data.final) {
|
|
305
|
+
const stats = {
|
|
306
|
+
final: true,
|
|
307
|
+
chunks: data.chunks,
|
|
308
|
+
totalSamples: data.total_samples,
|
|
309
|
+
durationMs: data.dur_ms,
|
|
310
|
+
generationMs: data.gen_ms,
|
|
311
|
+
ttfaMs: data.ttfa_ms,
|
|
312
|
+
rtf: data.rtf,
|
|
313
|
+
error: data.error
|
|
314
|
+
};
|
|
315
|
+
pending.callbacks.onFinal?.(stats);
|
|
316
|
+
this.pendingRequests.delete(requestId);
|
|
317
|
+
pending.resolve();
|
|
318
|
+
return;
|
|
319
|
+
}
|
|
320
|
+
if (data.audio) {
|
|
321
|
+
const chunk = {
|
|
322
|
+
audio: data.audio,
|
|
323
|
+
encoding: data.enc || "pcm_s16le",
|
|
324
|
+
index: data.idx,
|
|
325
|
+
sampleRate: data.sr,
|
|
326
|
+
samples: data.samples
|
|
327
|
+
};
|
|
328
|
+
pending.callbacks.onChunk?.(chunk);
|
|
329
|
+
}
|
|
330
|
+
} catch (e) {
|
|
331
|
+
console.error("Failed to parse WebSocket message:", e);
|
|
332
|
+
}
|
|
333
|
+
};
|
|
334
|
+
ws.onclose = (event) => {
|
|
335
|
+
this.wsConnection = null;
|
|
336
|
+
this.wsUrl = null;
|
|
337
|
+
for (const [id, pending] of this.pendingRequests) {
|
|
338
|
+
pending.callbacks.onClose?.();
|
|
339
|
+
if (event.code === 4001) {
|
|
340
|
+
pending.reject(new AuthenticationError("Authentication failed"));
|
|
341
|
+
} else if (event.code === 4003) {
|
|
342
|
+
pending.reject(new InsufficientCreditsError("Insufficient credits"));
|
|
343
|
+
}
|
|
344
|
+
this.pendingRequests.delete(id);
|
|
345
|
+
}
|
|
346
|
+
};
|
|
347
|
+
ws.onerror = () => {
|
|
348
|
+
const error = new KugelAudioError("WebSocket connection error");
|
|
349
|
+
for (const [id, pending] of this.pendingRequests) {
|
|
350
|
+
pending.callbacks.onError?.(error);
|
|
351
|
+
pending.reject(error);
|
|
352
|
+
this.pendingRequests.delete(id);
|
|
353
|
+
}
|
|
354
|
+
};
|
|
355
|
+
}
|
|
247
356
|
/**
|
|
248
357
|
* Stream audio from text via WebSocket.
|
|
358
|
+
* Uses connection pooling for faster TTFA (~180ms vs ~400ms).
|
|
359
|
+
*
|
|
360
|
+
* @param options - Generation options
|
|
361
|
+
* @param callbacks - Stream callbacks
|
|
362
|
+
* @param reuseConnection - If true (default), reuse WebSocket connection
|
|
363
|
+
*/
|
|
364
|
+
stream(options, callbacks, reuseConnection = true) {
|
|
365
|
+
if (reuseConnection) {
|
|
366
|
+
return this.streamWithPooling(options, callbacks);
|
|
367
|
+
}
|
|
368
|
+
return this.streamWithoutPooling(options, callbacks);
|
|
369
|
+
}
|
|
370
|
+
/**
|
|
371
|
+
* Stream with connection pooling (fast path).
|
|
372
|
+
*/
|
|
373
|
+
async streamWithPooling(options, callbacks) {
|
|
374
|
+
const ws = await this.getConnection();
|
|
375
|
+
const requestId = ++this.requestCounter;
|
|
376
|
+
return new Promise((resolve, reject) => {
|
|
377
|
+
this.pendingRequests.set(requestId, { callbacks, resolve, reject });
|
|
378
|
+
callbacks.onOpen?.();
|
|
379
|
+
ws.send(JSON.stringify({
|
|
380
|
+
text: options.text,
|
|
381
|
+
model: options.model || "kugel-1-turbo",
|
|
382
|
+
voice_id: options.voiceId,
|
|
383
|
+
cfg_scale: options.cfgScale ?? 2,
|
|
384
|
+
max_new_tokens: options.maxNewTokens ?? 2048,
|
|
385
|
+
sample_rate: options.sampleRate ?? 24e3,
|
|
386
|
+
speaker_prefix: options.speakerPrefix ?? true
|
|
387
|
+
}));
|
|
388
|
+
});
|
|
389
|
+
}
|
|
390
|
+
/**
|
|
391
|
+
* Stream without connection pooling (original behavior).
|
|
249
392
|
*/
|
|
250
|
-
|
|
393
|
+
streamWithoutPooling(options, callbacks) {
|
|
251
394
|
return new Promise((resolve, reject) => {
|
|
252
|
-
const wsUrl = this.client.ttsUrl.replace("https://", "wss://").replace("http://", "ws://");
|
|
253
|
-
const url = `${wsUrl}/ws/tts?api_key=${this.client.apiKey}`;
|
|
395
|
+
const url = this.buildWsUrl();
|
|
254
396
|
const ws = new WebSocket(url);
|
|
255
397
|
ws.onopen = () => {
|
|
256
398
|
callbacks.onOpen?.();
|
|
257
399
|
ws.send(JSON.stringify({
|
|
258
400
|
text: options.text,
|
|
259
|
-
model: options.model || "kugel-
|
|
401
|
+
model: options.model || "kugel-1-turbo",
|
|
260
402
|
voice_id: options.voiceId,
|
|
261
403
|
cfg_scale: options.cfgScale ?? 2,
|
|
262
404
|
max_new_tokens: options.maxNewTokens ?? 2048,
|
|
@@ -319,6 +461,19 @@ var TTSResource = class {
|
|
|
319
461
|
};
|
|
320
462
|
});
|
|
321
463
|
}
|
|
464
|
+
/**
|
|
465
|
+
* Close the pooled WebSocket connection.
|
|
466
|
+
*/
|
|
467
|
+
close() {
|
|
468
|
+
if (this.wsConnection) {
|
|
469
|
+
try {
|
|
470
|
+
this.wsConnection.close();
|
|
471
|
+
} catch {
|
|
472
|
+
}
|
|
473
|
+
this.wsConnection = null;
|
|
474
|
+
this.wsUrl = null;
|
|
475
|
+
}
|
|
476
|
+
}
|
|
322
477
|
parseError(message) {
|
|
323
478
|
const lower = message.toLowerCase();
|
|
324
479
|
if (lower.includes("auth") || lower.includes("unauthorized")) {
|
|
@@ -336,6 +491,7 @@ var KugelAudio = class {
|
|
|
336
491
|
throw new Error("API key is required");
|
|
337
492
|
}
|
|
338
493
|
this._apiKey = options.apiKey;
|
|
494
|
+
this._isMasterKey = options.isMasterKey || false;
|
|
339
495
|
this._apiUrl = (options.apiUrl || DEFAULT_API_URL).replace(/\/$/, "");
|
|
340
496
|
this._ttsUrl = (options.ttsUrl || this._apiUrl).replace(/\/$/, "");
|
|
341
497
|
this._timeout = options.timeout || 6e4;
|
|
@@ -347,10 +503,21 @@ var KugelAudio = class {
|
|
|
347
503
|
get apiKey() {
|
|
348
504
|
return this._apiKey;
|
|
349
505
|
}
|
|
506
|
+
/** Check if using master key authentication */
|
|
507
|
+
get isMasterKey() {
|
|
508
|
+
return this._isMasterKey;
|
|
509
|
+
}
|
|
350
510
|
/** Get TTS URL */
|
|
351
511
|
get ttsUrl() {
|
|
352
512
|
return this._ttsUrl;
|
|
353
513
|
}
|
|
514
|
+
/**
|
|
515
|
+
* Close the client and release resources.
|
|
516
|
+
* This closes any pooled WebSocket connections.
|
|
517
|
+
*/
|
|
518
|
+
close() {
|
|
519
|
+
this.tts.close();
|
|
520
|
+
}
|
|
354
521
|
/**
|
|
355
522
|
* Make an HTTP request to the API.
|
|
356
523
|
* @internal
|
package/dist/index.mjs
CHANGED
|
@@ -176,6 +176,10 @@ var VoicesResource = class {
|
|
|
176
176
|
var TTSResource = class {
|
|
177
177
|
constructor(client) {
|
|
178
178
|
this.client = client;
|
|
179
|
+
this.wsConnection = null;
|
|
180
|
+
this.wsUrl = null;
|
|
181
|
+
this.pendingRequests = /* @__PURE__ */ new Map();
|
|
182
|
+
this.requestCounter = 0;
|
|
179
183
|
}
|
|
180
184
|
/**
|
|
181
185
|
* Generate audio from text with streaming via WebSocket.
|
|
@@ -208,19 +212,157 @@ var TTSResource = class {
|
|
|
208
212
|
rtf: finalStats ? finalStats.rtf : 0
|
|
209
213
|
};
|
|
210
214
|
}
|
|
215
|
+
/**
|
|
216
|
+
* Build the WebSocket URL with appropriate auth param.
|
|
217
|
+
*/
|
|
218
|
+
buildWsUrl() {
|
|
219
|
+
const wsUrl = this.client.ttsUrl.replace("https://", "wss://").replace("http://", "ws://");
|
|
220
|
+
const authParam = this.client.isMasterKey ? "master_key" : "api_key";
|
|
221
|
+
return `${wsUrl}/ws/tts?${authParam}=${this.client.apiKey}`;
|
|
222
|
+
}
|
|
223
|
+
/**
|
|
224
|
+
* Get or create a WebSocket connection for connection pooling.
|
|
225
|
+
* This avoids the ~220ms connect overhead on each request.
|
|
226
|
+
*/
|
|
227
|
+
async getConnection() {
|
|
228
|
+
const url = this.buildWsUrl();
|
|
229
|
+
if (this.wsConnection && this.wsUrl === url && this.wsConnection.readyState === WebSocket.OPEN) {
|
|
230
|
+
return this.wsConnection;
|
|
231
|
+
}
|
|
232
|
+
if (this.wsConnection) {
|
|
233
|
+
try {
|
|
234
|
+
this.wsConnection.close();
|
|
235
|
+
} catch {
|
|
236
|
+
}
|
|
237
|
+
this.wsConnection = null;
|
|
238
|
+
}
|
|
239
|
+
return new Promise((resolve, reject) => {
|
|
240
|
+
const ws = new WebSocket(url);
|
|
241
|
+
ws.onopen = () => {
|
|
242
|
+
this.wsConnection = ws;
|
|
243
|
+
this.wsUrl = url;
|
|
244
|
+
this.setupMessageHandler(ws);
|
|
245
|
+
resolve(ws);
|
|
246
|
+
};
|
|
247
|
+
ws.onerror = () => {
|
|
248
|
+
reject(new KugelAudioError("WebSocket connection error"));
|
|
249
|
+
};
|
|
250
|
+
});
|
|
251
|
+
}
|
|
252
|
+
/**
|
|
253
|
+
* Setup message handler for pooled connection.
|
|
254
|
+
*/
|
|
255
|
+
setupMessageHandler(ws) {
|
|
256
|
+
ws.onmessage = (event) => {
|
|
257
|
+
try {
|
|
258
|
+
const data = JSON.parse(event.data);
|
|
259
|
+
const [requestId, pending] = [...this.pendingRequests.entries()][0] || [];
|
|
260
|
+
if (!pending) return;
|
|
261
|
+
if (data.error) {
|
|
262
|
+
const error = this.parseError(data.error);
|
|
263
|
+
pending.callbacks.onError?.(error);
|
|
264
|
+
this.pendingRequests.delete(requestId);
|
|
265
|
+
pending.reject(error);
|
|
266
|
+
return;
|
|
267
|
+
}
|
|
268
|
+
if (data.final) {
|
|
269
|
+
const stats = {
|
|
270
|
+
final: true,
|
|
271
|
+
chunks: data.chunks,
|
|
272
|
+
totalSamples: data.total_samples,
|
|
273
|
+
durationMs: data.dur_ms,
|
|
274
|
+
generationMs: data.gen_ms,
|
|
275
|
+
ttfaMs: data.ttfa_ms,
|
|
276
|
+
rtf: data.rtf,
|
|
277
|
+
error: data.error
|
|
278
|
+
};
|
|
279
|
+
pending.callbacks.onFinal?.(stats);
|
|
280
|
+
this.pendingRequests.delete(requestId);
|
|
281
|
+
pending.resolve();
|
|
282
|
+
return;
|
|
283
|
+
}
|
|
284
|
+
if (data.audio) {
|
|
285
|
+
const chunk = {
|
|
286
|
+
audio: data.audio,
|
|
287
|
+
encoding: data.enc || "pcm_s16le",
|
|
288
|
+
index: data.idx,
|
|
289
|
+
sampleRate: data.sr,
|
|
290
|
+
samples: data.samples
|
|
291
|
+
};
|
|
292
|
+
pending.callbacks.onChunk?.(chunk);
|
|
293
|
+
}
|
|
294
|
+
} catch (e) {
|
|
295
|
+
console.error("Failed to parse WebSocket message:", e);
|
|
296
|
+
}
|
|
297
|
+
};
|
|
298
|
+
ws.onclose = (event) => {
|
|
299
|
+
this.wsConnection = null;
|
|
300
|
+
this.wsUrl = null;
|
|
301
|
+
for (const [id, pending] of this.pendingRequests) {
|
|
302
|
+
pending.callbacks.onClose?.();
|
|
303
|
+
if (event.code === 4001) {
|
|
304
|
+
pending.reject(new AuthenticationError("Authentication failed"));
|
|
305
|
+
} else if (event.code === 4003) {
|
|
306
|
+
pending.reject(new InsufficientCreditsError("Insufficient credits"));
|
|
307
|
+
}
|
|
308
|
+
this.pendingRequests.delete(id);
|
|
309
|
+
}
|
|
310
|
+
};
|
|
311
|
+
ws.onerror = () => {
|
|
312
|
+
const error = new KugelAudioError("WebSocket connection error");
|
|
313
|
+
for (const [id, pending] of this.pendingRequests) {
|
|
314
|
+
pending.callbacks.onError?.(error);
|
|
315
|
+
pending.reject(error);
|
|
316
|
+
this.pendingRequests.delete(id);
|
|
317
|
+
}
|
|
318
|
+
};
|
|
319
|
+
}
|
|
211
320
|
/**
|
|
212
321
|
* Stream audio from text via WebSocket.
|
|
322
|
+
* Uses connection pooling for faster TTFA (~180ms vs ~400ms).
|
|
323
|
+
*
|
|
324
|
+
* @param options - Generation options
|
|
325
|
+
* @param callbacks - Stream callbacks
|
|
326
|
+
* @param reuseConnection - If true (default), reuse WebSocket connection
|
|
327
|
+
*/
|
|
328
|
+
stream(options, callbacks, reuseConnection = true) {
|
|
329
|
+
if (reuseConnection) {
|
|
330
|
+
return this.streamWithPooling(options, callbacks);
|
|
331
|
+
}
|
|
332
|
+
return this.streamWithoutPooling(options, callbacks);
|
|
333
|
+
}
|
|
334
|
+
/**
|
|
335
|
+
* Stream with connection pooling (fast path).
|
|
336
|
+
*/
|
|
337
|
+
async streamWithPooling(options, callbacks) {
|
|
338
|
+
const ws = await this.getConnection();
|
|
339
|
+
const requestId = ++this.requestCounter;
|
|
340
|
+
return new Promise((resolve, reject) => {
|
|
341
|
+
this.pendingRequests.set(requestId, { callbacks, resolve, reject });
|
|
342
|
+
callbacks.onOpen?.();
|
|
343
|
+
ws.send(JSON.stringify({
|
|
344
|
+
text: options.text,
|
|
345
|
+
model: options.model || "kugel-1-turbo",
|
|
346
|
+
voice_id: options.voiceId,
|
|
347
|
+
cfg_scale: options.cfgScale ?? 2,
|
|
348
|
+
max_new_tokens: options.maxNewTokens ?? 2048,
|
|
349
|
+
sample_rate: options.sampleRate ?? 24e3,
|
|
350
|
+
speaker_prefix: options.speakerPrefix ?? true
|
|
351
|
+
}));
|
|
352
|
+
});
|
|
353
|
+
}
|
|
354
|
+
/**
|
|
355
|
+
* Stream without connection pooling (original behavior).
|
|
213
356
|
*/
|
|
214
|
-
|
|
357
|
+
streamWithoutPooling(options, callbacks) {
|
|
215
358
|
return new Promise((resolve, reject) => {
|
|
216
|
-
const wsUrl = this.client.ttsUrl.replace("https://", "wss://").replace("http://", "ws://");
|
|
217
|
-
const url = `${wsUrl}/ws/tts?api_key=${this.client.apiKey}`;
|
|
359
|
+
const url = this.buildWsUrl();
|
|
218
360
|
const ws = new WebSocket(url);
|
|
219
361
|
ws.onopen = () => {
|
|
220
362
|
callbacks.onOpen?.();
|
|
221
363
|
ws.send(JSON.stringify({
|
|
222
364
|
text: options.text,
|
|
223
|
-
model: options.model || "kugel-
|
|
365
|
+
model: options.model || "kugel-1-turbo",
|
|
224
366
|
voice_id: options.voiceId,
|
|
225
367
|
cfg_scale: options.cfgScale ?? 2,
|
|
226
368
|
max_new_tokens: options.maxNewTokens ?? 2048,
|
|
@@ -283,6 +425,19 @@ var TTSResource = class {
|
|
|
283
425
|
};
|
|
284
426
|
});
|
|
285
427
|
}
|
|
428
|
+
/**
|
|
429
|
+
* Close the pooled WebSocket connection.
|
|
430
|
+
*/
|
|
431
|
+
close() {
|
|
432
|
+
if (this.wsConnection) {
|
|
433
|
+
try {
|
|
434
|
+
this.wsConnection.close();
|
|
435
|
+
} catch {
|
|
436
|
+
}
|
|
437
|
+
this.wsConnection = null;
|
|
438
|
+
this.wsUrl = null;
|
|
439
|
+
}
|
|
440
|
+
}
|
|
286
441
|
parseError(message) {
|
|
287
442
|
const lower = message.toLowerCase();
|
|
288
443
|
if (lower.includes("auth") || lower.includes("unauthorized")) {
|
|
@@ -300,6 +455,7 @@ var KugelAudio = class {
|
|
|
300
455
|
throw new Error("API key is required");
|
|
301
456
|
}
|
|
302
457
|
this._apiKey = options.apiKey;
|
|
458
|
+
this._isMasterKey = options.isMasterKey || false;
|
|
303
459
|
this._apiUrl = (options.apiUrl || DEFAULT_API_URL).replace(/\/$/, "");
|
|
304
460
|
this._ttsUrl = (options.ttsUrl || this._apiUrl).replace(/\/$/, "");
|
|
305
461
|
this._timeout = options.timeout || 6e4;
|
|
@@ -311,10 +467,21 @@ var KugelAudio = class {
|
|
|
311
467
|
get apiKey() {
|
|
312
468
|
return this._apiKey;
|
|
313
469
|
}
|
|
470
|
+
/** Check if using master key authentication */
|
|
471
|
+
get isMasterKey() {
|
|
472
|
+
return this._isMasterKey;
|
|
473
|
+
}
|
|
314
474
|
/** Get TTS URL */
|
|
315
475
|
get ttsUrl() {
|
|
316
476
|
return this._ttsUrl;
|
|
317
477
|
}
|
|
478
|
+
/**
|
|
479
|
+
* Close the client and release resources.
|
|
480
|
+
* This closes any pooled WebSocket connections.
|
|
481
|
+
*/
|
|
482
|
+
close() {
|
|
483
|
+
this.tts.close();
|
|
484
|
+
}
|
|
318
485
|
/**
|
|
319
486
|
* Make an HTTP request to the API.
|
|
320
487
|
* @internal
|
package/package.json
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "kugelaudio",
|
|
3
|
-
"version": "0.1.1",
|
|
3
|
+
"version": "0.1.3",
|
|
4
4
|
"description": "Official JavaScript/TypeScript SDK for KugelAudio TTS API",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.mjs",
|
|
7
7
|
"types": "dist/index.d.ts",
|
|
8
8
|
"exports": {
|
|
9
9
|
".": {
|
|
10
|
+
"types": "./dist/index.d.ts",
|
|
10
11
|
"import": "./dist/index.mjs",
|
|
11
|
-
"require": "./dist/index.js",
|
|
12
|
-
"types": "./dist/index.d.ts"
|
|
12
|
+
"require": "./dist/index.js"
|
|
13
13
|
}
|
|
14
14
|
},
|
|
15
15
|
"files": [
|
package/src/client.ts
CHANGED
|
@@ -111,6 +111,15 @@ class VoicesResource {
|
|
|
111
111
|
* TTS resource for text-to-speech generation.
|
|
112
112
|
*/
|
|
113
113
|
class TTSResource {
|
|
114
|
+
private wsConnection: WebSocket | null = null;
|
|
115
|
+
private wsUrl: string | null = null;
|
|
116
|
+
private pendingRequests: Map<number, {
|
|
117
|
+
callbacks: StreamCallbacks;
|
|
118
|
+
resolve: () => void;
|
|
119
|
+
reject: (error: Error) => void;
|
|
120
|
+
}> = new Map();
|
|
121
|
+
private requestCounter = 0;
|
|
122
|
+
|
|
114
123
|
constructor(private client: KugelAudio) {}
|
|
115
124
|
|
|
116
125
|
/**
|
|
@@ -149,16 +158,196 @@ class TTSResource {
|
|
|
149
158
|
};
|
|
150
159
|
}
|
|
151
160
|
|
|
161
|
+
/**
|
|
162
|
+
* Build the WebSocket URL with appropriate auth param.
|
|
163
|
+
*/
|
|
164
|
+
private buildWsUrl(): string {
|
|
165
|
+
const wsUrl = this.client.ttsUrl
|
|
166
|
+
.replace('https://', 'wss://')
|
|
167
|
+
.replace('http://', 'ws://');
|
|
168
|
+
// Use master_key param for master keys (bypasses billing), api_key for regular keys
|
|
169
|
+
const authParam = this.client.isMasterKey ? 'master_key' : 'api_key';
|
|
170
|
+
return `${wsUrl}/ws/tts?${authParam}=${this.client.apiKey}`;
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
/**
 * Connection attempt currently in flight, shared by concurrent callers of
 * getConnection() so that only one socket is opened per URL at a time.
 */
private wsConnecting: { url: string; promise: Promise<WebSocket> } | null = null;

/**
 * Get or create a WebSocket connection for connection pooling.
 * This avoids the ~220ms connect overhead on each request.
 *
 * Resolution order:
 *  1. an already-open pooled socket for the current URL is returned as-is;
 *  2. a connection attempt already in flight for the same URL is joined;
 *  3. otherwise any stale pooled socket is closed and a new one is opened.
 *
 * @returns The open WebSocket, with the pooled message handlers installed.
 * @throws KugelAudioError if the socket errors or closes before it opens.
 */
private async getConnection(): Promise<WebSocket> {
  const url = this.buildWsUrl();

  // Return existing connection if valid
  if (
    this.wsConnection &&
    this.wsUrl === url &&
    this.wsConnection.readyState === WebSocket.OPEN
  ) {
    return this.wsConnection;
  }

  // Join an attempt that is already connecting to the same URL instead of
  // opening a second socket (which would leak and duplicate handlers).
  const inFlight = this.wsConnecting;
  if (inFlight && inFlight.url === url) {
    return inFlight.promise;
  }

  // Close old connection if it is no longer open or the URL changed
  if (this.wsConnection) {
    try {
      this.wsConnection.close();
    } catch {
      // Ignore close errors
    }
    this.wsConnection = null;
    this.wsUrl = null;
  }

  // Create new connection
  const promise = new Promise<WebSocket>((resolve, reject) => {
    const ws = new WebSocket(url);

    ws.onopen = () => {
      this.wsConnection = ws;
      this.wsUrl = url;
      // Replaces the pre-open onerror/onclose handlers below.
      this.setupMessageHandler(ws);
      resolve(ws);
    };

    ws.onerror = () => {
      reject(new KugelAudioError('WebSocket connection error'));
    };

    // A socket may close before opening without an error event; without this
    // the caller would wait forever. Rejecting twice is a harmless no-op.
    ws.onclose = () => {
      reject(new KugelAudioError('WebSocket connection error'));
    };
  });

  const attempt = { url, promise };
  this.wsConnecting = attempt;
  // Forget the attempt once it settles (success or failure) so a later call
  // can retry; the identity check protects a newer attempt from being cleared.
  const forget = (): void => {
    if (this.wsConnecting === attempt) {
      this.wsConnecting = null;
    }
  };
  promise.then(forget, forget);

  return promise;
}
|
|
215
|
+
|
|
216
|
+
/**
 * Setup message handler for pooled connection.
 *
 * Installs `onmessage`, `onclose` and `onerror` on the given socket:
 *  - every incoming JSON message is routed to the oldest pending request
 *    (requests are processed one at a time);
 *  - an `error` message rejects that request, a `final` message resolves it,
 *    an `audio` message is forwarded to `onChunk`;
 *  - a close or socket error settles every pending request so that no
 *    stream() promise is left hanging.
 *
 * @param ws - The open pooled WebSocket to attach the handlers to.
 */
private setupMessageHandler(ws: WebSocket): void {
  // Reject every pending request with `error`, notifying each request's
  // callbacks first. The map is emptied up front and callback failures are
  // contained, so one faulty callback cannot leave other requests unsettled.
  const failAll = (error: Error, notify: (callbacks: StreamCallbacks) => void): void => {
    const interrupted = [...this.pendingRequests.values()];
    this.pendingRequests.clear();
    for (const pending of interrupted) {
      try {
        notify(pending.callbacks);
      } catch (e) {
        console.error('WebSocket stream callback failed:', e);
      }
      pending.reject(error);
    }
  };

  ws.onmessage = (event) => {
    try {
      const data = JSON.parse(event.data);

      // Get the current pending request (we process one at a time). Map
      // iteration follows insertion order, so the first entry is the oldest.
      const head = this.pendingRequests.entries().next();
      if (head.done) return;
      const [requestId, pending] = head.value;

      if (data.error) {
        const error = this.parseError(data.error);
        // Remove the entry before running user code: if the callback throws,
        // the request must not stay at the head of the queue and capture the
        // next request's messages.
        this.pendingRequests.delete(requestId);
        try {
          pending.callbacks.onError?.(error);
        } finally {
          pending.reject(error);
        }
        return;
      }

      if (data.final) {
        const stats: GenerationStats = {
          final: true,
          chunks: data.chunks,
          totalSamples: data.total_samples,
          durationMs: data.dur_ms,
          generationMs: data.gen_ms,
          ttfaMs: data.ttfa_ms,
          rtf: data.rtf,
          error: data.error,
        };
        // Same ordering as the error path: dequeue first, always settle.
        this.pendingRequests.delete(requestId);
        try {
          pending.callbacks.onFinal?.(stats);
        } finally {
          pending.resolve();
        }
        return;
      }

      if (data.audio) {
        const chunk: AudioChunk = {
          audio: data.audio,
          encoding: data.enc || 'pcm_s16le',
          index: data.idx,
          sampleRate: data.sr,
          samples: data.samples,
        };
        pending.callbacks.onChunk?.(chunk);
      }
    } catch (e) {
      // Reached for malformed JSON and also for exceptions thrown by the
      // user callbacks above; either way the socket keeps running.
      console.error('Failed to parse WebSocket message:', e);
    }
  };

  ws.onclose = (event) => {
    // Clear connection pool, but only if this socket is still the pooled one:
    // a replaced socket closing late must not evict its successor.
    if (this.wsConnection === ws) {
      this.wsConnection = null;
      this.wsUrl = null;
    }

    // Map the close code to an error. Codes other than 4001/4003 still reject,
    // since a request that is pending at close time can never complete.
    let closeError: Error;
    if (event.code === 4001) {
      closeError = new AuthenticationError('Authentication failed');
    } else if (event.code === 4003) {
      closeError = new InsufficientCreditsError('Insufficient credits');
    } else {
      closeError = new KugelAudioError(
        `WebSocket closed before generation completed (code ${event.code})`
      );
    }

    // Reject all pending requests
    failAll(closeError, (callbacks) => callbacks.onClose?.());
  };

  ws.onerror = () => {
    // Reject all pending requests
    const error = new KugelAudioError('WebSocket connection error');
    failAll(error, (callbacks) => callbacks.onError?.(error));
  };
}
|
|
295
|
+
|
|
152
296
|
/**
 * Stream audio from text via WebSocket.
 *
 * With `reuseConnection` left at its default the request is sent over the
 * pooled WebSocket connection, which gives a faster TTFA (~180ms vs ~400ms);
 * passing `false` opens a dedicated connection for this request instead.
 *
 * @param options - Generation options
 * @param callbacks - Stream callbacks
 * @param reuseConnection - If true (default), reuse WebSocket connection
 * @returns The promise of whichever streaming path was selected.
 */
stream(
  options: GenerateOptions,
  callbacks: StreamCallbacks,
  reuseConnection = true
): Promise<void> {
  return reuseConnection
    ? this.streamWithPooling(options, callbacks)
    : this.streamWithoutPooling(options, callbacks);
}
|
|
314
|
+
|
|
315
|
+
/**
 * Stream with connection pooling (fast path).
 *
 * Obtains the pooled socket, registers the request in `pendingRequests`
 * under a fresh id, fires `onOpen`, and sends the generation request. The
 * returned promise is settled later through the stored `resolve`/`reject`.
 *
 * @param options - Generation options; unset fields fall back to the defaults
 *   in the payload below.
 * @param callbacks - Stream callbacks stored with the pending request.
 * @returns A promise settled via the pending-request entry, or rejected
 *   immediately if `onOpen` or sending the request throws.
 */
private async streamWithPooling(
  options: GenerateOptions,
  callbacks: StreamCallbacks
): Promise<void> {
  const ws = await this.getConnection();
  const requestId = ++this.requestCounter;

  return new Promise((resolve, reject) => {
    this.pendingRequests.set(requestId, { callbacks, resolve, reject });

    try {
      callbacks.onOpen?.();

      ws.send(JSON.stringify({
        text: options.text,
        model: options.model || 'kugel-1-turbo',
        voice_id: options.voiceId,
        cfg_scale: options.cfgScale ?? 2.0,
        max_new_tokens: options.maxNewTokens ?? 2048,
        sample_rate: options.sampleRate ?? 24000,
        speaker_prefix: options.speakerPrefix ?? true,
      }));
    } catch (e) {
      // The request never reached the server. Unregister it so it cannot sit
      // at the head of the queue and swallow the next request's responses,
      // then reject with the original error.
      this.pendingRequests.delete(requestId);
      reject(e);
    }
  });
}
|
|
161
341
|
|
|
342
|
+
/**
|
|
343
|
+
* Stream without connection pooling (original behavior).
|
|
344
|
+
*/
|
|
345
|
+
private streamWithoutPooling(
|
|
346
|
+
options: GenerateOptions,
|
|
347
|
+
callbacks: StreamCallbacks
|
|
348
|
+
): Promise<void> {
|
|
349
|
+
return new Promise((resolve, reject) => {
|
|
350
|
+
const url = this.buildWsUrl();
|
|
162
351
|
const ws = new WebSocket(url);
|
|
163
352
|
|
|
164
353
|
ws.onopen = () => {
|
|
@@ -166,7 +355,7 @@ class TTSResource {
|
|
|
166
355
|
// Send TTS request
|
|
167
356
|
ws.send(JSON.stringify({
|
|
168
357
|
text: options.text,
|
|
169
|
-
model: options.model || 'kugel-
|
|
358
|
+
model: options.model || 'kugel-1-turbo',
|
|
170
359
|
voice_id: options.voiceId,
|
|
171
360
|
cfg_scale: options.cfgScale ?? 2.0,
|
|
172
361
|
max_new_tokens: options.maxNewTokens ?? 2048,
|
|
@@ -236,6 +425,21 @@ class TTSResource {
|
|
|
236
425
|
});
|
|
237
426
|
}
|
|
238
427
|
|
|
428
|
+
/**
 * Close the pooled WebSocket connection.
 *
 * Does nothing when no connection is pooled. Otherwise the pool is cleared,
 * every request still in flight is notified through `onClose` and rejected
 * (it can no longer complete), and the socket is closed.
 */
close(): void {
  const ws = this.wsConnection;
  if (!ws) {
    return;
  }

  // Clear the pool first so a subsequent stream() opens a fresh socket.
  this.wsConnection = null;
  this.wsUrl = null;

  // Settle in-flight requests here rather than relying on the close event,
  // which would otherwise drop them without rejecting on a normal close code.
  const interrupted = [...this.pendingRequests.values()];
  this.pendingRequests.clear();
  if (interrupted.length > 0) {
    const error = new KugelAudioError('WebSocket connection closed');
    for (const pending of interrupted) {
      try {
        pending.callbacks.onClose?.();
      } catch (e) {
        // A faulty callback must not prevent the remaining requests from settling.
        console.error('WebSocket stream callback failed:', e);
      }
      pending.reject(error);
    }
  }

  try {
    ws.close();
  } catch {
    // Ignore close errors
  }
}
|
|
442
|
+
|
|
239
443
|
private parseError(message: string): Error {
|
|
240
444
|
const lower = message.toLowerCase();
|
|
241
445
|
if (lower.includes('auth') || lower.includes('unauthorized')) {
|
|
@@ -261,15 +465,22 @@ class TTSResource {
|
|
|
261
465
|
* // List voices
|
|
262
466
|
* const voices = await client.voices.list();
|
|
263
467
|
*
|
|
264
|
-
* // Generate audio
|
|
468
|
+
* // Generate audio with fast model (1.5B params)
|
|
265
469
|
* const audio = await client.tts.generate({
|
|
266
470
|
* text: 'Hello, world!',
|
|
267
|
-
* model: 'kugel-
|
|
471
|
+
* model: 'kugel-1-turbo',
|
|
472
|
+
* });
|
|
473
|
+
*
|
|
474
|
+
* // Generate audio with premium model (7B params)
|
|
475
|
+
* const audio = await client.tts.generate({
|
|
476
|
+
* text: 'Hello, world!',
|
|
477
|
+
* model: 'kugel-1',
|
|
268
478
|
* });
|
|
269
479
|
* ```
|
|
270
480
|
*/
|
|
271
481
|
export class KugelAudio {
|
|
272
482
|
private _apiKey: string;
|
|
483
|
+
private _isMasterKey: boolean;
|
|
273
484
|
private _apiUrl: string;
|
|
274
485
|
private _ttsUrl: string;
|
|
275
486
|
private _timeout: number;
|
|
@@ -287,6 +498,7 @@ export class KugelAudio {
|
|
|
287
498
|
}
|
|
288
499
|
|
|
289
500
|
this._apiKey = options.apiKey;
|
|
501
|
+
this._isMasterKey = options.isMasterKey || false;
|
|
290
502
|
this._apiUrl = (options.apiUrl || DEFAULT_API_URL).replace(/\/$/, '');
|
|
291
503
|
// If ttsUrl not specified, use apiUrl (backend proxies to TTS server)
|
|
292
504
|
this._ttsUrl = (options.ttsUrl || this._apiUrl).replace(/\/$/, '');
|
|
@@ -302,11 +514,24 @@ export class KugelAudio {
|
|
|
302
514
|
return this._apiKey;
|
|
303
515
|
}
|
|
304
516
|
|
|
517
|
+
/** Whether this client authenticates with a master key rather than a regular API key (set from the `isMasterKey` constructor option, false when omitted). */
|
|
518
|
+
get isMasterKey(): boolean {
|
|
519
|
+
return this._isMasterKey;
|
|
520
|
+
}
|
|
521
|
+
|
|
305
522
|
/** Get TTS URL */
|
|
306
523
|
get ttsUrl(): string {
|
|
307
524
|
return this._ttsUrl;
|
|
308
525
|
}
|
|
309
526
|
|
|
527
|
+
/**
|
|
528
|
+
* Close the client and release resources.
|
|
529
|
+
* Delegates to the TTS resource's close(), which closes the pooled WebSocket connection if one is open.
|
|
530
|
+
*/
|
|
531
|
+
close(): void {
|
|
532
|
+
this.tts.close();
|
|
533
|
+
}
|
|
534
|
+
|
|
310
535
|
/**
|
|
311
536
|
* Make an HTTP request to the API.
|
|
312
537
|
* @internal
|
package/src/index.ts
CHANGED
|
@@ -18,13 +18,13 @@
|
|
|
18
18
|
* // Generate audio (non-streaming)
|
|
19
19
|
* const audio = await client.tts.generate({
|
|
20
20
|
* text: 'Hello, world!',
|
|
21
|
-
* model: 'kugel-
|
|
21
|
+
* model: 'kugel-1-turbo',
|
|
22
22
|
* voiceId: 123,
|
|
23
23
|
* });
|
|
24
24
|
*
|
|
25
25
|
* // Generate audio (streaming)
|
|
26
26
|
* await client.tts.stream(
|
|
27
|
-
* { text: 'Hello, world!', model: 'kugel-
|
|
27
|
+
* { text: 'Hello, world!', model: 'kugel-1-turbo' },
|
|
28
28
|
* {
|
|
29
29
|
* onChunk: (chunk) => {
|
|
30
30
|
* // Process audio chunk
|
package/src/types.ts
CHANGED
|
@@ -53,7 +53,7 @@ export interface Voice {
|
|
|
53
53
|
export interface GenerateOptions {
|
|
54
54
|
/** Text to synthesize */
|
|
55
55
|
text: string;
|
|
56
|
-
/** Model to use (
|
|
56
|
+
/** Model to use: 'kugel-1-turbo' (1.5B, fast) or 'kugel-1' (7B, premium). Default: 'kugel-1-turbo' */
|
|
57
57
|
model?: string;
|
|
58
58
|
/** Voice ID to use */
|
|
59
59
|
voiceId?: number;
|
|
@@ -165,6 +165,8 @@ export interface StreamCallbacks {
|
|
|
165
165
|
export interface KugelAudioOptions {
|
|
166
166
|
/** Your KugelAudio API key */
|
|
167
167
|
apiKey: string;
|
|
168
|
+
/** Whether apiKey is a master key (for internal/server-side use). Master keys bypass billing. */
|
|
169
|
+
isMasterKey?: boolean;
|
|
168
170
|
/** API base URL (default: https://api.kugelaudio.com) */
|
|
169
171
|
apiUrl?: string;
|
|
170
172
|
/** TTS server URL (default: same as apiUrl; the backend proxies to the TTS server) */
|