kugelaudio 0.1.9 → 0.1.11
This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/README.md +6 -8
- package/dist/index.d.mts +19 -16
- package/dist/index.d.ts +19 -16
- package/dist/index.js +4 -7
- package/dist/index.mjs +4 -7
- package/package.json +1 -1
- package/src/client.ts +6 -9
- package/src/index.ts +2 -2
- package/src/types.ts +17 -14
package/README.md
CHANGED
|
@@ -31,7 +31,7 @@ const client = new KugelAudio({ apiKey: 'your_api_key' });
|
|
|
31
31
|
// Generate speech
|
|
32
32
|
const audio = await client.tts.generate({
|
|
33
33
|
text: 'Hello, world!',
|
|
34
|
-
|
|
34
|
+
modelId: 'kugel-1-turbo',
|
|
35
35
|
});
|
|
36
36
|
|
|
37
37
|
// Create a playable blob (browser)
|
|
@@ -144,12 +144,11 @@ Generate complete audio and receive it all at once:
|
|
|
144
144
|
```typescript
|
|
145
145
|
const audio = await client.tts.generate({
|
|
146
146
|
text: 'Hello, this is a test of the KugelAudio text-to-speech system.',
|
|
147
|
-
|
|
147
|
+
modelId: 'kugel-1-turbo', // 'kugel-1-turbo' (fast) or 'kugel-1' (quality)
|
|
148
148
|
voiceId: 123, // Optional: specific voice ID
|
|
149
149
|
cfgScale: 2.0, // Guidance scale (1.0-5.0)
|
|
150
150
|
maxNewTokens: 2048, // Maximum tokens to generate
|
|
151
151
|
sampleRate: 24000, // Output sample rate
|
|
152
|
-
speakerPrefix: true, // Add speaker prefix for better quality
|
|
153
152
|
normalize: true, // Enable text normalization (see below)
|
|
154
153
|
language: 'en', // Language for normalization
|
|
155
154
|
});
|
|
@@ -171,7 +170,7 @@ import { createWavBlob } from 'kugelaudio';
|
|
|
171
170
|
|
|
172
171
|
const audio = await client.tts.generate({
|
|
173
172
|
text: 'Hello, world!',
|
|
174
|
-
|
|
173
|
+
modelId: 'kugel-1-turbo',
|
|
175
174
|
});
|
|
176
175
|
|
|
177
176
|
// Create WAV blob for playback
|
|
@@ -200,7 +199,7 @@ Receive audio chunks as they are generated for lower latency:
|
|
|
200
199
|
await client.tts.stream(
|
|
201
200
|
{
|
|
202
201
|
text: 'Hello, this is streaming audio.',
|
|
203
|
-
|
|
202
|
+
modelId: 'kugel-1-turbo',
|
|
204
203
|
},
|
|
205
204
|
{
|
|
206
205
|
onOpen: () => {
|
|
@@ -354,8 +353,7 @@ interface GenerateOptions {
|
|
|
354
353
|
cfgScale?: number; // Default: 2.0
|
|
355
354
|
maxNewTokens?: number; // Default: 2048
|
|
356
355
|
sampleRate?: number; // Default: 24000
|
|
357
|
-
|
|
358
|
-
normalize?: boolean; // Default: false - Enable text normalization
|
|
356
|
+
normalize?: boolean; // Default: true - Enable text normalization
|
|
359
357
|
language?: string; // ISO 639-1 code for normalization (e.g., 'en', 'de')
|
|
360
358
|
}
|
|
361
359
|
```
|
|
@@ -520,7 +518,7 @@ async function main() {
|
|
|
520
518
|
await client.tts.stream(
|
|
521
519
|
{
|
|
522
520
|
text: 'Welcome to KugelAudio. This is an example of high-quality text-to-speech synthesis.',
|
|
523
|
-
|
|
521
|
+
modelId: 'kugel-1-turbo',
|
|
524
522
|
},
|
|
525
523
|
{
|
|
526
524
|
onChunk: (chunk) => {
|
package/dist/index.d.mts
CHANGED
|
@@ -15,7 +15,7 @@ interface Model {
|
|
|
15
15
|
/**
|
|
16
16
|
* Voice category types.
|
|
17
17
|
*/
|
|
18
|
-
type VoiceCategory = 'premade' | 'cloned' | 'designed';
|
|
18
|
+
type VoiceCategory = 'premade' | 'cloned' | 'designed' | 'conversational' | 'narrative' | 'narrative_story' | 'characters';
|
|
19
19
|
/**
|
|
20
20
|
* Voice sex types.
|
|
21
21
|
*/
|
|
@@ -48,7 +48,7 @@ interface GenerateOptions {
|
|
|
48
48
|
/** Text to synthesize */
|
|
49
49
|
text: string;
|
|
50
50
|
/** Model to use: 'kugel-1-turbo' (1.5B, fast) or 'kugel-1' (7B, premium). Default: 'kugel-1-turbo' */
|
|
51
|
-
|
|
51
|
+
modelId?: string;
|
|
52
52
|
/** Voice ID to use */
|
|
53
53
|
voiceId?: number;
|
|
54
54
|
/** CFG scale for generation (default: 2.0) */
|
|
@@ -57,21 +57,18 @@ interface GenerateOptions {
|
|
|
57
57
|
maxNewTokens?: number;
|
|
58
58
|
/** Output sample rate (default: 24000) */
|
|
59
59
|
sampleRate?: number;
|
|
60
|
-
/** Whether to add speaker prefix (default: true) */
|
|
61
|
-
speakerPrefix?: boolean;
|
|
62
60
|
/**
|
|
63
61
|
* Enable text normalization (converts numbers, dates, etc. to spoken words).
|
|
64
62
|
* When true, text will be normalized before TTS generation.
|
|
65
|
-
* Default:
|
|
63
|
+
* Default: true
|
|
66
64
|
*
|
|
67
|
-
* ⚠️
|
|
68
|
-
*
|
|
69
|
-
* the language parameter when using normalization.
|
|
65
|
+
* ⚠️ For best performance, always specify the language parameter when using
|
|
66
|
+
* normalization. Without it, language auto-detection adds ~150ms latency.
|
|
70
67
|
*/
|
|
71
68
|
normalize?: boolean;
|
|
72
69
|
/**
|
|
73
70
|
* ISO 639-1 language code for text normalization (e.g., 'de', 'en', 'fr').
|
|
74
|
-
* If not provided and normalize is true, language will be auto-detected
|
|
71
|
+
* If not provided and normalize is true (default), language will be auto-detected
|
|
75
72
|
* (adds ~150ms latency).
|
|
76
73
|
*
|
|
77
74
|
* Supported: de, en, fr, es, it, pt, nl, pl, sv, da, no, fi, cs, hu, ro,
|
|
@@ -91,12 +88,20 @@ interface StreamConfig {
|
|
|
91
88
|
maxNewTokens?: number;
|
|
92
89
|
/** Output sample rate */
|
|
93
90
|
sampleRate?: number;
|
|
94
|
-
/** Whether to add speaker prefix */
|
|
95
|
-
speakerPrefix?: boolean;
|
|
96
91
|
/** Auto-flush timeout in milliseconds */
|
|
97
92
|
flushTimeoutMs?: number;
|
|
98
93
|
/** Maximum buffer length */
|
|
99
94
|
maxBufferLength?: number;
|
|
95
|
+
/**
|
|
96
|
+
* Enable text normalization (converts numbers, dates, etc. to spoken words).
|
|
97
|
+
* Default: true
|
|
98
|
+
*/
|
|
99
|
+
normalize?: boolean;
|
|
100
|
+
/**
|
|
101
|
+
* ISO 639-1 language code for text normalization (e.g., 'de', 'en', 'fr').
|
|
102
|
+
* Specify to avoid ~150ms auto-detection latency.
|
|
103
|
+
*/
|
|
104
|
+
language?: string;
|
|
100
105
|
}
|
|
101
106
|
/**
|
|
102
107
|
* Audio chunk from streaming TTS.
|
|
@@ -180,7 +185,7 @@ interface KugelAudioOptions {
|
|
|
180
185
|
orgId?: number;
|
|
181
186
|
/** API base URL (default: https://api.kugelaudio.com) */
|
|
182
187
|
apiUrl?: string;
|
|
183
|
-
/** TTS server URL (default:
|
|
188
|
+
/** TTS server URL (default: same as apiUrl) */
|
|
184
189
|
ttsUrl?: string;
|
|
185
190
|
/** Request timeout in milliseconds (default: 60000) */
|
|
186
191
|
timeout?: number;
|
|
@@ -199,8 +204,6 @@ interface MultiContextConfig {
|
|
|
199
204
|
maxNewTokens?: number;
|
|
200
205
|
/** Enable text normalization (default: true) */
|
|
201
206
|
normalize?: boolean;
|
|
202
|
-
/** Add speaker prefix (default: true) */
|
|
203
|
-
speakerPrefix?: boolean;
|
|
204
207
|
/** Seconds before context auto-closes (default: 20.0) */
|
|
205
208
|
inactivityTimeout?: number;
|
|
206
209
|
}
|
|
@@ -459,13 +462,13 @@ declare class MultiContextSession {
|
|
|
459
462
|
* // Generate audio with fast model (1.5B params)
|
|
460
463
|
* const audio = await client.tts.generate({
|
|
461
464
|
* text: 'Hello, world!',
|
|
462
|
-
*
|
|
465
|
+
* modelId: 'kugel-1-turbo',
|
|
463
466
|
* });
|
|
464
467
|
*
|
|
465
468
|
* // Generate audio with premium model (7B params)
|
|
466
469
|
* const audio = await client.tts.generate({
|
|
467
470
|
* text: 'Hello, world!',
|
|
468
|
-
*
|
|
471
|
+
* modelId: 'kugel-1',
|
|
469
472
|
* });
|
|
470
473
|
* ```
|
|
471
474
|
*/
|
package/dist/index.d.ts
CHANGED
|
@@ -15,7 +15,7 @@ interface Model {
|
|
|
15
15
|
/**
|
|
16
16
|
* Voice category types.
|
|
17
17
|
*/
|
|
18
|
-
type VoiceCategory = 'premade' | 'cloned' | 'designed';
|
|
18
|
+
type VoiceCategory = 'premade' | 'cloned' | 'designed' | 'conversational' | 'narrative' | 'narrative_story' | 'characters';
|
|
19
19
|
/**
|
|
20
20
|
* Voice sex types.
|
|
21
21
|
*/
|
|
@@ -48,7 +48,7 @@ interface GenerateOptions {
|
|
|
48
48
|
/** Text to synthesize */
|
|
49
49
|
text: string;
|
|
50
50
|
/** Model to use: 'kugel-1-turbo' (1.5B, fast) or 'kugel-1' (7B, premium). Default: 'kugel-1-turbo' */
|
|
51
|
-
|
|
51
|
+
modelId?: string;
|
|
52
52
|
/** Voice ID to use */
|
|
53
53
|
voiceId?: number;
|
|
54
54
|
/** CFG scale for generation (default: 2.0) */
|
|
@@ -57,21 +57,18 @@ interface GenerateOptions {
|
|
|
57
57
|
maxNewTokens?: number;
|
|
58
58
|
/** Output sample rate (default: 24000) */
|
|
59
59
|
sampleRate?: number;
|
|
60
|
-
/** Whether to add speaker prefix (default: true) */
|
|
61
|
-
speakerPrefix?: boolean;
|
|
62
60
|
/**
|
|
63
61
|
* Enable text normalization (converts numbers, dates, etc. to spoken words).
|
|
64
62
|
* When true, text will be normalized before TTS generation.
|
|
65
|
-
* Default:
|
|
63
|
+
* Default: true
|
|
66
64
|
*
|
|
67
|
-
* ⚠️
|
|
68
|
-
*
|
|
69
|
-
* the language parameter when using normalization.
|
|
65
|
+
* ⚠️ For best performance, always specify the language parameter when using
|
|
66
|
+
* normalization. Without it, language auto-detection adds ~150ms latency.
|
|
70
67
|
*/
|
|
71
68
|
normalize?: boolean;
|
|
72
69
|
/**
|
|
73
70
|
* ISO 639-1 language code for text normalization (e.g., 'de', 'en', 'fr').
|
|
74
|
-
* If not provided and normalize is true, language will be auto-detected
|
|
71
|
+
* If not provided and normalize is true (default), language will be auto-detected
|
|
75
72
|
* (adds ~150ms latency).
|
|
76
73
|
*
|
|
77
74
|
* Supported: de, en, fr, es, it, pt, nl, pl, sv, da, no, fi, cs, hu, ro,
|
|
@@ -91,12 +88,20 @@ interface StreamConfig {
|
|
|
91
88
|
maxNewTokens?: number;
|
|
92
89
|
/** Output sample rate */
|
|
93
90
|
sampleRate?: number;
|
|
94
|
-
/** Whether to add speaker prefix */
|
|
95
|
-
speakerPrefix?: boolean;
|
|
96
91
|
/** Auto-flush timeout in milliseconds */
|
|
97
92
|
flushTimeoutMs?: number;
|
|
98
93
|
/** Maximum buffer length */
|
|
99
94
|
maxBufferLength?: number;
|
|
95
|
+
/**
|
|
96
|
+
* Enable text normalization (converts numbers, dates, etc. to spoken words).
|
|
97
|
+
* Default: true
|
|
98
|
+
*/
|
|
99
|
+
normalize?: boolean;
|
|
100
|
+
/**
|
|
101
|
+
* ISO 639-1 language code for text normalization (e.g., 'de', 'en', 'fr').
|
|
102
|
+
* Specify to avoid ~150ms auto-detection latency.
|
|
103
|
+
*/
|
|
104
|
+
language?: string;
|
|
100
105
|
}
|
|
101
106
|
/**
|
|
102
107
|
* Audio chunk from streaming TTS.
|
|
@@ -180,7 +185,7 @@ interface KugelAudioOptions {
|
|
|
180
185
|
orgId?: number;
|
|
181
186
|
/** API base URL (default: https://api.kugelaudio.com) */
|
|
182
187
|
apiUrl?: string;
|
|
183
|
-
/** TTS server URL (default:
|
|
188
|
+
/** TTS server URL (default: same as apiUrl) */
|
|
184
189
|
ttsUrl?: string;
|
|
185
190
|
/** Request timeout in milliseconds (default: 60000) */
|
|
186
191
|
timeout?: number;
|
|
@@ -199,8 +204,6 @@ interface MultiContextConfig {
|
|
|
199
204
|
maxNewTokens?: number;
|
|
200
205
|
/** Enable text normalization (default: true) */
|
|
201
206
|
normalize?: boolean;
|
|
202
|
-
/** Add speaker prefix (default: true) */
|
|
203
|
-
speakerPrefix?: boolean;
|
|
204
207
|
/** Seconds before context auto-closes (default: 20.0) */
|
|
205
208
|
inactivityTimeout?: number;
|
|
206
209
|
}
|
|
@@ -459,13 +462,13 @@ declare class MultiContextSession {
|
|
|
459
462
|
* // Generate audio with fast model (1.5B params)
|
|
460
463
|
* const audio = await client.tts.generate({
|
|
461
464
|
* text: 'Hello, world!',
|
|
462
|
-
*
|
|
465
|
+
* modelId: 'kugel-1-turbo',
|
|
463
466
|
* });
|
|
464
467
|
*
|
|
465
468
|
* // Generate audio with premium model (7B params)
|
|
466
469
|
* const audio = await client.tts.generate({
|
|
467
470
|
* text: 'Hello, world!',
|
|
468
|
-
*
|
|
471
|
+
* modelId: 'kugel-1',
|
|
469
472
|
* });
|
|
470
473
|
* ```
|
|
471
474
|
*/
|
package/dist/index.js
CHANGED
|
@@ -415,13 +415,12 @@ var TTSResource = class {
|
|
|
415
415
|
callbacks.onOpen?.();
|
|
416
416
|
ws.send(JSON.stringify({
|
|
417
417
|
text: options.text,
|
|
418
|
-
|
|
418
|
+
model_id: options.modelId || "kugel-1-turbo",
|
|
419
419
|
voice_id: options.voiceId,
|
|
420
420
|
cfg_scale: options.cfgScale ?? 2,
|
|
421
421
|
max_new_tokens: options.maxNewTokens ?? 2048,
|
|
422
422
|
sample_rate: options.sampleRate ?? 24e3,
|
|
423
|
-
|
|
424
|
-
normalize: options.normalize ?? false,
|
|
423
|
+
normalize: options.normalize ?? true,
|
|
425
424
|
...options.language && { language: options.language }
|
|
426
425
|
}));
|
|
427
426
|
});
|
|
@@ -437,13 +436,12 @@ var TTSResource = class {
|
|
|
437
436
|
callbacks.onOpen?.();
|
|
438
437
|
ws.send(JSON.stringify({
|
|
439
438
|
text: options.text,
|
|
440
|
-
|
|
439
|
+
model_id: options.modelId || "kugel-1-turbo",
|
|
441
440
|
voice_id: options.voiceId,
|
|
442
441
|
cfg_scale: options.cfgScale ?? 2,
|
|
443
442
|
max_new_tokens: options.maxNewTokens ?? 2048,
|
|
444
443
|
sample_rate: options.sampleRate ?? 24e3,
|
|
445
|
-
|
|
446
|
-
normalize: options.normalize ?? false,
|
|
444
|
+
normalize: options.normalize ?? true,
|
|
447
445
|
...options.language && { language: options.language }
|
|
448
446
|
}));
|
|
449
447
|
};
|
|
@@ -676,7 +674,6 @@ var MultiContextSession = class {
|
|
|
676
674
|
if (this.config.cfgScale) msg.cfg_scale = this.config.cfgScale;
|
|
677
675
|
if (this.config.maxNewTokens) msg.max_new_tokens = this.config.maxNewTokens;
|
|
678
676
|
if (this.config.normalize !== void 0) msg.normalize = this.config.normalize;
|
|
679
|
-
if (this.config.speakerPrefix !== void 0) msg.speaker_prefix = this.config.speakerPrefix;
|
|
680
677
|
if (this.config.inactivityTimeout) msg.inactivity_timeout = this.config.inactivityTimeout;
|
|
681
678
|
}
|
|
682
679
|
const voiceId = options?.voiceId || this.config.defaultVoiceId;
|
package/dist/index.mjs
CHANGED
|
@@ -379,13 +379,12 @@ var TTSResource = class {
|
|
|
379
379
|
callbacks.onOpen?.();
|
|
380
380
|
ws.send(JSON.stringify({
|
|
381
381
|
text: options.text,
|
|
382
|
-
|
|
382
|
+
model_id: options.modelId || "kugel-1-turbo",
|
|
383
383
|
voice_id: options.voiceId,
|
|
384
384
|
cfg_scale: options.cfgScale ?? 2,
|
|
385
385
|
max_new_tokens: options.maxNewTokens ?? 2048,
|
|
386
386
|
sample_rate: options.sampleRate ?? 24e3,
|
|
387
|
-
|
|
388
|
-
normalize: options.normalize ?? false,
|
|
387
|
+
normalize: options.normalize ?? true,
|
|
389
388
|
...options.language && { language: options.language }
|
|
390
389
|
}));
|
|
391
390
|
});
|
|
@@ -401,13 +400,12 @@ var TTSResource = class {
|
|
|
401
400
|
callbacks.onOpen?.();
|
|
402
401
|
ws.send(JSON.stringify({
|
|
403
402
|
text: options.text,
|
|
404
|
-
|
|
403
|
+
model_id: options.modelId || "kugel-1-turbo",
|
|
405
404
|
voice_id: options.voiceId,
|
|
406
405
|
cfg_scale: options.cfgScale ?? 2,
|
|
407
406
|
max_new_tokens: options.maxNewTokens ?? 2048,
|
|
408
407
|
sample_rate: options.sampleRate ?? 24e3,
|
|
409
|
-
|
|
410
|
-
normalize: options.normalize ?? false,
|
|
408
|
+
normalize: options.normalize ?? true,
|
|
411
409
|
...options.language && { language: options.language }
|
|
412
410
|
}));
|
|
413
411
|
};
|
|
@@ -640,7 +638,6 @@ var MultiContextSession = class {
|
|
|
640
638
|
if (this.config.cfgScale) msg.cfg_scale = this.config.cfgScale;
|
|
641
639
|
if (this.config.maxNewTokens) msg.max_new_tokens = this.config.maxNewTokens;
|
|
642
640
|
if (this.config.normalize !== void 0) msg.normalize = this.config.normalize;
|
|
643
|
-
if (this.config.speakerPrefix !== void 0) msg.speaker_prefix = this.config.speakerPrefix;
|
|
644
641
|
if (this.config.inactivityTimeout) msg.inactivity_timeout = this.config.inactivityTimeout;
|
|
645
642
|
}
|
|
646
643
|
const voiceId = options?.voiceId || this.config.defaultVoiceId;
|
package/package.json
CHANGED
package/src/client.ts
CHANGED
|
@@ -369,13 +369,12 @@ class TTSResource {
|
|
|
369
369
|
|
|
370
370
|
ws.send(JSON.stringify({
|
|
371
371
|
text: options.text,
|
|
372
|
-
|
|
372
|
+
model_id: options.modelId || 'kugel-1-turbo',
|
|
373
373
|
voice_id: options.voiceId,
|
|
374
374
|
cfg_scale: options.cfgScale ?? 2.0,
|
|
375
375
|
max_new_tokens: options.maxNewTokens ?? 2048,
|
|
376
376
|
sample_rate: options.sampleRate ?? 24000,
|
|
377
|
-
|
|
378
|
-
normalize: options.normalize ?? false,
|
|
377
|
+
normalize: options.normalize ?? true,
|
|
379
378
|
...(options.language && { language: options.language }),
|
|
380
379
|
}));
|
|
381
380
|
});
|
|
@@ -397,13 +396,12 @@ class TTSResource {
|
|
|
397
396
|
// Send TTS request
|
|
398
397
|
ws.send(JSON.stringify({
|
|
399
398
|
text: options.text,
|
|
400
|
-
|
|
399
|
+
model_id: options.modelId || 'kugel-1-turbo',
|
|
401
400
|
voice_id: options.voiceId,
|
|
402
401
|
cfg_scale: options.cfgScale ?? 2.0,
|
|
403
402
|
max_new_tokens: options.maxNewTokens ?? 2048,
|
|
404
403
|
sample_rate: options.sampleRate ?? 24000,
|
|
405
|
-
|
|
406
|
-
normalize: options.normalize ?? false,
|
|
404
|
+
normalize: options.normalize ?? true,
|
|
407
405
|
...(options.language && { language: options.language }),
|
|
408
406
|
}));
|
|
409
407
|
};
|
|
@@ -686,7 +684,6 @@ class MultiContextSession {
|
|
|
686
684
|
if (this.config.cfgScale) msg.cfg_scale = this.config.cfgScale;
|
|
687
685
|
if (this.config.maxNewTokens) msg.max_new_tokens = this.config.maxNewTokens;
|
|
688
686
|
if (this.config.normalize !== undefined) msg.normalize = this.config.normalize;
|
|
689
|
-
if (this.config.speakerPrefix !== undefined) msg.speaker_prefix = this.config.speakerPrefix;
|
|
690
687
|
if (this.config.inactivityTimeout) msg.inactivity_timeout = this.config.inactivityTimeout;
|
|
691
688
|
}
|
|
692
689
|
|
|
@@ -807,13 +804,13 @@ class MultiContextSession {
|
|
|
807
804
|
* // Generate audio with fast model (1.5B params)
|
|
808
805
|
* const audio = await client.tts.generate({
|
|
809
806
|
* text: 'Hello, world!',
|
|
810
|
-
*
|
|
807
|
+
* modelId: 'kugel-1-turbo',
|
|
811
808
|
* });
|
|
812
809
|
*
|
|
813
810
|
* // Generate audio with premium model (7B params)
|
|
814
811
|
* const audio = await client.tts.generate({
|
|
815
812
|
* text: 'Hello, world!',
|
|
816
|
-
*
|
|
813
|
+
* modelId: 'kugel-1',
|
|
817
814
|
* });
|
|
818
815
|
* ```
|
|
819
816
|
*/
|
package/src/index.ts
CHANGED
|
@@ -18,13 +18,13 @@
|
|
|
18
18
|
* // Generate audio (non-streaming)
|
|
19
19
|
* const audio = await client.tts.generate({
|
|
20
20
|
* text: 'Hello, world!',
|
|
21
|
-
*
|
|
21
|
+
* modelId: 'kugel-1-turbo',
|
|
22
22
|
* voiceId: 123,
|
|
23
23
|
* });
|
|
24
24
|
*
|
|
25
25
|
* // Generate audio (streaming)
|
|
26
26
|
* await client.tts.stream(
|
|
27
|
-
* { text: 'Hello, world!',
|
|
27
|
+
* { text: 'Hello, world!', modelId: 'kugel-1-turbo' },
|
|
28
28
|
* {
|
|
29
29
|
* onChunk: (chunk) => {
|
|
30
30
|
* // Process audio chunk
|
package/src/types.ts
CHANGED
|
@@ -17,7 +17,7 @@ export interface Model {
|
|
|
17
17
|
/**
|
|
18
18
|
* Voice category types.
|
|
19
19
|
*/
|
|
20
|
-
export type VoiceCategory = 'premade' | 'cloned' | 'designed';
|
|
20
|
+
export type VoiceCategory = 'premade' | 'cloned' | 'designed' | 'conversational' | 'narrative' | 'narrative_story' | 'characters';
|
|
21
21
|
|
|
22
22
|
/**
|
|
23
23
|
* Voice sex types.
|
|
@@ -54,7 +54,7 @@ export interface GenerateOptions {
|
|
|
54
54
|
/** Text to synthesize */
|
|
55
55
|
text: string;
|
|
56
56
|
/** Model to use: 'kugel-1-turbo' (1.5B, fast) or 'kugel-1' (7B, premium). Default: 'kugel-1-turbo' */
|
|
57
|
-
|
|
57
|
+
modelId?: string;
|
|
58
58
|
/** Voice ID to use */
|
|
59
59
|
voiceId?: number;
|
|
60
60
|
/** CFG scale for generation (default: 2.0) */
|
|
@@ -63,21 +63,18 @@ export interface GenerateOptions {
|
|
|
63
63
|
maxNewTokens?: number;
|
|
64
64
|
/** Output sample rate (default: 24000) */
|
|
65
65
|
sampleRate?: number;
|
|
66
|
-
/** Whether to add speaker prefix (default: true) */
|
|
67
|
-
speakerPrefix?: boolean;
|
|
68
66
|
/**
|
|
69
67
|
* Enable text normalization (converts numbers, dates, etc. to spoken words).
|
|
70
68
|
* When true, text will be normalized before TTS generation.
|
|
71
|
-
* Default:
|
|
69
|
+
* Default: true
|
|
72
70
|
*
|
|
73
|
-
* ⚠️
|
|
74
|
-
*
|
|
75
|
-
* the language parameter when using normalization.
|
|
71
|
+
* ⚠️ For best performance, always specify the language parameter when using
|
|
72
|
+
* normalization. Without it, language auto-detection adds ~150ms latency.
|
|
76
73
|
*/
|
|
77
74
|
normalize?: boolean;
|
|
78
75
|
/**
|
|
79
76
|
* ISO 639-1 language code for text normalization (e.g., 'de', 'en', 'fr').
|
|
80
|
-
* If not provided and normalize is true, language will be auto-detected
|
|
77
|
+
* If not provided and normalize is true (default), language will be auto-detected
|
|
81
78
|
* (adds ~150ms latency).
|
|
82
79
|
*
|
|
83
80
|
* Supported: de, en, fr, es, it, pt, nl, pl, sv, da, no, fi, cs, hu, ro,
|
|
@@ -98,12 +95,20 @@ export interface StreamConfig {
|
|
|
98
95
|
maxNewTokens?: number;
|
|
99
96
|
/** Output sample rate */
|
|
100
97
|
sampleRate?: number;
|
|
101
|
-
/** Whether to add speaker prefix */
|
|
102
|
-
speakerPrefix?: boolean;
|
|
103
98
|
/** Auto-flush timeout in milliseconds */
|
|
104
99
|
flushTimeoutMs?: number;
|
|
105
100
|
/** Maximum buffer length */
|
|
106
101
|
maxBufferLength?: number;
|
|
102
|
+
/**
|
|
103
|
+
* Enable text normalization (converts numbers, dates, etc. to spoken words).
|
|
104
|
+
* Default: true
|
|
105
|
+
*/
|
|
106
|
+
normalize?: boolean;
|
|
107
|
+
/**
|
|
108
|
+
* ISO 639-1 language code for text normalization (e.g., 'de', 'en', 'fr').
|
|
109
|
+
* Specify to avoid ~150ms auto-detection latency.
|
|
110
|
+
*/
|
|
111
|
+
language?: string;
|
|
107
112
|
}
|
|
108
113
|
|
|
109
114
|
/**
|
|
@@ -192,7 +197,7 @@ export interface KugelAudioOptions {
|
|
|
192
197
|
orgId?: number;
|
|
193
198
|
/** API base URL (default: https://api.kugelaudio.com) */
|
|
194
199
|
apiUrl?: string;
|
|
195
|
-
/** TTS server URL (default:
|
|
200
|
+
/** TTS server URL (default: same as apiUrl) */
|
|
196
201
|
ttsUrl?: string;
|
|
197
202
|
/** Request timeout in milliseconds (default: 60000) */
|
|
198
203
|
timeout?: number;
|
|
@@ -221,8 +226,6 @@ export interface MultiContextConfig {
|
|
|
221
226
|
maxNewTokens?: number;
|
|
222
227
|
/** Enable text normalization (default: true) */
|
|
223
228
|
normalize?: boolean;
|
|
224
|
-
/** Add speaker prefix (default: true) */
|
|
225
|
-
speakerPrefix?: boolean;
|
|
226
229
|
/** Seconds before context auto-closes (default: 20.0) */
|
|
227
230
|
inactivityTimeout?: number;
|
|
228
231
|
}
|