npm - kugelaudio - Versions diffs - 0.1.9 → 0.1.11 - Mend

kugelaudio 0.1.9 → 0.1.11

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.

Files changed (9)

package/README.md CHANGED Viewed

@@ -31,7 +31,7 @@ const client = new KugelAudio({ apiKey: 'your_api_key' });
 // Generate speech
 const audio = await client.tts.generate({
   text: 'Hello, world!',
-  model: 'kugel-1-turbo',
+  modelId: 'kugel-1-turbo',
 });
 // Create a playable blob (browser)
@@ -144,12 +144,11 @@ Generate complete audio and receive it all at once:
 ```typescript
 const audio = await client.tts.generate({
   text: 'Hello, this is a test of the KugelAudio text-to-speech system.',
-  model: 'kugel-1-turbo',  // 'kugel-1-turbo' (fast) or 'kugel-1' (quality)
+  modelId: 'kugel-1-turbo',  // 'kugel-1-turbo' (fast) or 'kugel-1' (quality)
   voiceId: 123,              // Optional: specific voice ID
   cfgScale: 2.0,             // Guidance scale (1.0-5.0)
   maxNewTokens: 2048,        // Maximum tokens to generate
   sampleRate: 24000,         // Output sample rate
-  speakerPrefix: true,       // Add speaker prefix for better quality
   normalize: true,           // Enable text normalization (see below)
   language: 'en',            // Language for normalization
 });
@@ -171,7 +170,7 @@ import { createWavBlob } from 'kugelaudio';
 const audio = await client.tts.generate({
   text: 'Hello, world!',
-  model: 'kugel-1-turbo',
+  modelId: 'kugel-1-turbo',
 });
 // Create WAV blob for playback
@@ -200,7 +199,7 @@ Receive audio chunks as they are generated for lower latency:
 await client.tts.stream(
   {
     text: 'Hello, this is streaming audio.',
-    model: 'kugel-1-turbo',
+    modelId: 'kugel-1-turbo',
   },
   {
     onOpen: () => {
@@ -354,8 +353,7 @@ interface GenerateOptions {
   cfgScale?: number;       // Default: 2.0
   maxNewTokens?: number;   // Default: 2048
   sampleRate?: number;     // Default: 24000
-  speakerPrefix?: boolean; // Default: true
-  normalize?: boolean;     // Default: false - Enable text normalization
+  normalize?: boolean;     // Default: true - Enable text normalization
   language?: string;       // ISO 639-1 code for normalization (e.g., 'en', 'de')
 }
 ```
@@ -520,7 +518,7 @@ async function main() {
   await client.tts.stream(
     {
       text: 'Welcome to KugelAudio. This is an example of high-quality text-to-speech synthesis.',
-      model: 'kugel-1-turbo',
+      modelId: 'kugel-1-turbo',
     },
     {
       onChunk: (chunk) => {

package/dist/index.d.mts CHANGED Viewed

@@ -15,7 +15,7 @@ interface Model {
 /**
  * Voice category types.
  */
-type VoiceCategory = 'premade' | 'cloned' | 'designed';
+type VoiceCategory = 'premade' | 'cloned' | 'designed' | 'conversational' | 'narrative' | 'narrative_story' | 'characters';
 /**
  * Voice sex types.
  */
@@ -48,7 +48,7 @@ interface GenerateOptions {
     /** Text to synthesize */
     text: string;
     /** Model to use: 'kugel-1-turbo' (1.5B, fast) or 'kugel-1' (7B, premium). Default: 'kugel-1-turbo' */
-    model?: string;
+    modelId?: string;
     /** Voice ID to use */
     voiceId?: number;
     /** CFG scale for generation (default: 2.0) */
@@ -57,21 +57,18 @@ interface GenerateOptions {
     maxNewTokens?: number;
     /** Output sample rate (default: 24000) */
     sampleRate?: number;
-    /** Whether to add speaker prefix (default: true) */
-    speakerPrefix?: boolean;
     /**
      * Enable text normalization (converts numbers, dates, etc. to spoken words).
      * When true, text will be normalized before TTS generation.
-     * Default: false
+     * Default: true
      *
-     * ⚠️ WARNING: Using normalize=true without specifying language adds ~150ms
-     * latency for language auto-detection. For best performance, always specify
-     * the language parameter when using normalization.
+     * ⚠️ For best performance, always specify the language parameter when using
+     * normalization. Without it, language auto-detection adds ~150ms latency.
      */
     normalize?: boolean;
     /**
      * ISO 639-1 language code for text normalization (e.g., 'de', 'en', 'fr').
-     * If not provided and normalize is true, language will be auto-detected
+     * If not provided and normalize is true (default), language will be auto-detected
      * (adds ~150ms latency).
      *
      * Supported: de, en, fr, es, it, pt, nl, pl, sv, da, no, fi, cs, hu, ro,
@@ -91,12 +88,20 @@ interface StreamConfig {
     maxNewTokens?: number;
     /** Output sample rate */
     sampleRate?: number;
-    /** Whether to add speaker prefix */
-    speakerPrefix?: boolean;
     /** Auto-flush timeout in milliseconds */
     flushTimeoutMs?: number;
     /** Maximum buffer length */
     maxBufferLength?: number;
+    /**
+     * Enable text normalization (converts numbers, dates, etc. to spoken words).
+     * Default: true
+     */
+    normalize?: boolean;
+    /**
+     * ISO 639-1 language code for text normalization (e.g., 'de', 'en', 'fr').
+     * Specify to avoid ~150ms auto-detection latency.
+     */
+    language?: string;
 }
 /**
  * Audio chunk from streaming TTS.
@@ -180,7 +185,7 @@ interface KugelAudioOptions {
     orgId?: number;
     /** API base URL (default: https://api.kugelaudio.com) */
     apiUrl?: string;
-    /** TTS server URL (default: https://eu.kugelaudio.com) */
+    /** TTS server URL (default: same as apiUrl) */
     ttsUrl?: string;
     /** Request timeout in milliseconds (default: 60000) */
     timeout?: number;
@@ -199,8 +204,6 @@ interface MultiContextConfig {
     maxNewTokens?: number;
     /** Enable text normalization (default: true) */
     normalize?: boolean;
-    /** Add speaker prefix (default: true) */
-    speakerPrefix?: boolean;
     /** Seconds before context auto-closes (default: 20.0) */
     inactivityTimeout?: number;
 }
@@ -459,13 +462,13 @@ declare class MultiContextSession {
  * // Generate audio with fast model (1.5B params)
  * const audio = await client.tts.generate({
  *   text: 'Hello, world!',
- *   model: 'kugel-1-turbo',
+ *   modelId: 'kugel-1-turbo',
  * });
  *
  * // Generate audio with premium model (7B params)
  * const audio = await client.tts.generate({
  *   text: 'Hello, world!',
- *   model: 'kugel-1',
+ *   modelId: 'kugel-1',
  * });
  * ```
  */

package/dist/index.d.ts CHANGED Viewed

@@ -15,7 +15,7 @@ interface Model {
 /**
  * Voice category types.
  */
-type VoiceCategory = 'premade' | 'cloned' | 'designed';
+type VoiceCategory = 'premade' | 'cloned' | 'designed' | 'conversational' | 'narrative' | 'narrative_story' | 'characters';
 /**
  * Voice sex types.
  */
@@ -48,7 +48,7 @@ interface GenerateOptions {
     /** Text to synthesize */
     text: string;
     /** Model to use: 'kugel-1-turbo' (1.5B, fast) or 'kugel-1' (7B, premium). Default: 'kugel-1-turbo' */
-    model?: string;
+    modelId?: string;
     /** Voice ID to use */
     voiceId?: number;
     /** CFG scale for generation (default: 2.0) */
@@ -57,21 +57,18 @@ interface GenerateOptions {
     maxNewTokens?: number;
     /** Output sample rate (default: 24000) */
     sampleRate?: number;
-    /** Whether to add speaker prefix (default: true) */
-    speakerPrefix?: boolean;
     /**
      * Enable text normalization (converts numbers, dates, etc. to spoken words).
      * When true, text will be normalized before TTS generation.
-     * Default: false
+     * Default: true
      *
-     * ⚠️ WARNING: Using normalize=true without specifying language adds ~150ms
-     * latency for language auto-detection. For best performance, always specify
-     * the language parameter when using normalization.
+     * ⚠️ For best performance, always specify the language parameter when using
+     * normalization. Without it, language auto-detection adds ~150ms latency.
      */
     normalize?: boolean;
     /**
      * ISO 639-1 language code for text normalization (e.g., 'de', 'en', 'fr').
-     * If not provided and normalize is true, language will be auto-detected
+     * If not provided and normalize is true (default), language will be auto-detected
      * (adds ~150ms latency).
      *
      * Supported: de, en, fr, es, it, pt, nl, pl, sv, da, no, fi, cs, hu, ro,
@@ -91,12 +88,20 @@ interface StreamConfig {
     maxNewTokens?: number;
     /** Output sample rate */
     sampleRate?: number;
-    /** Whether to add speaker prefix */
-    speakerPrefix?: boolean;
     /** Auto-flush timeout in milliseconds */
     flushTimeoutMs?: number;
     /** Maximum buffer length */
     maxBufferLength?: number;
+    /**
+     * Enable text normalization (converts numbers, dates, etc. to spoken words).
+     * Default: true
+     */
+    normalize?: boolean;
+    /**
+     * ISO 639-1 language code for text normalization (e.g., 'de', 'en', 'fr').
+     * Specify to avoid ~150ms auto-detection latency.
+     */
+    language?: string;
 }
 /**
  * Audio chunk from streaming TTS.
@@ -180,7 +185,7 @@ interface KugelAudioOptions {
     orgId?: number;
     /** API base URL (default: https://api.kugelaudio.com) */
     apiUrl?: string;
-    /** TTS server URL (default: https://eu.kugelaudio.com) */
+    /** TTS server URL (default: same as apiUrl) */
     ttsUrl?: string;
     /** Request timeout in milliseconds (default: 60000) */
     timeout?: number;
@@ -199,8 +204,6 @@ interface MultiContextConfig {
     maxNewTokens?: number;
     /** Enable text normalization (default: true) */
     normalize?: boolean;
-    /** Add speaker prefix (default: true) */
-    speakerPrefix?: boolean;
     /** Seconds before context auto-closes (default: 20.0) */
     inactivityTimeout?: number;
 }
@@ -459,13 +462,13 @@ declare class MultiContextSession {
  * // Generate audio with fast model (1.5B params)
  * const audio = await client.tts.generate({
  *   text: 'Hello, world!',
- *   model: 'kugel-1-turbo',
+ *   modelId: 'kugel-1-turbo',
  * });
  *
  * // Generate audio with premium model (7B params)
  * const audio = await client.tts.generate({
  *   text: 'Hello, world!',
- *   model: 'kugel-1',
+ *   modelId: 'kugel-1',
  * });
  * ```
  */

package/dist/index.js CHANGED Viewed

@@ -415,13 +415,12 @@ var TTSResource = class {
       callbacks.onOpen?.();
       ws.send(JSON.stringify({
         text: options.text,
-        model: options.model || "kugel-1-turbo",
+        model_id: options.modelId || "kugel-1-turbo",
         voice_id: options.voiceId,
         cfg_scale: options.cfgScale ?? 2,
         max_new_tokens: options.maxNewTokens ?? 2048,
         sample_rate: options.sampleRate ?? 24e3,
-        speaker_prefix: options.speakerPrefix ?? true,
-        normalize: options.normalize ?? false,
+        normalize: options.normalize ?? true,
         ...options.language && { language: options.language }
       }));
     });
@@ -437,13 +436,12 @@ var TTSResource = class {
         callbacks.onOpen?.();
         ws.send(JSON.stringify({
           text: options.text,
-          model: options.model || "kugel-1-turbo",
+          model_id: options.modelId || "kugel-1-turbo",
           voice_id: options.voiceId,
           cfg_scale: options.cfgScale ?? 2,
           max_new_tokens: options.maxNewTokens ?? 2048,
           sample_rate: options.sampleRate ?? 24e3,
-          speaker_prefix: options.speakerPrefix ?? true,
-          normalize: options.normalize ?? false,
+          normalize: options.normalize ?? true,
           ...options.language && { language: options.language }
         }));
       };
@@ -676,7 +674,6 @@ var MultiContextSession = class {
       if (this.config.cfgScale) msg.cfg_scale = this.config.cfgScale;
       if (this.config.maxNewTokens) msg.max_new_tokens = this.config.maxNewTokens;
       if (this.config.normalize !== void 0) msg.normalize = this.config.normalize;
-      if (this.config.speakerPrefix !== void 0) msg.speaker_prefix = this.config.speakerPrefix;
       if (this.config.inactivityTimeout) msg.inactivity_timeout = this.config.inactivityTimeout;
     }
     const voiceId = options?.voiceId || this.config.defaultVoiceId;

package/dist/index.mjs CHANGED Viewed

@@ -379,13 +379,12 @@ var TTSResource = class {
       callbacks.onOpen?.();
       ws.send(JSON.stringify({
         text: options.text,
-        model: options.model || "kugel-1-turbo",
+        model_id: options.modelId || "kugel-1-turbo",
         voice_id: options.voiceId,
         cfg_scale: options.cfgScale ?? 2,
         max_new_tokens: options.maxNewTokens ?? 2048,
         sample_rate: options.sampleRate ?? 24e3,
-        speaker_prefix: options.speakerPrefix ?? true,
-        normalize: options.normalize ?? false,
+        normalize: options.normalize ?? true,
         ...options.language && { language: options.language }
       }));
     });
@@ -401,13 +400,12 @@ var TTSResource = class {
         callbacks.onOpen?.();
         ws.send(JSON.stringify({
           text: options.text,
-          model: options.model || "kugel-1-turbo",
+          model_id: options.modelId || "kugel-1-turbo",
           voice_id: options.voiceId,
           cfg_scale: options.cfgScale ?? 2,
           max_new_tokens: options.maxNewTokens ?? 2048,
           sample_rate: options.sampleRate ?? 24e3,
-          speaker_prefix: options.speakerPrefix ?? true,
-          normalize: options.normalize ?? false,
+          normalize: options.normalize ?? true,
           ...options.language && { language: options.language }
         }));
       };
@@ -640,7 +638,6 @@ var MultiContextSession = class {
       if (this.config.cfgScale) msg.cfg_scale = this.config.cfgScale;
       if (this.config.maxNewTokens) msg.max_new_tokens = this.config.maxNewTokens;
       if (this.config.normalize !== void 0) msg.normalize = this.config.normalize;
-      if (this.config.speakerPrefix !== void 0) msg.speaker_prefix = this.config.speakerPrefix;
       if (this.config.inactivityTimeout) msg.inactivity_timeout = this.config.inactivityTimeout;
     }
     const voiceId = options?.voiceId || this.config.defaultVoiceId;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "kugelaudio",
-  "version": "0.1.9",
+  "version": "0.1.11",
   "description": "Official JavaScript/TypeScript SDK for KugelAudio TTS API",
   "main": "dist/index.js",
   "module": "dist/index.mjs",

package/src/client.ts CHANGED Viewed

@@ -369,13 +369,12 @@ class TTSResource {
       ws.send(JSON.stringify({
         text: options.text,
-        model: options.model || 'kugel-1-turbo',
+        model_id: options.modelId || 'kugel-1-turbo',
         voice_id: options.voiceId,
         cfg_scale: options.cfgScale ?? 2.0,
         max_new_tokens: options.maxNewTokens ?? 2048,
         sample_rate: options.sampleRate ?? 24000,
-        speaker_prefix: options.speakerPrefix ?? true,
-        normalize: options.normalize ?? false,
+        normalize: options.normalize ?? true,
         ...(options.language && { language: options.language }),
       }));
     });
@@ -397,13 +396,12 @@ class TTSResource {
         // Send TTS request
         ws.send(JSON.stringify({
           text: options.text,
-          model: options.model || 'kugel-1-turbo',
+          model_id: options.modelId || 'kugel-1-turbo',
           voice_id: options.voiceId,
           cfg_scale: options.cfgScale ?? 2.0,
           max_new_tokens: options.maxNewTokens ?? 2048,
           sample_rate: options.sampleRate ?? 24000,
-          speaker_prefix: options.speakerPrefix ?? true,
-          normalize: options.normalize ?? false,
+          normalize: options.normalize ?? true,
           ...(options.language && { language: options.language }),
         }));
       };
@@ -686,7 +684,6 @@ class MultiContextSession {
       if (this.config.cfgScale) msg.cfg_scale = this.config.cfgScale;
       if (this.config.maxNewTokens) msg.max_new_tokens = this.config.maxNewTokens;
       if (this.config.normalize !== undefined) msg.normalize = this.config.normalize;
-      if (this.config.speakerPrefix !== undefined) msg.speaker_prefix = this.config.speakerPrefix;
       if (this.config.inactivityTimeout) msg.inactivity_timeout = this.config.inactivityTimeout;
     }
@@ -807,13 +804,13 @@ class MultiContextSession {
  * // Generate audio with fast model (1.5B params)
  * const audio = await client.tts.generate({
  *   text: 'Hello, world!',
- *   model: 'kugel-1-turbo',
+ *   modelId: 'kugel-1-turbo',
  * });
  *
  * // Generate audio with premium model (7B params)
  * const audio = await client.tts.generate({
  *   text: 'Hello, world!',
- *   model: 'kugel-1',
+ *   modelId: 'kugel-1',
  * });
  * ```
  */

package/src/index.ts CHANGED Viewed

@@ -18,13 +18,13 @@
  * // Generate audio (non-streaming)
  * const audio = await client.tts.generate({
  *   text: 'Hello, world!',
- *   model: 'kugel-1-turbo',
+ *   modelId: 'kugel-1-turbo',
  *   voiceId: 123,
  * });
  *
  * // Generate audio (streaming)
  * await client.tts.stream(
- *   { text: 'Hello, world!', model: 'kugel-1-turbo' },
+ *   { text: 'Hello, world!', modelId: 'kugel-1-turbo' },
  *   {
  *     onChunk: (chunk) => {
  *       // Process audio chunk

package/src/types.ts CHANGED Viewed

@@ -17,7 +17,7 @@ export interface Model {
 /**
  * Voice category types.
  */
-export type VoiceCategory = 'premade' | 'cloned' | 'designed';
+export type VoiceCategory = 'premade' | 'cloned' | 'designed' | 'conversational' | 'narrative' | 'narrative_story' | 'characters';
 /**
  * Voice sex types.
@@ -54,7 +54,7 @@ export interface GenerateOptions {
   /** Text to synthesize */
   text: string;
   /** Model to use: 'kugel-1-turbo' (1.5B, fast) or 'kugel-1' (7B, premium). Default: 'kugel-1-turbo' */
-  model?: string;
+  modelId?: string;
   /** Voice ID to use */
   voiceId?: number;
   /** CFG scale for generation (default: 2.0) */
@@ -63,21 +63,18 @@ export interface GenerateOptions {
   maxNewTokens?: number;
   /** Output sample rate (default: 24000) */
   sampleRate?: number;
-  /** Whether to add speaker prefix (default: true) */
-  speakerPrefix?: boolean;
   /**
    * Enable text normalization (converts numbers, dates, etc. to spoken words).
    * When true, text will be normalized before TTS generation.
-   * Default: false
+   * Default: true
    *
-   * ⚠️ WARNING: Using normalize=true without specifying language adds ~150ms
-   * latency for language auto-detection. For best performance, always specify
-   * the language parameter when using normalization.
+   * ⚠️ For best performance, always specify the language parameter when using
+   * normalization. Without it, language auto-detection adds ~150ms latency.
    */
   normalize?: boolean;
   /**
    * ISO 639-1 language code for text normalization (e.g., 'de', 'en', 'fr').
-   * If not provided and normalize is true, language will be auto-detected
+   * If not provided and normalize is true (default), language will be auto-detected
    * (adds ~150ms latency).
    *
    * Supported: de, en, fr, es, it, pt, nl, pl, sv, da, no, fi, cs, hu, ro,
@@ -98,12 +95,20 @@ export interface StreamConfig {
   maxNewTokens?: number;
   /** Output sample rate */
   sampleRate?: number;
-  /** Whether to add speaker prefix */
-  speakerPrefix?: boolean;
   /** Auto-flush timeout in milliseconds */
   flushTimeoutMs?: number;
   /** Maximum buffer length */
   maxBufferLength?: number;
+  /**
+   * Enable text normalization (converts numbers, dates, etc. to spoken words).
+   * Default: true
+   */
+  normalize?: boolean;
+  /**
+   * ISO 639-1 language code for text normalization (e.g., 'de', 'en', 'fr').
+   * Specify to avoid ~150ms auto-detection latency.
+   */
+  language?: string;
 }
 /**
@@ -192,7 +197,7 @@ export interface KugelAudioOptions {
   orgId?: number;
   /** API base URL (default: https://api.kugelaudio.com) */
   apiUrl?: string;
-  /** TTS server URL (default: https://eu.kugelaudio.com) */
+  /** TTS server URL (default: same as apiUrl) */
   ttsUrl?: string;
   /** Request timeout in milliseconds (default: 60000) */
   timeout?: number;
@@ -221,8 +226,6 @@ export interface MultiContextConfig {
   maxNewTokens?: number;
   /** Enable text normalization (default: true) */
   normalize?: boolean;
-  /** Add speaker prefix (default: true) */
-  speakerPrefix?: boolean;
   /** Seconds before context auto-closes (default: 20.0) */
   inactivityTimeout?: number;
 }