npm - kugelaudio - Versions diffs - 0.1.5 → 0.2.0 - Mend

kugelaudio 0.1.5 → 0.2.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (8)

package/README.md CHANGED Viewed

@@ -150,6 +150,8 @@ const audio = await client.tts.generate({
   maxNewTokens: 2048,        // Maximum tokens to generate
   sampleRate: 24000,         // Output sample rate
   speakerPrefix: true,       // Add speaker prefix for better quality
+  normalize: true,           // Enable text normalization (see below)
+  language: 'en',            // Language for normalization
 });
 // Audio properties
@@ -250,6 +252,53 @@ onChunk: (chunk) => {
 }
 ```
+## Text Normalization
+Text normalization converts numbers, dates, times, and other non-verbal text into spoken words. For example:
+- "I have 3 apples" → "I have three apples"
+- "The meeting is at 2:30 PM" → "The meeting is at two thirty PM"
+- "€50.99" → "fifty euros and ninety-nine cents"
+### Usage
+```typescript
+// With explicit language (recommended - fastest)
+const audio = await client.tts.generate({
+  text: 'I bought 3 items for €50.99 on 01/15/2024.',
+  normalize: true,
+  language: 'en',  // Specify language for best performance
+});
+// With auto-detection (adds ~150ms latency)
+const audio = await client.tts.generate({
+  text: 'Ich habe 3 Artikel für 50,99€ gekauft.',
+  normalize: true,
+  // language not specified - will auto-detect
+});
+```
+### Supported Languages
+| Code | Language | Code | Language |
+|------|----------|------|----------|
+| `de` | German | `nl` | Dutch |
+| `en` | English | `pl` | Polish |
+| `fr` | French | `sv` | Swedish |
+| `es` | Spanish | `da` | Danish |
+| `it` | Italian | `no` | Norwegian |
+| `pt` | Portuguese | `fi` | Finnish |
+| `cs` | Czech | `hu` | Hungarian |
+| `ro` | Romanian | `el` | Greek |
+| `uk` | Ukrainian | `bg` | Bulgarian |
+| `tr` | Turkish | `vi` | Vietnamese |
+| `ar` | Arabic | `hi` | Hindi |
+| `zh` | Chinese | `ja` | Japanese |
+| `ko` | Korean | | |
+### Performance Warning
+> ⚠️ **Latency Warning**: Using `normalize: true` without specifying `language` adds approximately **150ms latency** for language auto-detection. For best performance in latency-sensitive applications, always specify the `language` parameter.
 ## Error Handling
 ```typescript
@@ -306,9 +355,13 @@ interface GenerateOptions {
   maxNewTokens?: number;   // Default: 2048
   sampleRate?: number;     // Default: 24000
   speakerPrefix?: boolean; // Default: true
+  normalize?: boolean;     // Default: false - Enable text normalization
+  language?: string;       // ISO 639-1 code for normalization (e.g., 'en', 'de')
 }
 ```
+> ⚠️ **Note**: Using `normalize: true` without `language` adds ~150ms latency for auto-detection.
 ### AudioChunk
 ```typescript

package/dist/index.d.mts CHANGED Viewed

@@ -59,6 +59,25 @@ interface GenerateOptions {
     sampleRate?: number;
     /** Whether to add speaker prefix (default: true) */
     speakerPrefix?: boolean;
+    /**
+     * Enable text normalization (converts numbers, dates, etc. to spoken words).
+     * When true, text will be normalized before TTS generation.
+     * Default: false
+     *
+     * ⚠️ WARNING: Using normalize=true without specifying language adds ~150ms
+     * latency for language auto-detection. For best performance, always specify
+     * the language parameter when using normalization.
+     */
+    normalize?: boolean;
+    /**
+     * ISO 639-1 language code for text normalization (e.g., 'de', 'en', 'fr').
+     * If not provided and normalize is true, language will be auto-detected
+     * (adds ~150ms latency).
+     *
+     * Supported: de, en, fr, es, it, pt, nl, pl, sv, da, no, fi, cs, hu, ro,
+     *            el, uk, bg, tr, vi, ar, hi, zh, ja, ko
+     */
+    language?: string;
 }
 /**
  * Streaming session configuration.
@@ -209,6 +228,28 @@ declare class TTSResource {
     private pendingRequests;
     private requestCounter;
     constructor(client: KugelAudio);
+    /**
+     * Pre-establish WebSocket connection for faster first request.
+     *
+     * Call this at application startup to eliminate cold start latency
+     * (~300-500ms) from your first TTS request.
+     *
+     * @example
+     * ```typescript
+     * const client = new KugelAudio({ apiKey: 'your_api_key' });
+     *
+     * // Pre-connect at startup
+     * await client.tts.connect();
+     *
+     * // First request is now fast (~100ms instead of ~500ms)
+     * await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
+     * ```
+     */
+    connect(): Promise<void>;
+    /**
+     * Check if WebSocket connection is established and open.
+     */
+    isConnected(): boolean;
     /**
      * Generate audio from text with streaming via WebSocket.
      * Returns complete audio after all chunks are received.
@@ -290,6 +331,23 @@ declare class KugelAudio {
     /** TTS resource */
     readonly tts: TTSResource;
     constructor(options: KugelAudioOptions);
+    /**
+     * Create a pre-connected KugelAudio client.
+     *
+     * Use this factory method to get a client that's already connected
+     * and ready for fast TTS requests. This eliminates cold start latency
+     * (~300-500ms) from your first TTS request.
+     *
+     * @example
+     * ```typescript
+     * // Client is ready immediately - no cold start on first request
+     * const client = await KugelAudio.create({ apiKey: 'your_api_key' });
+     *
+     * // First request is fast (~100ms instead of ~500ms)
+     * await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
+     * ```
+     */
+    static create(options: KugelAudioOptions): Promise<KugelAudio>;
     /** Get API key */
     get apiKey(): string;
     /** Check if using master key authentication */
@@ -303,6 +361,28 @@ declare class KugelAudio {
      * This closes any pooled WebSocket connections.
      */
     close(): void;
+    /**
+     * Pre-establish WebSocket connection for faster first request.
+     *
+     * Call this at application startup to eliminate cold start latency
+     * (~300-500ms) from your first TTS request.
+     *
+     * @example
+     * ```typescript
+     * const client = new KugelAudio({ apiKey: 'your_api_key' });
+     *
+     * // Pre-connect at startup
+     * await client.connect();
+     *
+     * // First request is now fast (~100ms instead of ~500ms)
+     * await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
+     * ```
+     */
+    connect(): Promise<void>;
+    /**
+     * Check if WebSocket connection is established and open.
+     */
+    isConnected(): boolean;
     /**
      * Make an HTTP request to the API.
      * @internal

package/dist/index.d.ts CHANGED Viewed

@@ -59,6 +59,25 @@ interface GenerateOptions {
     sampleRate?: number;
     /** Whether to add speaker prefix (default: true) */
     speakerPrefix?: boolean;
+    /**
+     * Enable text normalization (converts numbers, dates, etc. to spoken words).
+     * When true, text will be normalized before TTS generation.
+     * Default: false
+     *
+     * ⚠️ WARNING: Using normalize=true without specifying language adds ~150ms
+     * latency for language auto-detection. For best performance, always specify
+     * the language parameter when using normalization.
+     */
+    normalize?: boolean;
+    /**
+     * ISO 639-1 language code for text normalization (e.g., 'de', 'en', 'fr').
+     * If not provided and normalize is true, language will be auto-detected
+     * (adds ~150ms latency).
+     *
+     * Supported: de, en, fr, es, it, pt, nl, pl, sv, da, no, fi, cs, hu, ro,
+     *            el, uk, bg, tr, vi, ar, hi, zh, ja, ko
+     */
+    language?: string;
 }
 /**
  * Streaming session configuration.
@@ -209,6 +228,28 @@ declare class TTSResource {
     private pendingRequests;
     private requestCounter;
     constructor(client: KugelAudio);
+    /**
+     * Pre-establish WebSocket connection for faster first request.
+     *
+     * Call this at application startup to eliminate cold start latency
+     * (~300-500ms) from your first TTS request.
+     *
+     * @example
+     * ```typescript
+     * const client = new KugelAudio({ apiKey: 'your_api_key' });
+     *
+     * // Pre-connect at startup
+     * await client.tts.connect();
+     *
+     * // First request is now fast (~100ms instead of ~500ms)
+     * await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
+     * ```
+     */
+    connect(): Promise<void>;
+    /**
+     * Check if WebSocket connection is established and open.
+     */
+    isConnected(): boolean;
     /**
      * Generate audio from text with streaming via WebSocket.
      * Returns complete audio after all chunks are received.
@@ -290,6 +331,23 @@ declare class KugelAudio {
     /** TTS resource */
     readonly tts: TTSResource;
     constructor(options: KugelAudioOptions);
+    /**
+     * Create a pre-connected KugelAudio client.
+     *
+     * Use this factory method to get a client that's already connected
+     * and ready for fast TTS requests. This eliminates cold start latency
+     * (~300-500ms) from your first TTS request.
+     *
+     * @example
+     * ```typescript
+     * // Client is ready immediately - no cold start on first request
+     * const client = await KugelAudio.create({ apiKey: 'your_api_key' });
+     *
+     * // First request is fast (~100ms instead of ~500ms)
+     * await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
+     * ```
+     */
+    static create(options: KugelAudioOptions): Promise<KugelAudio>;
     /** Get API key */
     get apiKey(): string;
     /** Check if using master key authentication */
@@ -303,6 +361,28 @@ declare class KugelAudio {
      * This closes any pooled WebSocket connections.
      */
     close(): void;
+    /**
+     * Pre-establish WebSocket connection for faster first request.
+     *
+     * Call this at application startup to eliminate cold start latency
+     * (~300-500ms) from your first TTS request.
+     *
+     * @example
+     * ```typescript
+     * const client = new KugelAudio({ apiKey: 'your_api_key' });
+     *
+     * // Pre-connect at startup
+     * await client.connect();
+     *
+     * // First request is now fast (~100ms instead of ~500ms)
+     * await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
+     * ```
+     */
+    connect(): Promise<void>;
+    /**
+     * Check if WebSocket connection is established and open.
+     */
+    isConnected(): boolean;
     /**
      * Make an HTTP request to the API.
      * @internal

package/dist/index.js CHANGED Viewed

@@ -217,6 +217,32 @@ var TTSResource = class {
     this.pendingRequests = /* @__PURE__ */ new Map();
     this.requestCounter = 0;
   }
+  /**
+   * Pre-establish WebSocket connection for faster first request.
+   *
+   * Call this at application startup to eliminate cold start latency
+   * (~300-500ms) from your first TTS request.
+   *
+   * @example
+   * ```typescript
+   * const client = new KugelAudio({ apiKey: 'your_api_key' });
+   *
+   * // Pre-connect at startup
+   * await client.tts.connect();
+   *
+   * // First request is now fast (~100ms instead of ~500ms)
+   * await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
+   * ```
+   */
+  async connect() {
+    await this.getConnection();
+  }
+  /**
+   * Check if WebSocket connection is established and open.
+   */
+  isConnected() {
+    return this.wsConnection !== null && this.wsConnection.readyState === WebSocket.OPEN;
+  }
   /**
    * Generate audio from text with streaming via WebSocket.
    * Returns complete audio after all chunks are received.
@@ -390,7 +416,9 @@ var TTSResource = class {
         cfg_scale: options.cfgScale ?? 2,
         max_new_tokens: options.maxNewTokens ?? 2048,
         sample_rate: options.sampleRate ?? 24e3,
-        speaker_prefix: options.speakerPrefix ?? true
+        speaker_prefix: options.speakerPrefix ?? true,
+        normalize: options.normalize ?? false,
+        ...options.language && { language: options.language }
       }));
     });
   }
@@ -410,7 +438,9 @@ var TTSResource = class {
           cfg_scale: options.cfgScale ?? 2,
           max_new_tokens: options.maxNewTokens ?? 2048,
           sample_rate: options.sampleRate ?? 24e3,
-          speaker_prefix: options.speakerPrefix ?? true
+          speaker_prefix: options.speakerPrefix ?? true,
+          normalize: options.normalize ?? false,
+          ...options.language && { language: options.language }
         }));
       };
       ws.onmessage = (event) => {
@@ -492,7 +522,7 @@ var TTSResource = class {
     return new KugelAudioError(message);
   }
 };
-var KugelAudio = class {
+var KugelAudio = class _KugelAudio {
   constructor(options) {
     if (!options.apiKey) {
       throw new Error("API key is required");
@@ -507,6 +537,27 @@ var KugelAudio = class {
     this.voices = new VoicesResource(this);
     this.tts = new TTSResource(this);
   }
+  /**
+   * Create a pre-connected KugelAudio client.
+   *
+   * Use this factory method to get a client that's already connected
+   * and ready for fast TTS requests. This eliminates cold start latency
+   * (~300-500ms) from your first TTS request.
+   *
+   * @example
+   * ```typescript
+   * // Client is ready immediately - no cold start on first request
+   * const client = await KugelAudio.create({ apiKey: 'your_api_key' });
+   *
+   * // First request is fast (~100ms instead of ~500ms)
+   * await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
+   * ```
+   */
+  static async create(options) {
+    const client = new _KugelAudio(options);
+    await client.connect();
+    return client;
+  }
   /** Get API key */
   get apiKey() {
     return this._apiKey;
@@ -530,6 +581,32 @@ var KugelAudio = class {
   close() {
     this.tts.close();
   }
+  /**
+   * Pre-establish WebSocket connection for faster first request.
+   *
+   * Call this at application startup to eliminate cold start latency
+   * (~300-500ms) from your first TTS request.
+   *
+   * @example
+   * ```typescript
+   * const client = new KugelAudio({ apiKey: 'your_api_key' });
+   *
+   * // Pre-connect at startup
+   * await client.connect();
+   *
+   * // First request is now fast (~100ms instead of ~500ms)
+   * await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
+   * ```
+   */
+  async connect() {
+    await this.tts.connect();
+  }
+  /**
+   * Check if WebSocket connection is established and open.
+   */
+  isConnected() {
+    return this.tts.isConnected();
+  }
   /**
    * Make an HTTP request to the API.
    * @internal

package/dist/index.mjs CHANGED Viewed

@@ -181,6 +181,32 @@ var TTSResource = class {
     this.pendingRequests = /* @__PURE__ */ new Map();
     this.requestCounter = 0;
   }
+  /**
+   * Pre-establish WebSocket connection for faster first request.
+   *
+   * Call this at application startup to eliminate cold start latency
+   * (~300-500ms) from your first TTS request.
+   *
+   * @example
+   * ```typescript
+   * const client = new KugelAudio({ apiKey: 'your_api_key' });
+   *
+   * // Pre-connect at startup
+   * await client.tts.connect();
+   *
+   * // First request is now fast (~100ms instead of ~500ms)
+   * await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
+   * ```
+   */
+  async connect() {
+    await this.getConnection();
+  }
+  /**
+   * Check if WebSocket connection is established and open.
+   */
+  isConnected() {
+    return this.wsConnection !== null && this.wsConnection.readyState === WebSocket.OPEN;
+  }
   /**
    * Generate audio from text with streaming via WebSocket.
    * Returns complete audio after all chunks are received.
@@ -354,7 +380,9 @@ var TTSResource = class {
         cfg_scale: options.cfgScale ?? 2,
         max_new_tokens: options.maxNewTokens ?? 2048,
         sample_rate: options.sampleRate ?? 24e3,
-        speaker_prefix: options.speakerPrefix ?? true
+        speaker_prefix: options.speakerPrefix ?? true,
+        normalize: options.normalize ?? false,
+        ...options.language && { language: options.language }
       }));
     });
   }
@@ -374,7 +402,9 @@ var TTSResource = class {
           cfg_scale: options.cfgScale ?? 2,
           max_new_tokens: options.maxNewTokens ?? 2048,
           sample_rate: options.sampleRate ?? 24e3,
-          speaker_prefix: options.speakerPrefix ?? true
+          speaker_prefix: options.speakerPrefix ?? true,
+          normalize: options.normalize ?? false,
+          ...options.language && { language: options.language }
         }));
       };
       ws.onmessage = (event) => {
@@ -456,7 +486,7 @@ var TTSResource = class {
     return new KugelAudioError(message);
   }
 };
-var KugelAudio = class {
+var KugelAudio = class _KugelAudio {
   constructor(options) {
     if (!options.apiKey) {
       throw new Error("API key is required");
@@ -471,6 +501,27 @@ var KugelAudio = class {
     this.voices = new VoicesResource(this);
     this.tts = new TTSResource(this);
   }
+  /**
+   * Create a pre-connected KugelAudio client.
+   *
+   * Use this factory method to get a client that's already connected
+   * and ready for fast TTS requests. This eliminates cold start latency
+   * (~300-500ms) from your first TTS request.
+   *
+   * @example
+   * ```typescript
+   * // Client is ready immediately - no cold start on first request
+   * const client = await KugelAudio.create({ apiKey: 'your_api_key' });
+   *
+   * // First request is fast (~100ms instead of ~500ms)
+   * await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
+   * ```
+   */
+  static async create(options) {
+    const client = new _KugelAudio(options);
+    await client.connect();
+    return client;
+  }
   /** Get API key */
   get apiKey() {
     return this._apiKey;
@@ -494,6 +545,32 @@ var KugelAudio = class {
   close() {
     this.tts.close();
   }
+  /**
+   * Pre-establish WebSocket connection for faster first request.
+   *
+   * Call this at application startup to eliminate cold start latency
+   * (~300-500ms) from your first TTS request.
+   *
+   * @example
+   * ```typescript
+   * const client = new KugelAudio({ apiKey: 'your_api_key' });
+   *
+   * // Pre-connect at startup
+   * await client.connect();
+   *
+   * // First request is now fast (~100ms instead of ~500ms)
+   * await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
+   * ```
+   */
+  async connect() {
+    await this.tts.connect();
+  }
+  /**
+   * Check if WebSocket connection is established and open.
+   */
+  isConnected() {
+    return this.tts.isConnected();
+  }
   /**
    * Make an HTTP request to the API.
    * @internal

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "kugelaudio",
-  "version": "0.1.5",
+  "version": "0.2.0",
   "description": "Official JavaScript/TypeScript SDK for KugelAudio TTS API",
   "main": "dist/index.js",
   "module": "dist/index.mjs",

package/src/client.ts CHANGED Viewed

@@ -122,6 +122,34 @@ class TTSResource {
   constructor(private client: KugelAudio) {}
+  /**
+   * Pre-establish WebSocket connection for faster first request.
+   *
+   * Call this at application startup to eliminate cold start latency
+   * (~300-500ms) from your first TTS request.
+   *
+   * @example
+   * ```typescript
+   * const client = new KugelAudio({ apiKey: 'your_api_key' });
+   *
+   * // Pre-connect at startup
+   * await client.tts.connect();
+   *
+   * // First request is now fast (~100ms instead of ~500ms)
+   * await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
+   * ```
+   */
+  async connect(): Promise<void> {
+    await this.getConnection();
+  }
+  /**
+   * Check if WebSocket connection is established and open.
+   */
+  isConnected(): boolean {
+    return this.wsConnection !== null && this.wsConnection.readyState === WebSocket.OPEN;
+  }
   /**
    * Generate audio from text with streaming via WebSocket.
    * Returns complete audio after all chunks are received.
@@ -342,6 +370,8 @@ class TTSResource {
         max_new_tokens: options.maxNewTokens ?? 2048,
         sample_rate: options.sampleRate ?? 24000,
         speaker_prefix: options.speakerPrefix ?? true,
+        normalize: options.normalize ?? false,
+        ...(options.language && { language: options.language }),
       }));
     });
   }
@@ -368,6 +398,8 @@ class TTSResource {
           max_new_tokens: options.maxNewTokens ?? 2048,
           sample_rate: options.sampleRate ?? 24000,
           speaker_prefix: options.speakerPrefix ?? true,
+          normalize: options.normalize ?? false,
+          ...(options.language && { language: options.language }),
         }));
       };
@@ -518,6 +550,28 @@ export class KugelAudio {
     this.tts = new TTSResource(this);
   }
+  /**
+   * Create a pre-connected KugelAudio client.
+   *
+   * Use this factory method to get a client that's already connected
+   * and ready for fast TTS requests. This eliminates cold start latency
+   * (~300-500ms) from your first TTS request.
+   *
+   * @example
+   * ```typescript
+   * // Client is ready immediately - no cold start on first request
+   * const client = await KugelAudio.create({ apiKey: 'your_api_key' });
+   *
+   * // First request is fast (~100ms instead of ~500ms)
+   * await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
+   * ```
+   */
+  static async create(options: KugelAudioOptions): Promise<KugelAudio> {
+    const client = new KugelAudio(options);
+    await client.connect();
+    return client;
+  }
   /** Get API key */
   get apiKey(): string {
     return this._apiKey;
@@ -546,6 +600,34 @@ export class KugelAudio {
     this.tts.close();
   }
+  /**
+   * Pre-establish WebSocket connection for faster first request.
+   *
+   * Call this at application startup to eliminate cold start latency
+   * (~300-500ms) from your first TTS request.
+   *
+   * @example
+   * ```typescript
+   * const client = new KugelAudio({ apiKey: 'your_api_key' });
+   *
+   * // Pre-connect at startup
+   * await client.connect();
+   *
+   * // First request is now fast (~100ms instead of ~500ms)
+   * await client.tts.stream({ text: 'Hello' }, { onChunk: ... });
+   * ```
+   */
+  async connect(): Promise<void> {
+    await this.tts.connect();
+  }
+  /**
+   * Check if WebSocket connection is established and open.
+   */
+  isConnected(): boolean {
+    return this.tts.isConnected();
+  }
   /**
    * Make an HTTP request to the API.
    * @internal

package/src/types.ts CHANGED Viewed

@@ -65,6 +65,25 @@ export interface GenerateOptions {
   sampleRate?: number;
   /** Whether to add speaker prefix (default: true) */
   speakerPrefix?: boolean;
+  /**
+   * Enable text normalization (converts numbers, dates, etc. to spoken words).
+   * When true, text will be normalized before TTS generation.
+   * Default: false
+   *
+   * ⚠️ WARNING: Using normalize=true without specifying language adds ~150ms
+   * latency for language auto-detection. For best performance, always specify
+   * the language parameter when using normalization.
+   */
+  normalize?: boolean;
+  /**
+   * ISO 639-1 language code for text normalization (e.g., 'de', 'en', 'fr').
+   * If not provided and normalize is true, language will be auto-detected
+   * (adds ~150ms latency).
+   *
+   * Supported: de, en, fr, es, it, pt, nl, pl, sv, da, no, fi, cs, hu, ro,
+   *            el, uk, bg, tr, vi, ar, hi, zh, ja, ko
+   */
+  language?: string;
 }
 /**