npm - kugelaudio - Versions diffs - 0.2.0 → 0.2.3 - Mend

kugelaudio 0.2.0 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/src/types.ts CHANGED

@@ -17,7 +17,7 @@ export interface Model {
 /**
  * Voice category types.
  */
-export type VoiceCategory = 'premade' | 'cloned' | 'designed';
+export type VoiceCategory = 'premade' | 'cloned' | 'designed' | 'conversational' | 'narrative' | 'narrative_story' | 'characters';
 /**
  * Voice sex types.
@@ -47,6 +47,24 @@ export interface Voice {
   verified: boolean;
 }
+/**
+ * Word-level timestamp from server-side forced alignment.
+ */
+export interface WordTimestamp {
+  /** The aligned word */
+  word: string;
+  /** Start time in milliseconds (relative to chunk/audio start) */
+  startMs: number;
+  /** End time in milliseconds (relative to chunk/audio start) */
+  endMs: number;
+  /** Start character offset in the original text */
+  charStart: number;
+  /** End character offset in the original text */
+  charEnd: number;
+  /** Alignment confidence score (0.0 - 1.0) */
+  score: number;
+}
 /**
  * TTS generation request options.
  */
@@ -54,7 +72,7 @@ export interface GenerateOptions {
   /** Text to synthesize */
   text: string;
   /** Model to use: 'kugel-1-turbo' (1.5B, fast) or 'kugel-1' (7B, premium). Default: 'kugel-1-turbo' */
-  model?: string;
+  modelId?: string;
   /** Voice ID to use */
   voiceId?: number;
   /** CFG scale for generation (default: 2.0) */
@@ -63,27 +81,30 @@ export interface GenerateOptions {
   maxNewTokens?: number;
   /** Output sample rate (default: 24000) */
   sampleRate?: number;
-  /** Whether to add speaker prefix (default: true) */
-  speakerPrefix?: boolean;
   /**
    * Enable text normalization (converts numbers, dates, etc. to spoken words).
    * When true, text will be normalized before TTS generation.
-   * Default: false
+   * Default: true
    *
-   * ⚠️ WARNING: Using normalize=true without specifying language adds ~150ms
-   * latency for language auto-detection. For best performance, always specify
-   * the language parameter when using normalization.
+   * ⚠️ For best performance, always specify the language parameter when using
+   * normalization. Without it, language auto-detection adds ~150ms latency.
    */
   normalize?: boolean;
   /**
    * ISO 639-1 language code for text normalization (e.g., 'de', 'en', 'fr').
-   * If not provided and normalize is true, language will be auto-detected
+   * If not provided and normalize is true (default), language will be auto-detected
    * (adds ~150ms latency).
    *
    * Supported: de, en, fr, es, it, pt, nl, pl, sv, da, no, fi, cs, hu, ro,
    *            el, uk, bg, tr, vi, ar, hi, zh, ja, ko
    */
   language?: string;
+  /**
+   * Request word-level timestamps alongside audio.
+   * When true, the server performs forced alignment and returns per-word timing boundaries.
+   * Default: false
+   */
+  wordTimestamps?: boolean;
 }
 /**
@@ -98,12 +119,25 @@ export interface StreamConfig {
   maxNewTokens?: number;
   /** Output sample rate */
   sampleRate?: number;
-  /** Whether to add speaker prefix */
-  speakerPrefix?: boolean;
   /** Auto-flush timeout in milliseconds */
   flushTimeoutMs?: number;
   /** Maximum buffer length */
   maxBufferLength?: number;
+  /**
+   * Enable text normalization (converts numbers, dates, etc. to spoken words).
+   * Default: true
+   */
+  normalize?: boolean;
+  /**
+   * ISO 639-1 language code for text normalization (e.g., 'de', 'en', 'fr').
+   * Specify to avoid ~150ms auto-detection latency.
+   */
+  language?: string;
+  /**
+   * Request word-level timestamps alongside audio.
+   * Default: false
+   */
+  wordTimestamps?: boolean;
 }
 /**
@@ -160,6 +194,8 @@ export interface AudioResponse {
   generationMs: number;
   /** Real-time factor */
   rtf: number;
+  /** Per-word timing boundaries (populated when `wordTimestamps: true`) */
+  wordTimestamps: WordTimestamp[];
 }
 /**
@@ -168,6 +204,8 @@ export interface AudioResponse {
 export interface StreamCallbacks {
   /** Called when an audio chunk is received */
   onChunk?: (chunk: AudioChunk) => void;
+  /** Called when word-level timestamps are received (requires `wordTimestamps: true`) */
+  onWordTimestamps?: (timestamps: WordTimestamp[]) => void;
   /** Called when generation is complete */
   onFinal?: (stats: GenerationStats) => void;
   /** Called on error */
@@ -188,9 +226,11 @@ export interface KugelAudioOptions {
   isMasterKey?: boolean;
   /** Whether apiKey is a JWT token (for user authentication). Takes precedence over isMasterKey. */
   isToken?: boolean;
+  /** Organisation ID to bill usage against (required for token auth to enable usage recording). */
+  orgId?: number;
   /** API base URL (default: https://api.kugelaudio.com) */
   apiUrl?: string;
-  /** TTS server URL (default: https://eu.kugelaudio.com) */
+  /** TTS server URL (default: same as apiUrl) */
   ttsUrl?: string;
   /** Request timeout in milliseconds (default: 60000) */
   timeout?: number;
@@ -205,3 +245,67 @@ export interface ApiError {
   statusCode?: number;
 }
+/**
+ * Multi-context session configuration.
+ */
+export interface MultiContextConfig {
+  /** Default voice ID for new contexts */
+  defaultVoiceId?: number;
+  /** Output sample rate (default: 24000) */
+  sampleRate?: number;
+  /** CFG scale for generation (default: 2.0) */
+  cfgScale?: number;
+  /** Maximum tokens to generate (default: 2048) */
+  maxNewTokens?: number;
+  /** Enable text normalization (default: true) */
+  normalize?: boolean;
+  /** Seconds before context auto-closes (default: 20.0) */
+  inactivityTimeout?: number;
+}
+/**
+ * Voice settings for a specific context.
+ */
+export interface ContextVoiceSettings {
+  /** Stability (0.0-1.0) */
+  stability?: number;
+  /** Similarity boost (0.0-1.0) */
+  similarityBoost?: number;
+  /** Style (0.0-1.0) */
+  style?: number;
+  /** Use speaker boost */
+  useSpeakerBoost?: boolean;
+  /** Speed multiplier */
+  speed?: number;
+}
+/**
+ * Audio chunk from multi-context streaming.
+ */
+export interface MultiContextAudioChunk extends AudioChunk {
+  /** Context ID this audio belongs to */
+  contextId: string;
+}
+/**
+ * Event callbacks for multi-context streaming.
+ */
+export interface MultiContextCallbacks {
+  /** Called when session is started */
+  onSessionStarted?: (sessionId: string) => void;
+  /** Called when a context is created */
+  onContextCreated?: (contextId: string) => void;
+  /** Called when an audio chunk is received */
+  onChunk?: (chunk: MultiContextAudioChunk) => void;
+  /** Called when a context finishes generating */
+  onContextFinal?: (contextId: string) => void;
+  /** Called when a context is closed */
+  onContextClosed?: (contextId: string) => void;
+  /** Called when a context times out */
+  onContextTimeout?: (contextId: string) => void;
+  /** Called when session is closed */
+  onSessionClosed?: (stats: Record<string, unknown>) => void;
+  /** Called on error */
+  onError?: (error: Error, contextId?: string) => void;
+}

package/src/websocket.ts ADDED

@@ -0,0 +1,44 @@
+/**
+ * WebSocket compatibility layer for browser and Node.js environments.
+ *
+ * IMPORTANT: WebSocket resolution is lazy to avoid top-level side-effects
+ * that break server-side bundlers (Turbopack / Webpack) when this module
+ * is imported in a Node.js (API route) context.
+ */
+let _cachedWs: typeof WebSocket | null = null;
+/**
+ * Get the WebSocket constructor for the current environment.
+ * Uses native WebSocket in browsers, ws package in Node.js.
+ * Result is cached after first call.
+ */
+export function getWebSocket(): typeof WebSocket {
+    if (_cachedWs) return _cachedWs;
+    // Browser environment
+    if (typeof globalThis !== 'undefined' && typeof (globalThis as any).WebSocket !== 'undefined') {
+        _cachedWs = (globalThis as any).WebSocket;
+        return _cachedWs!;
+    }
+    // Node.js environment - use ws package via dynamic require
+    try {
+        // Use Function constructor to hide require from static analysis by bundlers
+        // eslint-disable-next-line no-new-func
+        const _require = typeof require !== 'undefined'
+            ? require
+            : Function('return typeof require !== "undefined" ? require : undefined')();
+        if (_require) {
+            const ws = _require('ws');
+            _cachedWs = ws.default || ws;
+            return _cachedWs!;
+        }
+    } catch {
+        // Fall through to error
+    }
+    throw new Error(
+        'WebSocket not available. In Node.js, install the "ws" package: npm install ws'
+    );
+}