npm - @tekyzinc/stt-component - Version diff - 0.3.0 → 0.3.2 - Mend

@tekyzinc/stt-component 0.3.0 → 0.3.2

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/index.d.cts CHANGED Viewed

@@ -188,12 +188,9 @@ declare class WorkerManager extends TypedEventEmitter<WorkerManagerEvents> {
     private handleMessage;
 }
-/**
- * Manages mid-recording correction timing.
- * Two triggers: pause detection and forced interval.
- */
 declare class CorrectionOrchestrator {
     private forcedTimer;
+    private initialTimer;
     private lastCorrectionTime;
     private correctionFn;
     private config;
@@ -201,7 +198,9 @@ declare class CorrectionOrchestrator {
     constructor(config: ResolvedSTTConfig['correction']);
     /** Set the function to call when a correction is triggered. */
     setCorrectionFn(fn: () => void): void;
-    /** Start the correction orchestrator (begin forced interval timer). */
+    /** Start the correction orchestrator.
+     *  Fires a quick initial correction after 1s for early feedback, then
+     *  switches to the regular forcedInterval cadence from that point. */
     start(): void;
     /** Stop the orchestrator (clear all timers). */
     stop(): void;
@@ -219,8 +218,13 @@ declare class SpeechStreamingManager {
     private recognition;
     private accumulated;
     private active;
+    private keepingWarm;
+    private currentLang;
     private receivedResult;
+    private lastFinalIndex;
+    private lastFinalText;
     private noResultTimer;
+    private idleTimer;
     private onTranscript;
     private onPause;
     private onError;
@@ -237,20 +241,29 @@ declare class SpeechStreamingManager {
     setOnDebug(fn: (message: string) => void): void;
     private log;
     /**
-     * Start streaming recognition. Returns a Promise that resolves once
-     * SpeechRecognition has claimed the microphone (onaudiostart) or after
-     * a 300ms fallback — whichever comes first. The engine should await
-     * this before calling getUserMedia to avoid dual-mic conflicts.
-     *
-     * When skipMicWait is true (warm restart — mic already active), returns
-     * immediately after calling recognition.start() without waiting for
-     * onaudiostart.
+     * Pre-warm: start recognition in muted mode so it's ready before the user
+     * clicks. Call after engine.init() completes. Eliminates startup latency on
+     * first click by keeping the Google Speech session alive.
+     */
+    preWarm(language: string): void;
+    /**
+     * Start streaming recognition. If recognition is already warm (session
+     * running from preWarm or a previous session within the idle window),
+     * activates instantly — no Google handshake. Otherwise cold-starts.
      */
     start(language: string, skipMicWait?: boolean): Promise<void>;
+    /**
+     * Create and start a new SpeechRecognition instance.
+     * Used by both preWarm() (active=false) and start() cold path (active=true).
+     */
+    private spawnRecognition;
     private clearNoResultTimer;
-    /** Stop streaming recognition and return accumulated text. */
+    private clearIdleTimer;
+    private startIdleTimer;
+    /** Stop streaming recognition and return accumulated text.
+     *  Keeps the recognition session alive (muted) for instant restart. */
     stop(): string;
-    /** Abort immediately without returning text. */
+    /** Abort immediately and release all resources. */
     destroy(): void;
 }

package/dist/index.d.ts CHANGED Viewed

@@ -188,12 +188,9 @@ declare class WorkerManager extends TypedEventEmitter<WorkerManagerEvents> {
     private handleMessage;
 }
-/**
- * Manages mid-recording correction timing.
- * Two triggers: pause detection and forced interval.
- */
 declare class CorrectionOrchestrator {
     private forcedTimer;
+    private initialTimer;
     private lastCorrectionTime;
     private correctionFn;
     private config;
@@ -201,7 +198,9 @@ declare class CorrectionOrchestrator {
     constructor(config: ResolvedSTTConfig['correction']);
     /** Set the function to call when a correction is triggered. */
     setCorrectionFn(fn: () => void): void;
-    /** Start the correction orchestrator (begin forced interval timer). */
+    /** Start the correction orchestrator.
+     *  Fires a quick initial correction after 1s for early feedback, then
+     *  switches to the regular forcedInterval cadence from that point. */
     start(): void;
     /** Stop the orchestrator (clear all timers). */
     stop(): void;
@@ -219,8 +218,13 @@ declare class SpeechStreamingManager {
     private recognition;
     private accumulated;
     private active;
+    private keepingWarm;
+    private currentLang;
     private receivedResult;
+    private lastFinalIndex;
+    private lastFinalText;
     private noResultTimer;
+    private idleTimer;
     private onTranscript;
     private onPause;
     private onError;
@@ -237,20 +241,29 @@ declare class SpeechStreamingManager {
     setOnDebug(fn: (message: string) => void): void;
     private log;
     /**
-     * Start streaming recognition. Returns a Promise that resolves once
-     * SpeechRecognition has claimed the microphone (onaudiostart) or after
-     * a 300ms fallback — whichever comes first. The engine should await
-     * this before calling getUserMedia to avoid dual-mic conflicts.
-     *
-     * When skipMicWait is true (warm restart — mic already active), returns
-     * immediately after calling recognition.start() without waiting for
-     * onaudiostart.
+     * Pre-warm: start recognition in muted mode so it's ready before the user
+     * clicks. Call after engine.init() completes. Eliminates startup latency on
+     * first click by keeping the Google Speech session alive.
+     */
+    preWarm(language: string): void;
+    /**
+     * Start streaming recognition. If recognition is already warm (session
+     * running from preWarm or a previous session within the idle window),
+     * activates instantly — no Google handshake. Otherwise cold-starts.
      */
     start(language: string, skipMicWait?: boolean): Promise<void>;
+    /**
+     * Create and start a new SpeechRecognition instance.
+     * Used by both preWarm() (active=false) and start() cold path (active=true).
+     */
+    private spawnRecognition;
     private clearNoResultTimer;
-    /** Stop streaming recognition and return accumulated text. */
+    private clearIdleTimer;
+    private startIdleTimer;
+    /** Stop streaming recognition and return accumulated text.
+     *  Keeps the recognition session alive (muted) for instant restart. */
     stop(): string;
-    /** Abort immediately without returning text. */
+    /** Abort immediately and release all resources. */
     destroy(): void;
 }

package/dist/index.js CHANGED Viewed

@@ -256,8 +256,10 @@ var WorkerManager = class extends TypedEventEmitter {
 };
 // src/correction-orchestrator.ts
+var INITIAL_CORRECTION_DELAY_MS = 1e3;
 var CorrectionOrchestrator = class {
   forcedTimer = null;
+  initialTimer = null;
   lastCorrectionTime = 0;
   correctionFn = null;
   config;
@@ -269,14 +271,25 @@ var CorrectionOrchestrator = class {
   setCorrectionFn(fn) {
     this.correctionFn = fn;
   }
-  /** Start the correction orchestrator (begin forced interval timer). */
+  /** Start the correction orchestrator.
+   *  Fires a quick initial correction after 1s for early feedback, then
+   *  switches to the regular forcedInterval cadence from that point. */
   start() {
     if (!this.config.enabled) return;
     this.lastCorrectionTime = Date.now();
-    this.startForcedTimer();
+    this.initialTimer = setTimeout(() => {
+      this.initialTimer = null;
+      this.correctionFn?.();
+      this.lastCorrectionTime = Date.now();
+      this.startForcedTimer();
+    }, INITIAL_CORRECTION_DELAY_MS);
   }
   /** Stop the orchestrator (clear all timers). */
   stop() {
+    if (this.initialTimer) {
+      clearTimeout(this.initialTimer);
+      this.initialTimer = null;
+    }
     this.stopForcedTimer();
   }
   /** Called when a speech pause is detected. Triggers correction if cooldown elapsed. */
@@ -385,12 +398,22 @@ function toBCP47(language) {
   return WHISPER_TO_BCP47[language.toLowerCase()] ?? language;
 }
 var NO_RESULT_TIMEOUT_MS = 5e3;
+var IDLE_TIMEOUT_MS = 3e4;
 var SpeechStreamingManager = class {
   recognition = null;
   accumulated = "";
   active = false;
+  // user is recording — emit results, trigger onPause on end
+  keepingWarm = false;
+  // recognition running silently between user sessions
+  currentLang = "";
+  // BCP-47 of running recognition (for fast-path check)
   receivedResult = false;
+  lastFinalIndex = -1;
+  // instance fields so warm-restart resets them cleanly
+  lastFinalText = "";
   noResultTimer = null;
+  idleTimer = null;
   onTranscript = null;
   onPause = null;
   onError = null;
@@ -420,14 +443,25 @@ var SpeechStreamingManager = class {
     console.warn(message);
   }
   /**
-   * Start streaming recognition. Returns a Promise that resolves once
-   * SpeechRecognition has claimed the microphone (onaudiostart) or after
-   * a 300ms fallback — whichever comes first. The engine should await
-   * this before calling getUserMedia to avoid dual-mic conflicts.
-   *
-   * When skipMicWait is true (warm restart — mic already active), returns
-   * immediately after calling recognition.start() without waiting for
-   * onaudiostart.
+   * Pre-warm: start recognition in muted mode so it's ready before the user
+   * clicks. Call after engine.init() completes. Eliminates startup latency on
+   * first click by keeping the Google Speech session alive.
+   */
+  preWarm(language) {
+    const SR = getSpeechRecognition();
+    if (!SR) return;
+    const bcp47 = toBCP47(language);
+    if (this.recognition && this.currentLang === bcp47) return;
+    this.log(`[SSM] preWarm() \u2014 lang: "${language}" \u2192 "${bcp47}"`);
+    this.keepingWarm = true;
+    this.active = false;
+    this.clearIdleTimer();
+    this.spawnRecognition(language);
+  }
+  /**
+   * Start streaming recognition. If recognition is already warm (session
+   * running from preWarm or a previous session within the idle window),
+   * activates instantly — no Google handshake. Otherwise cold-starts.
    */
   start(language, skipMicWait = false) {
     const SR = getSpeechRecognition();
@@ -438,14 +472,49 @@ var SpeechStreamingManager = class {
     const bcp47 = toBCP47(language);
     this.log(`[SSM] start() \u2014 lang: "${language}" \u2192 "${bcp47}"`);
     this.accumulated = "";
-    this.active = true;
     this.receivedResult = false;
+    this.lastFinalIndex = -1;
+    this.lastFinalText = "";
+    this.clearIdleTimer();
+    if (this.recognition && this.currentLang === bcp47) {
+      this.log("[SSM] start() \u2014 warm session, activating immediately");
+      this.keepingWarm = false;
+      this.active = true;
+      this.clearNoResultTimer();
+      this.noResultTimer = setTimeout(() => {
+        if (this.active && !this.receivedResult) {
+          this.log("[SSM] no-result timeout fired \u2014 no onresult in 5s");
+          this.onError?.(
+            "Speech streaming started but received no results. Mic may be blocked by another audio capture."
+          );
+        }
+      }, NO_RESULT_TIMEOUT_MS);
+      return Promise.resolve();
+    }
+    this.keepingWarm = false;
+    this.active = true;
+    return this.spawnRecognition(language, skipMicWait);
+  }
+  /**
+   * Create and start a new SpeechRecognition instance.
+   * Used by both preWarm() (active=false) and start() cold path (active=true).
+   */
+  spawnRecognition(language, skipMicWait = false) {
+    const SR = getSpeechRecognition();
+    const bcp47 = toBCP47(language);
+    if (this.recognition) {
+      const old = this.recognition;
+      this.recognition = null;
+      try {
+        old.stop();
+      } catch {
+      }
+    }
+    this.currentLang = bcp47;
     const recognition = new SR();
     recognition.continuous = true;
     recognition.interimResults = true;
     recognition.lang = bcp47;
-    let lastFinalIndex = -1;
-    let lastFinalText = "";
     let micReady = false;
     const micClaimPromise = new Promise((resolve) => {
       recognition.onaudiostart = () => {
@@ -465,26 +534,29 @@ var SpeechStreamingManager = class {
       }, 300);
     });
     this.clearNoResultTimer();
-    this.noResultTimer = setTimeout(() => {
-      if (this.active && !this.receivedResult) {
-        this.log("[SSM] no-result timeout fired \u2014 no onresult in 5s");
-        this.onError?.(
-          "Speech streaming started but received no results. Mic may be blocked by another audio capture."
-        );
-      }
-    }, NO_RESULT_TIMEOUT_MS);
+    if (this.active) {
+      this.noResultTimer = setTimeout(() => {
+        if (this.active && !this.receivedResult) {
+          this.log("[SSM] no-result timeout fired \u2014 no onresult in 5s");
+          this.onError?.(
+            "Speech streaming started but received no results. Mic may be blocked by another audio capture."
+          );
+        }
+      }, NO_RESULT_TIMEOUT_MS);
+    }
     recognition.onresult = (e) => {
       if (this.recognition !== recognition) return;
       this.receivedResult = true;
       this.clearNoResultTimer();
+      if (!this.active) return;
       let final_ = "";
       let interim = "";
       for (let i = e.resultIndex; i < e.results.length; i++) {
         const t = e.results[i][0].transcript;
         if (e.results[i].isFinal) {
-          if (i > lastFinalIndex) {
+          if (i > this.lastFinalIndex) {
             final_ += t;
-            lastFinalIndex = i;
+            this.lastFinalIndex = i;
           }
         } else {
           interim += t;
@@ -493,8 +565,8 @@ var SpeechStreamingManager = class {
       this.log(
         `[SSM] onresult \u2014 finals: "${final_}", interim: "${interim}", accumulated: "${this.accumulated}"`
       );
-      if (final_ && final_.trim() !== lastFinalText) {
-        lastFinalText = final_.trim();
+      if (final_ && final_.trim() !== this.lastFinalText) {
+        this.lastFinalText = final_.trim();
         this.accumulated = this.accumulated ? this.accumulated + " " + final_.trim() : final_.trim();
         this.onTranscript?.(this.accumulated);
       } else if (interim) {
@@ -506,11 +578,13 @@ var SpeechStreamingManager = class {
     recognition.onerror = (e) => {
       if (this.recognition !== recognition) return;
       this.log(`[SSM] onerror \u2014 ${e.error}`);
-      this.onError?.(e.error);
+      if (this.active) this.onError?.(e.error);
     };
     recognition.onend = () => {
       if (this.recognition !== recognition) return;
-      this.log(`[SSM] onend \u2014 active: ${this.active}, receivedResult: ${this.receivedResult}`);
+      this.log(
+        `[SSM] onend \u2014 active: ${this.active}, keepingWarm: ${this.keepingWarm}, receivedResult: ${this.receivedResult}`
+      );
       if (this.active) {
         this.onPause?.();
         try {
@@ -519,10 +593,22 @@ var SpeechStreamingManager = class {
         } catch (err) {
           this.log(`[SSM] restart THREW: ${err}`);
           this.recognition = null;
+          this.currentLang = "";
           this.onError?.("Speech recognition failed to restart after pause.");
         }
+      } else if (this.keepingWarm) {
+        try {
+          recognition.start();
+          this.log("[SSM] warm restart");
+        } catch (err) {
+          this.log(`[SSM] warm restart THREW: ${err}`);
+          this.recognition = null;
+          this.keepingWarm = false;
+          this.currentLang = "";
+        }
       } else {
         this.recognition = null;
+        this.currentLang = "";
       }
     };
     this.recognition = recognition;
@@ -532,15 +618,20 @@ var SpeechStreamingManager = class {
     } catch (err) {
       this.log(`[SSM] recognition.start() THREW: ${err}`);
       this.recognition = null;
+      this.currentLang = "";
+      const wasActive = this.active;
       this.active = false;
+      this.keepingWarm = false;
       this.clearNoResultTimer();
-      this.onError?.(
-        `Speech recognition failed to start: ${err instanceof Error ? err.message : String(err)}`
-      );
+      if (wasActive) {
+        this.onError?.(
+          `Speech recognition failed to start: ${err instanceof Error ? err.message : String(err)}`
+        );
+      }
       return Promise.resolve();
     }
-    if (skipMicWait) {
-      this.log("[SSM] skipMicWait \u2014 warm restart, returning immediately");
+    if (!this.active || skipMicWait) {
+      if (skipMicWait) this.log("[SSM] skipMicWait \u2014 warm restart, returning immediately");
       return Promise.resolve();
     }
     return micClaimPromise;
@@ -551,26 +642,50 @@ var SpeechStreamingManager = class {
       this.noResultTimer = null;
     }
   }
-  /** Stop streaming recognition and return accumulated text. */
+  clearIdleTimer() {
+    if (this.idleTimer) {
+      clearTimeout(this.idleTimer);
+      this.idleTimer = null;
+    }
+  }
+  startIdleTimer() {
+    this.clearIdleTimer();
+    this.idleTimer = setTimeout(() => {
+      this.idleTimer = null;
+      this.keepingWarm = false;
+      this.log("[SSM] idle timeout \u2014 stopping recognition");
+      if (this.recognition) {
+        const rec = this.recognition;
+        this.recognition = null;
+        this.currentLang = "";
+        try {
+          rec.stop();
+        } catch {
+        }
+      }
+    }, IDLE_TIMEOUT_MS);
+  }
+  /** Stop streaming recognition and return accumulated text.
+   *  Keeps the recognition session alive (muted) for instant restart. */
   stop() {
     this.active = false;
+    this.keepingWarm = true;
     this.clearNoResultTimer();
-    if (this.recognition) {
-      const rec = this.recognition;
-      this.recognition = null;
-      rec.stop();
-    }
     const result = this.accumulated;
     this.accumulated = "";
+    this.startIdleTimer();
     return result;
   }
-  /** Abort immediately without returning text. */
+  /** Abort immediately and release all resources. */
   destroy() {
     this.active = false;
+    this.keepingWarm = false;
     this.clearNoResultTimer();
+    this.clearIdleTimer();
     if (this.recognition) {
       const rec = this.recognition;
       this.recognition = null;
+      this.currentLang = "";
       rec.abort();
     }
     this.accumulated = "";
@@ -625,6 +740,9 @@ var STTEngine = class extends TypedEventEmitter {
       await this.workerManager.loadModel(this.config);
       this.state.isModelLoaded = true;
       this.updateStatus("ready");
+      if (this.config.streaming.enabled) {
+        this.speechStreaming.preWarm(this.config.language);
+      }
     } catch (err) {
       this.emitError("MODEL_LOAD_FAILED", err instanceof Error ? err.message : String(err));
       this.updateStatus("idle");