@tekyzinc/stt-component 0.3.1 → 0.3.2
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
- package/dist/index.cjs +142 -37
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +24 -10
- package/dist/index.d.ts +24 -10
- package/dist/index.js +142 -37
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.cts
CHANGED
|
@@ -218,8 +218,13 @@ declare class SpeechStreamingManager {
|
|
|
218
218
|
private recognition;
|
|
219
219
|
private accumulated;
|
|
220
220
|
private active;
|
|
221
|
+
private keepingWarm;
|
|
222
|
+
private currentLang;
|
|
221
223
|
private receivedResult;
|
|
224
|
+
private lastFinalIndex;
|
|
225
|
+
private lastFinalText;
|
|
222
226
|
private noResultTimer;
|
|
227
|
+
private idleTimer;
|
|
223
228
|
private onTranscript;
|
|
224
229
|
private onPause;
|
|
225
230
|
private onError;
|
|
@@ -236,20 +241,29 @@ declare class SpeechStreamingManager {
|
|
|
236
241
|
setOnDebug(fn: (message: string) => void): void;
|
|
237
242
|
private log;
|
|
238
243
|
/**
|
|
239
|
-
*
|
|
240
|
-
*
|
|
241
|
-
*
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
*
|
|
246
|
-
*
|
|
244
|
+
* Pre-warm: start recognition in muted mode so it's ready before the user
|
|
245
|
+
* clicks. Call after engine.init() completes. Eliminates startup latency on
|
|
246
|
+
* first click by keeping the Google Speech session alive.
|
|
247
|
+
*/
|
|
248
|
+
preWarm(language: string): void;
|
|
249
|
+
/**
|
|
250
|
+
* Start streaming recognition. If recognition is already warm (session
|
|
251
|
+
* running from preWarm or a previous session within the idle window),
|
|
252
|
+
* activates instantly — no Google handshake. Otherwise cold-starts.
|
|
247
253
|
*/
|
|
248
254
|
start(language: string, skipMicWait?: boolean): Promise<void>;
|
|
255
|
+
/**
|
|
256
|
+
* Create and start a new SpeechRecognition instance.
|
|
257
|
+
* Used by both preWarm() (active=false) and start() cold path (active=true).
|
|
258
|
+
*/
|
|
259
|
+
private spawnRecognition;
|
|
249
260
|
private clearNoResultTimer;
|
|
250
|
-
|
|
261
|
+
private clearIdleTimer;
|
|
262
|
+
private startIdleTimer;
|
|
263
|
+
/** Stop streaming recognition and return accumulated text.
|
|
264
|
+
* Keeps the recognition session alive (muted) for instant restart. */
|
|
251
265
|
stop(): string;
|
|
252
|
-
/** Abort immediately
|
|
266
|
+
/** Abort immediately and release all resources. */
|
|
253
267
|
destroy(): void;
|
|
254
268
|
}
|
|
255
269
|
|
package/dist/index.d.ts
CHANGED
|
@@ -218,8 +218,13 @@ declare class SpeechStreamingManager {
|
|
|
218
218
|
private recognition;
|
|
219
219
|
private accumulated;
|
|
220
220
|
private active;
|
|
221
|
+
private keepingWarm;
|
|
222
|
+
private currentLang;
|
|
221
223
|
private receivedResult;
|
|
224
|
+
private lastFinalIndex;
|
|
225
|
+
private lastFinalText;
|
|
222
226
|
private noResultTimer;
|
|
227
|
+
private idleTimer;
|
|
223
228
|
private onTranscript;
|
|
224
229
|
private onPause;
|
|
225
230
|
private onError;
|
|
@@ -236,20 +241,29 @@ declare class SpeechStreamingManager {
|
|
|
236
241
|
setOnDebug(fn: (message: string) => void): void;
|
|
237
242
|
private log;
|
|
238
243
|
/**
|
|
239
|
-
*
|
|
240
|
-
*
|
|
241
|
-
*
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
*
|
|
246
|
-
*
|
|
244
|
+
* Pre-warm: start recognition in muted mode so it's ready before the user
|
|
245
|
+
* clicks. Call after engine.init() completes. Eliminates startup latency on
|
|
246
|
+
* first click by keeping the Google Speech session alive.
|
|
247
|
+
*/
|
|
248
|
+
preWarm(language: string): void;
|
|
249
|
+
/**
|
|
250
|
+
* Start streaming recognition. If recognition is already warm (session
|
|
251
|
+
* running from preWarm or a previous session within the idle window),
|
|
252
|
+
* activates instantly — no Google handshake. Otherwise cold-starts.
|
|
247
253
|
*/
|
|
248
254
|
start(language: string, skipMicWait?: boolean): Promise<void>;
|
|
255
|
+
/**
|
|
256
|
+
* Create and start a new SpeechRecognition instance.
|
|
257
|
+
* Used by both preWarm() (active=false) and start() cold path (active=true).
|
|
258
|
+
*/
|
|
259
|
+
private spawnRecognition;
|
|
249
260
|
private clearNoResultTimer;
|
|
250
|
-
|
|
261
|
+
private clearIdleTimer;
|
|
262
|
+
private startIdleTimer;
|
|
263
|
+
/** Stop streaming recognition and return accumulated text.
|
|
264
|
+
* Keeps the recognition session alive (muted) for instant restart. */
|
|
251
265
|
stop(): string;
|
|
252
|
-
/** Abort immediately
|
|
266
|
+
/** Abort immediately and release all resources. */
|
|
253
267
|
destroy(): void;
|
|
254
268
|
}
|
|
255
269
|
|
package/dist/index.js
CHANGED
|
@@ -398,12 +398,22 @@ function toBCP47(language) {
|
|
|
398
398
|
return WHISPER_TO_BCP47[language.toLowerCase()] ?? language;
|
|
399
399
|
}
|
|
400
400
|
var NO_RESULT_TIMEOUT_MS = 5e3;
|
|
401
|
+
var IDLE_TIMEOUT_MS = 3e4;
|
|
401
402
|
var SpeechStreamingManager = class {
|
|
402
403
|
recognition = null;
|
|
403
404
|
accumulated = "";
|
|
404
405
|
active = false;
|
|
406
|
+
// user is recording — emit results, trigger onPause on end
|
|
407
|
+
keepingWarm = false;
|
|
408
|
+
// recognition running silently between user sessions
|
|
409
|
+
currentLang = "";
|
|
410
|
+
// BCP-47 of running recognition (for fast-path check)
|
|
405
411
|
receivedResult = false;
|
|
412
|
+
lastFinalIndex = -1;
|
|
413
|
+
// instance fields so warm-restart resets them cleanly
|
|
414
|
+
lastFinalText = "";
|
|
406
415
|
noResultTimer = null;
|
|
416
|
+
idleTimer = null;
|
|
407
417
|
onTranscript = null;
|
|
408
418
|
onPause = null;
|
|
409
419
|
onError = null;
|
|
@@ -433,14 +443,25 @@ var SpeechStreamingManager = class {
|
|
|
433
443
|
console.warn(message);
|
|
434
444
|
}
|
|
435
445
|
/**
|
|
436
|
-
*
|
|
437
|
-
*
|
|
438
|
-
*
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
446
|
+
* Pre-warm: start recognition in muted mode so it's ready before the user
|
|
447
|
+
* clicks. Call after engine.init() completes. Eliminates startup latency on
|
|
448
|
+
* first click by keeping the Google Speech session alive.
|
|
449
|
+
*/
|
|
450
|
+
preWarm(language) {
|
|
451
|
+
const SR = getSpeechRecognition();
|
|
452
|
+
if (!SR) return;
|
|
453
|
+
const bcp47 = toBCP47(language);
|
|
454
|
+
if (this.recognition && this.currentLang === bcp47) return;
|
|
455
|
+
this.log(`[SSM] preWarm() \u2014 lang: "${language}" \u2192 "${bcp47}"`);
|
|
456
|
+
this.keepingWarm = true;
|
|
457
|
+
this.active = false;
|
|
458
|
+
this.clearIdleTimer();
|
|
459
|
+
this.spawnRecognition(language);
|
|
460
|
+
}
|
|
461
|
+
/**
|
|
462
|
+
* Start streaming recognition. If recognition is already warm (session
|
|
463
|
+
* running from preWarm or a previous session within the idle window),
|
|
464
|
+
* activates instantly — no Google handshake. Otherwise cold-starts.
|
|
444
465
|
*/
|
|
445
466
|
start(language, skipMicWait = false) {
|
|
446
467
|
const SR = getSpeechRecognition();
|
|
@@ -451,14 +472,49 @@ var SpeechStreamingManager = class {
|
|
|
451
472
|
const bcp47 = toBCP47(language);
|
|
452
473
|
this.log(`[SSM] start() \u2014 lang: "${language}" \u2192 "${bcp47}"`);
|
|
453
474
|
this.accumulated = "";
|
|
454
|
-
this.active = true;
|
|
455
475
|
this.receivedResult = false;
|
|
476
|
+
this.lastFinalIndex = -1;
|
|
477
|
+
this.lastFinalText = "";
|
|
478
|
+
this.clearIdleTimer();
|
|
479
|
+
if (this.recognition && this.currentLang === bcp47) {
|
|
480
|
+
this.log("[SSM] start() \u2014 warm session, activating immediately");
|
|
481
|
+
this.keepingWarm = false;
|
|
482
|
+
this.active = true;
|
|
483
|
+
this.clearNoResultTimer();
|
|
484
|
+
this.noResultTimer = setTimeout(() => {
|
|
485
|
+
if (this.active && !this.receivedResult) {
|
|
486
|
+
this.log("[SSM] no-result timeout fired \u2014 no onresult in 5s");
|
|
487
|
+
this.onError?.(
|
|
488
|
+
"Speech streaming started but received no results. Mic may be blocked by another audio capture."
|
|
489
|
+
);
|
|
490
|
+
}
|
|
491
|
+
}, NO_RESULT_TIMEOUT_MS);
|
|
492
|
+
return Promise.resolve();
|
|
493
|
+
}
|
|
494
|
+
this.keepingWarm = false;
|
|
495
|
+
this.active = true;
|
|
496
|
+
return this.spawnRecognition(language, skipMicWait);
|
|
497
|
+
}
|
|
498
|
+
/**
|
|
499
|
+
* Create and start a new SpeechRecognition instance.
|
|
500
|
+
* Used by both preWarm() (active=false) and start() cold path (active=true).
|
|
501
|
+
*/
|
|
502
|
+
spawnRecognition(language, skipMicWait = false) {
|
|
503
|
+
const SR = getSpeechRecognition();
|
|
504
|
+
const bcp47 = toBCP47(language);
|
|
505
|
+
if (this.recognition) {
|
|
506
|
+
const old = this.recognition;
|
|
507
|
+
this.recognition = null;
|
|
508
|
+
try {
|
|
509
|
+
old.stop();
|
|
510
|
+
} catch {
|
|
511
|
+
}
|
|
512
|
+
}
|
|
513
|
+
this.currentLang = bcp47;
|
|
456
514
|
const recognition = new SR();
|
|
457
515
|
recognition.continuous = true;
|
|
458
516
|
recognition.interimResults = true;
|
|
459
517
|
recognition.lang = bcp47;
|
|
460
|
-
let lastFinalIndex = -1;
|
|
461
|
-
let lastFinalText = "";
|
|
462
518
|
let micReady = false;
|
|
463
519
|
const micClaimPromise = new Promise((resolve) => {
|
|
464
520
|
recognition.onaudiostart = () => {
|
|
@@ -478,26 +534,29 @@ var SpeechStreamingManager = class {
|
|
|
478
534
|
}, 300);
|
|
479
535
|
});
|
|
480
536
|
this.clearNoResultTimer();
|
|
481
|
-
this.
|
|
482
|
-
|
|
483
|
-
this.
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
537
|
+
if (this.active) {
|
|
538
|
+
this.noResultTimer = setTimeout(() => {
|
|
539
|
+
if (this.active && !this.receivedResult) {
|
|
540
|
+
this.log("[SSM] no-result timeout fired \u2014 no onresult in 5s");
|
|
541
|
+
this.onError?.(
|
|
542
|
+
"Speech streaming started but received no results. Mic may be blocked by another audio capture."
|
|
543
|
+
);
|
|
544
|
+
}
|
|
545
|
+
}, NO_RESULT_TIMEOUT_MS);
|
|
546
|
+
}
|
|
489
547
|
recognition.onresult = (e) => {
|
|
490
548
|
if (this.recognition !== recognition) return;
|
|
491
549
|
this.receivedResult = true;
|
|
492
550
|
this.clearNoResultTimer();
|
|
551
|
+
if (!this.active) return;
|
|
493
552
|
let final_ = "";
|
|
494
553
|
let interim = "";
|
|
495
554
|
for (let i = e.resultIndex; i < e.results.length; i++) {
|
|
496
555
|
const t = e.results[i][0].transcript;
|
|
497
556
|
if (e.results[i].isFinal) {
|
|
498
|
-
if (i > lastFinalIndex) {
|
|
557
|
+
if (i > this.lastFinalIndex) {
|
|
499
558
|
final_ += t;
|
|
500
|
-
lastFinalIndex = i;
|
|
559
|
+
this.lastFinalIndex = i;
|
|
501
560
|
}
|
|
502
561
|
} else {
|
|
503
562
|
interim += t;
|
|
@@ -506,8 +565,8 @@ var SpeechStreamingManager = class {
|
|
|
506
565
|
this.log(
|
|
507
566
|
`[SSM] onresult \u2014 finals: "${final_}", interim: "${interim}", accumulated: "${this.accumulated}"`
|
|
508
567
|
);
|
|
509
|
-
if (final_ && final_.trim() !== lastFinalText) {
|
|
510
|
-
lastFinalText = final_.trim();
|
|
568
|
+
if (final_ && final_.trim() !== this.lastFinalText) {
|
|
569
|
+
this.lastFinalText = final_.trim();
|
|
511
570
|
this.accumulated = this.accumulated ? this.accumulated + " " + final_.trim() : final_.trim();
|
|
512
571
|
this.onTranscript?.(this.accumulated);
|
|
513
572
|
} else if (interim) {
|
|
@@ -519,11 +578,13 @@ var SpeechStreamingManager = class {
|
|
|
519
578
|
recognition.onerror = (e) => {
|
|
520
579
|
if (this.recognition !== recognition) return;
|
|
521
580
|
this.log(`[SSM] onerror \u2014 ${e.error}`);
|
|
522
|
-
this.onError?.(e.error);
|
|
581
|
+
if (this.active) this.onError?.(e.error);
|
|
523
582
|
};
|
|
524
583
|
recognition.onend = () => {
|
|
525
584
|
if (this.recognition !== recognition) return;
|
|
526
|
-
this.log(
|
|
585
|
+
this.log(
|
|
586
|
+
`[SSM] onend \u2014 active: ${this.active}, keepingWarm: ${this.keepingWarm}, receivedResult: ${this.receivedResult}`
|
|
587
|
+
);
|
|
527
588
|
if (this.active) {
|
|
528
589
|
this.onPause?.();
|
|
529
590
|
try {
|
|
@@ -532,10 +593,22 @@ var SpeechStreamingManager = class {
|
|
|
532
593
|
} catch (err) {
|
|
533
594
|
this.log(`[SSM] restart THREW: ${err}`);
|
|
534
595
|
this.recognition = null;
|
|
596
|
+
this.currentLang = "";
|
|
535
597
|
this.onError?.("Speech recognition failed to restart after pause.");
|
|
536
598
|
}
|
|
599
|
+
} else if (this.keepingWarm) {
|
|
600
|
+
try {
|
|
601
|
+
recognition.start();
|
|
602
|
+
this.log("[SSM] warm restart");
|
|
603
|
+
} catch (err) {
|
|
604
|
+
this.log(`[SSM] warm restart THREW: ${err}`);
|
|
605
|
+
this.recognition = null;
|
|
606
|
+
this.keepingWarm = false;
|
|
607
|
+
this.currentLang = "";
|
|
608
|
+
}
|
|
537
609
|
} else {
|
|
538
610
|
this.recognition = null;
|
|
611
|
+
this.currentLang = "";
|
|
539
612
|
}
|
|
540
613
|
};
|
|
541
614
|
this.recognition = recognition;
|
|
@@ -545,15 +618,20 @@ var SpeechStreamingManager = class {
|
|
|
545
618
|
} catch (err) {
|
|
546
619
|
this.log(`[SSM] recognition.start() THREW: ${err}`);
|
|
547
620
|
this.recognition = null;
|
|
621
|
+
this.currentLang = "";
|
|
622
|
+
const wasActive = this.active;
|
|
548
623
|
this.active = false;
|
|
624
|
+
this.keepingWarm = false;
|
|
549
625
|
this.clearNoResultTimer();
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
626
|
+
if (wasActive) {
|
|
627
|
+
this.onError?.(
|
|
628
|
+
`Speech recognition failed to start: ${err instanceof Error ? err.message : String(err)}`
|
|
629
|
+
);
|
|
630
|
+
}
|
|
553
631
|
return Promise.resolve();
|
|
554
632
|
}
|
|
555
|
-
if (skipMicWait) {
|
|
556
|
-
this.log("[SSM] skipMicWait \u2014 warm restart, returning immediately");
|
|
633
|
+
if (!this.active || skipMicWait) {
|
|
634
|
+
if (skipMicWait) this.log("[SSM] skipMicWait \u2014 warm restart, returning immediately");
|
|
557
635
|
return Promise.resolve();
|
|
558
636
|
}
|
|
559
637
|
return micClaimPromise;
|
|
@@ -564,26 +642,50 @@ var SpeechStreamingManager = class {
|
|
|
564
642
|
this.noResultTimer = null;
|
|
565
643
|
}
|
|
566
644
|
}
|
|
567
|
-
|
|
645
|
+
clearIdleTimer() {
|
|
646
|
+
if (this.idleTimer) {
|
|
647
|
+
clearTimeout(this.idleTimer);
|
|
648
|
+
this.idleTimer = null;
|
|
649
|
+
}
|
|
650
|
+
}
|
|
651
|
+
startIdleTimer() {
|
|
652
|
+
this.clearIdleTimer();
|
|
653
|
+
this.idleTimer = setTimeout(() => {
|
|
654
|
+
this.idleTimer = null;
|
|
655
|
+
this.keepingWarm = false;
|
|
656
|
+
this.log("[SSM] idle timeout \u2014 stopping recognition");
|
|
657
|
+
if (this.recognition) {
|
|
658
|
+
const rec = this.recognition;
|
|
659
|
+
this.recognition = null;
|
|
660
|
+
this.currentLang = "";
|
|
661
|
+
try {
|
|
662
|
+
rec.stop();
|
|
663
|
+
} catch {
|
|
664
|
+
}
|
|
665
|
+
}
|
|
666
|
+
}, IDLE_TIMEOUT_MS);
|
|
667
|
+
}
|
|
668
|
+
/** Stop streaming recognition and return accumulated text.
|
|
669
|
+
* Keeps the recognition session alive (muted) for instant restart. */
|
|
568
670
|
stop() {
|
|
569
671
|
this.active = false;
|
|
672
|
+
this.keepingWarm = true;
|
|
570
673
|
this.clearNoResultTimer();
|
|
571
|
-
if (this.recognition) {
|
|
572
|
-
const rec = this.recognition;
|
|
573
|
-
this.recognition = null;
|
|
574
|
-
rec.stop();
|
|
575
|
-
}
|
|
576
674
|
const result = this.accumulated;
|
|
577
675
|
this.accumulated = "";
|
|
676
|
+
this.startIdleTimer();
|
|
578
677
|
return result;
|
|
579
678
|
}
|
|
580
|
-
/** Abort immediately
|
|
679
|
+
/** Abort immediately and release all resources. */
|
|
581
680
|
destroy() {
|
|
582
681
|
this.active = false;
|
|
682
|
+
this.keepingWarm = false;
|
|
583
683
|
this.clearNoResultTimer();
|
|
684
|
+
this.clearIdleTimer();
|
|
584
685
|
if (this.recognition) {
|
|
585
686
|
const rec = this.recognition;
|
|
586
687
|
this.recognition = null;
|
|
688
|
+
this.currentLang = "";
|
|
587
689
|
rec.abort();
|
|
588
690
|
}
|
|
589
691
|
this.accumulated = "";
|
|
@@ -638,6 +740,9 @@ var STTEngine = class extends TypedEventEmitter {
|
|
|
638
740
|
await this.workerManager.loadModel(this.config);
|
|
639
741
|
this.state.isModelLoaded = true;
|
|
640
742
|
this.updateStatus("ready");
|
|
743
|
+
if (this.config.streaming.enabled) {
|
|
744
|
+
this.speechStreaming.preWarm(this.config.language);
|
|
745
|
+
}
|
|
641
746
|
} catch (err) {
|
|
642
747
|
this.emitError("MODEL_LOAD_FAILED", err instanceof Error ? err.message : String(err));
|
|
643
748
|
this.updateStatus("idle");
|