@tekyzinc/stt-component 0.2.4 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +72 -1
- package/dist/index.cjs +95 -14
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +33 -5
- package/dist/index.d.ts +33 -5
- package/dist/index.js +93 -14
- package/dist/index.js.map +1 -1
- package/package.json +56 -56
package/dist/index.d.cts
CHANGED
|
@@ -14,9 +14,9 @@ interface STTCorrectionConfig {
|
|
|
14
14
|
enabled?: boolean;
|
|
15
15
|
/** Correction engine provider. Default: 'whisper' */
|
|
16
16
|
provider?: STTCorrectionProvider;
|
|
17
|
-
/** Silence duration (ms) before triggering correction. Default:
|
|
17
|
+
/** Silence duration (ms) before triggering correction. Default: 1000 */
|
|
18
18
|
pauseThreshold?: number;
|
|
19
|
-
/** Maximum interval (ms) between forced corrections. Default:
|
|
19
|
+
/** Maximum interval (ms) between forced corrections. Default: 3000 */
|
|
20
20
|
forcedInterval?: number;
|
|
21
21
|
}
|
|
22
22
|
/** Real-time streaming preview configuration. */
|
|
@@ -95,6 +95,10 @@ interface AudioCaptureHandle {
|
|
|
95
95
|
samples: Float32Array[];
|
|
96
96
|
/** Retain reference to prevent GC from stopping audio processing. */
|
|
97
97
|
_processor: ScriptProcessorNode;
|
|
98
|
+
/** Source node for disconnect/reconnect on pause/resume. */
|
|
99
|
+
_source: MediaStreamAudioSourceNode;
|
|
100
|
+
/** Gain node (silent) to prevent mic playback. */
|
|
101
|
+
_silencer: GainNode;
|
|
98
102
|
}
|
|
99
103
|
/** Default configuration values. */
|
|
100
104
|
declare const DEFAULT_STT_CONFIG: ResolvedSTTConfig;
|
|
@@ -124,6 +128,18 @@ declare class TypedEventEmitter<T extends Record<string, (...args: any[]) => voi
|
|
|
124
128
|
* Uses ScriptProcessorNode to collect Float32Array samples directly.
|
|
125
129
|
*/
|
|
126
130
|
declare function startCapture(): Promise<AudioCaptureHandle>;
|
|
131
|
+
/**
|
|
132
|
+
* Pause capture without releasing mic or AudioContext.
|
|
133
|
+
* Disconnects the audio source so no new samples are collected.
|
|
134
|
+
* Returns resampled audio from the recording period.
|
|
135
|
+
* Call resumeCapture() to start collecting again.
|
|
136
|
+
*/
|
|
137
|
+
declare function pauseCapture(capture: AudioCaptureHandle): Promise<Float32Array>;
|
|
138
|
+
/**
|
|
139
|
+
* Resume a paused capture. Reconnects the audio source to the processor.
|
|
140
|
+
* AudioContext is resumed if suspended.
|
|
141
|
+
*/
|
|
142
|
+
declare function resumeCapture(capture: AudioCaptureHandle): Promise<void>;
|
|
127
143
|
/**
|
|
128
144
|
* Copy current audio buffer without stopping capture.
|
|
129
145
|
* Returns a shallow copy of the samples array (each chunk is shared, not cloned).
|
|
@@ -152,8 +168,13 @@ type WorkerManagerEvents = {
|
|
|
152
168
|
declare class WorkerManager extends TypedEventEmitter<WorkerManagerEvents> {
|
|
153
169
|
private worker;
|
|
154
170
|
private transcribeResolve;
|
|
171
|
+
private currentTranscribePromise;
|
|
155
172
|
private modelReadyResolve;
|
|
156
173
|
private modelReadyReject;
|
|
174
|
+
/** True while a transcription job is running in the worker. */
|
|
175
|
+
get isTranscribing(): boolean;
|
|
176
|
+
/** Await the current in-flight transcription without starting a new one. */
|
|
177
|
+
awaitCurrentTranscription(): Promise<string>;
|
|
157
178
|
/** Spawn the Web Worker. Must be called before loadModel/transcribe. */
|
|
158
179
|
spawn(workerUrl?: URL): void;
|
|
159
180
|
/** Load the Whisper model in the worker. Resolves when ready. */
|
|
@@ -220,8 +241,12 @@ declare class SpeechStreamingManager {
|
|
|
220
241
|
* SpeechRecognition has claimed the microphone (onaudiostart) or after
|
|
221
242
|
* a 300ms fallback — whichever comes first. The engine should await
|
|
222
243
|
* this before calling getUserMedia to avoid dual-mic conflicts.
|
|
244
|
+
*
|
|
245
|
+
* When skipMicWait is true (warm restart — mic already active), returns
|
|
246
|
+
* immediately after calling recognition.start() without waiting for
|
|
247
|
+
* onaudiostart.
|
|
223
248
|
*/
|
|
224
|
-
start(language: string): Promise<void>;
|
|
249
|
+
start(language: string, skipMicWait?: boolean): Promise<void>;
|
|
225
250
|
private clearNoResultTimer;
|
|
226
251
|
/** Stop streaming recognition and return accumulated text. */
|
|
227
252
|
stop(): string;
|
|
@@ -250,6 +275,8 @@ declare class STTEngine extends TypedEventEmitter<STTEvents> {
|
|
|
250
275
|
private capture;
|
|
251
276
|
private state;
|
|
252
277
|
private workerUrl?;
|
|
278
|
+
/** Prevents performCorrection from emitting while stop() is consuming the in-flight result. */
|
|
279
|
+
private _stopping;
|
|
253
280
|
/**
|
|
254
281
|
* Create a new STT engine instance.
|
|
255
282
|
* @param config - Optional configuration overrides (model, backend, language, etc.).
|
|
@@ -260,7 +287,8 @@ declare class STTEngine extends TypedEventEmitter<STTEvents> {
|
|
|
260
287
|
init(): Promise<void>;
|
|
261
288
|
/** Start recording audio and enable correction cycles. */
|
|
262
289
|
start(): Promise<void>;
|
|
263
|
-
/** Stop recording, run final transcription, return text.
|
|
290
|
+
/** Stop recording, run final transcription, return text.
|
|
291
|
+
* Mic and AudioContext stay alive for fast restart — call destroy() to fully release. */
|
|
264
292
|
stop(): Promise<string>;
|
|
265
293
|
/** Destroy the engine: terminate worker, release all resources. */
|
|
266
294
|
destroy(): void;
|
|
@@ -276,4 +304,4 @@ declare class STTEngine extends TypedEventEmitter<STTEvents> {
|
|
|
276
304
|
private emitDebug;
|
|
277
305
|
}
|
|
278
306
|
|
|
279
|
-
export { type AudioCaptureHandle, CorrectionOrchestrator, DEFAULT_STT_CONFIG, type ResolvedSTTConfig, type STTBackend, type STTChunkingConfig, type STTConfig, type STTCorrectionConfig, type STTCorrectionProvider, STTEngine, type STTError, type STTEvents, type STTModelSize, type STTState, type STTStatus, type STTStreamingConfig, type STTStreamingProvider, SpeechStreamingManager, TypedEventEmitter, WorkerManager, type WorkerManagerEvents, resampleAudio, resolveConfig, snapshotAudio, startCapture, stopCapture };
|
|
307
|
+
export { type AudioCaptureHandle, CorrectionOrchestrator, DEFAULT_STT_CONFIG, type ResolvedSTTConfig, type STTBackend, type STTChunkingConfig, type STTConfig, type STTCorrectionConfig, type STTCorrectionProvider, STTEngine, type STTError, type STTEvents, type STTModelSize, type STTState, type STTStatus, type STTStreamingConfig, type STTStreamingProvider, SpeechStreamingManager, TypedEventEmitter, WorkerManager, type WorkerManagerEvents, pauseCapture, resampleAudio, resolveConfig, resumeCapture, snapshotAudio, startCapture, stopCapture };
|
package/dist/index.d.ts
CHANGED
|
@@ -14,9 +14,9 @@ interface STTCorrectionConfig {
|
|
|
14
14
|
enabled?: boolean;
|
|
15
15
|
/** Correction engine provider. Default: 'whisper' */
|
|
16
16
|
provider?: STTCorrectionProvider;
|
|
17
|
-
/** Silence duration (ms) before triggering correction. Default:
|
|
17
|
+
/** Silence duration (ms) before triggering correction. Default: 1000 */
|
|
18
18
|
pauseThreshold?: number;
|
|
19
|
-
/** Maximum interval (ms) between forced corrections. Default:
|
|
19
|
+
/** Maximum interval (ms) between forced corrections. Default: 3000 */
|
|
20
20
|
forcedInterval?: number;
|
|
21
21
|
}
|
|
22
22
|
/** Real-time streaming preview configuration. */
|
|
@@ -95,6 +95,10 @@ interface AudioCaptureHandle {
|
|
|
95
95
|
samples: Float32Array[];
|
|
96
96
|
/** Retain reference to prevent GC from stopping audio processing. */
|
|
97
97
|
_processor: ScriptProcessorNode;
|
|
98
|
+
/** Source node for disconnect/reconnect on pause/resume. */
|
|
99
|
+
_source: MediaStreamAudioSourceNode;
|
|
100
|
+
/** Gain node (silent) to prevent mic playback. */
|
|
101
|
+
_silencer: GainNode;
|
|
98
102
|
}
|
|
99
103
|
/** Default configuration values. */
|
|
100
104
|
declare const DEFAULT_STT_CONFIG: ResolvedSTTConfig;
|
|
@@ -124,6 +128,18 @@ declare class TypedEventEmitter<T extends Record<string, (...args: any[]) => voi
|
|
|
124
128
|
* Uses ScriptProcessorNode to collect Float32Array samples directly.
|
|
125
129
|
*/
|
|
126
130
|
declare function startCapture(): Promise<AudioCaptureHandle>;
|
|
131
|
+
/**
|
|
132
|
+
* Pause capture without releasing mic or AudioContext.
|
|
133
|
+
* Disconnects the audio source so no new samples are collected.
|
|
134
|
+
* Returns resampled audio from the recording period.
|
|
135
|
+
* Call resumeCapture() to start collecting again.
|
|
136
|
+
*/
|
|
137
|
+
declare function pauseCapture(capture: AudioCaptureHandle): Promise<Float32Array>;
|
|
138
|
+
/**
|
|
139
|
+
* Resume a paused capture. Reconnects the audio source to the processor.
|
|
140
|
+
* AudioContext is resumed if suspended.
|
|
141
|
+
*/
|
|
142
|
+
declare function resumeCapture(capture: AudioCaptureHandle): Promise<void>;
|
|
127
143
|
/**
|
|
128
144
|
* Copy current audio buffer without stopping capture.
|
|
129
145
|
* Returns a shallow copy of the samples array (each chunk is shared, not cloned).
|
|
@@ -152,8 +168,13 @@ type WorkerManagerEvents = {
|
|
|
152
168
|
declare class WorkerManager extends TypedEventEmitter<WorkerManagerEvents> {
|
|
153
169
|
private worker;
|
|
154
170
|
private transcribeResolve;
|
|
171
|
+
private currentTranscribePromise;
|
|
155
172
|
private modelReadyResolve;
|
|
156
173
|
private modelReadyReject;
|
|
174
|
+
/** True while a transcription job is running in the worker. */
|
|
175
|
+
get isTranscribing(): boolean;
|
|
176
|
+
/** Await the current in-flight transcription without starting a new one. */
|
|
177
|
+
awaitCurrentTranscription(): Promise<string>;
|
|
157
178
|
/** Spawn the Web Worker. Must be called before loadModel/transcribe. */
|
|
158
179
|
spawn(workerUrl?: URL): void;
|
|
159
180
|
/** Load the Whisper model in the worker. Resolves when ready. */
|
|
@@ -220,8 +241,12 @@ declare class SpeechStreamingManager {
|
|
|
220
241
|
* SpeechRecognition has claimed the microphone (onaudiostart) or after
|
|
221
242
|
* a 300ms fallback — whichever comes first. The engine should await
|
|
222
243
|
* this before calling getUserMedia to avoid dual-mic conflicts.
|
|
244
|
+
*
|
|
245
|
+
* When skipMicWait is true (warm restart — mic already active), returns
|
|
246
|
+
* immediately after calling recognition.start() without waiting for
|
|
247
|
+
* onaudiostart.
|
|
223
248
|
*/
|
|
224
|
-
start(language: string): Promise<void>;
|
|
249
|
+
start(language: string, skipMicWait?: boolean): Promise<void>;
|
|
225
250
|
private clearNoResultTimer;
|
|
226
251
|
/** Stop streaming recognition and return accumulated text. */
|
|
227
252
|
stop(): string;
|
|
@@ -250,6 +275,8 @@ declare class STTEngine extends TypedEventEmitter<STTEvents> {
|
|
|
250
275
|
private capture;
|
|
251
276
|
private state;
|
|
252
277
|
private workerUrl?;
|
|
278
|
+
/** Prevents performCorrection from emitting while stop() is consuming the in-flight result. */
|
|
279
|
+
private _stopping;
|
|
253
280
|
/**
|
|
254
281
|
* Create a new STT engine instance.
|
|
255
282
|
* @param config - Optional configuration overrides (model, backend, language, etc.).
|
|
@@ -260,7 +287,8 @@ declare class STTEngine extends TypedEventEmitter<STTEvents> {
|
|
|
260
287
|
init(): Promise<void>;
|
|
261
288
|
/** Start recording audio and enable correction cycles. */
|
|
262
289
|
start(): Promise<void>;
|
|
263
|
-
/** Stop recording, run final transcription, return text.
|
|
290
|
+
/** Stop recording, run final transcription, return text.
|
|
291
|
+
* Mic and AudioContext stay alive for fast restart — call destroy() to fully release. */
|
|
264
292
|
stop(): Promise<string>;
|
|
265
293
|
/** Destroy the engine: terminate worker, release all resources. */
|
|
266
294
|
destroy(): void;
|
|
@@ -276,4 +304,4 @@ declare class STTEngine extends TypedEventEmitter<STTEvents> {
|
|
|
276
304
|
private emitDebug;
|
|
277
305
|
}
|
|
278
306
|
|
|
279
|
-
export { type AudioCaptureHandle, CorrectionOrchestrator, DEFAULT_STT_CONFIG, type ResolvedSTTConfig, type STTBackend, type STTChunkingConfig, type STTConfig, type STTCorrectionConfig, type STTCorrectionProvider, STTEngine, type STTError, type STTEvents, type STTModelSize, type STTState, type STTStatus, type STTStreamingConfig, type STTStreamingProvider, SpeechStreamingManager, TypedEventEmitter, WorkerManager, type WorkerManagerEvents, resampleAudio, resolveConfig, snapshotAudio, startCapture, stopCapture };
|
|
307
|
+
export { type AudioCaptureHandle, CorrectionOrchestrator, DEFAULT_STT_CONFIG, type ResolvedSTTConfig, type STTBackend, type STTChunkingConfig, type STTConfig, type STTCorrectionConfig, type STTCorrectionProvider, STTEngine, type STTError, type STTEvents, type STTModelSize, type STTState, type STTStatus, type STTStreamingConfig, type STTStreamingProvider, SpeechStreamingManager, TypedEventEmitter, WorkerManager, type WorkerManagerEvents, pauseCapture, resampleAudio, resolveConfig, resumeCapture, snapshotAudio, startCapture, stopCapture };
|
package/dist/index.js
CHANGED
|
@@ -7,8 +7,8 @@ var DEFAULT_STT_CONFIG = {
|
|
|
7
7
|
correction: {
|
|
8
8
|
enabled: true,
|
|
9
9
|
provider: "whisper",
|
|
10
|
-
pauseThreshold:
|
|
11
|
-
forcedInterval:
|
|
10
|
+
pauseThreshold: 1e3,
|
|
11
|
+
forcedInterval: 3e3
|
|
12
12
|
},
|
|
13
13
|
chunking: {
|
|
14
14
|
chunkLengthS: 30,
|
|
@@ -97,7 +97,19 @@ async function startCapture() {
|
|
|
97
97
|
source.connect(processor);
|
|
98
98
|
processor.connect(silencer);
|
|
99
99
|
silencer.connect(audioCtx.destination);
|
|
100
|
-
return { audioCtx, stream, samples, _processor: processor };
|
|
100
|
+
return { audioCtx, stream, samples, _processor: processor, _source: source, _silencer: silencer };
|
|
101
|
+
}
|
|
102
|
+
async function pauseCapture(capture) {
|
|
103
|
+
capture._source.disconnect();
|
|
104
|
+
const currentSamples = [...capture.samples];
|
|
105
|
+
capture.samples.length = 0;
|
|
106
|
+
return resampleAudio(currentSamples, capture.audioCtx.sampleRate);
|
|
107
|
+
}
|
|
108
|
+
async function resumeCapture(capture) {
|
|
109
|
+
if (capture.audioCtx.state === "suspended") {
|
|
110
|
+
await capture.audioCtx.resume();
|
|
111
|
+
}
|
|
112
|
+
capture._source.connect(capture._processor);
|
|
101
113
|
}
|
|
102
114
|
function snapshotAudio(capture) {
|
|
103
115
|
return [...capture.samples];
|
|
@@ -142,8 +154,17 @@ async function stopCapture(capture) {
|
|
|
142
154
|
var WorkerManager = class extends TypedEventEmitter {
|
|
143
155
|
worker = null;
|
|
144
156
|
transcribeResolve = null;
|
|
157
|
+
currentTranscribePromise = null;
|
|
145
158
|
modelReadyResolve = null;
|
|
146
159
|
modelReadyReject = null;
|
|
160
|
+
/** True while a transcription job is running in the worker. */
|
|
161
|
+
get isTranscribing() {
|
|
162
|
+
return this.transcribeResolve !== null;
|
|
163
|
+
}
|
|
164
|
+
/** Await the current in-flight transcription without starting a new one. */
|
|
165
|
+
awaitCurrentTranscription() {
|
|
166
|
+
return this.currentTranscribePromise ?? Promise.resolve("");
|
|
167
|
+
}
|
|
147
168
|
/** Spawn the Web Worker. Must be called before loadModel/transcribe. */
|
|
148
169
|
spawn(workerUrl) {
|
|
149
170
|
if (this.worker) return;
|
|
@@ -179,10 +200,11 @@ var WorkerManager = class extends TypedEventEmitter {
|
|
|
179
200
|
async transcribe(audio) {
|
|
180
201
|
if (!this.worker) throw new Error("Worker not spawned");
|
|
181
202
|
if (audio.length === 0) return "";
|
|
182
|
-
|
|
203
|
+
this.currentTranscribePromise = new Promise((resolve) => {
|
|
183
204
|
this.transcribeResolve = resolve;
|
|
184
205
|
this.worker.postMessage({ type: "transcribe", audio }, [audio.buffer]);
|
|
185
206
|
});
|
|
207
|
+
return this.currentTranscribePromise;
|
|
186
208
|
}
|
|
187
209
|
/** Cancel any in-flight transcription. */
|
|
188
210
|
cancel() {
|
|
@@ -402,8 +424,12 @@ var SpeechStreamingManager = class {
|
|
|
402
424
|
* SpeechRecognition has claimed the microphone (onaudiostart) or after
|
|
403
425
|
* a 300ms fallback — whichever comes first. The engine should await
|
|
404
426
|
* this before calling getUserMedia to avoid dual-mic conflicts.
|
|
427
|
+
*
|
|
428
|
+
* When skipMicWait is true (warm restart — mic already active), returns
|
|
429
|
+
* immediately after calling recognition.start() without waiting for
|
|
430
|
+
* onaudiostart.
|
|
405
431
|
*/
|
|
406
|
-
start(language) {
|
|
432
|
+
start(language, skipMicWait = false) {
|
|
407
433
|
const SR = getSpeechRecognition();
|
|
408
434
|
if (!SR) {
|
|
409
435
|
this.log("[SSM] SpeechRecognition not available in this environment");
|
|
@@ -513,6 +539,10 @@ var SpeechStreamingManager = class {
|
|
|
513
539
|
);
|
|
514
540
|
return Promise.resolve();
|
|
515
541
|
}
|
|
542
|
+
if (skipMicWait) {
|
|
543
|
+
this.log("[SSM] skipMicWait \u2014 warm restart, returning immediately");
|
|
544
|
+
return Promise.resolve();
|
|
545
|
+
}
|
|
516
546
|
return micClaimPromise;
|
|
517
547
|
}
|
|
518
548
|
clearNoResultTimer() {
|
|
@@ -560,6 +590,8 @@ var STTEngine = class extends TypedEventEmitter {
|
|
|
560
590
|
capture = null;
|
|
561
591
|
state;
|
|
562
592
|
workerUrl;
|
|
593
|
+
/** Prevents performCorrection from emitting while stop() is consuming the in-flight result. */
|
|
594
|
+
_stopping = false;
|
|
563
595
|
/**
|
|
564
596
|
* Create a new STT engine instance.
|
|
565
597
|
* @param config - Optional configuration overrides (model, backend, language, etc.).
|
|
@@ -605,14 +637,22 @@ var STTEngine = class extends TypedEventEmitter {
|
|
|
605
637
|
throw new Error(`Cannot start: engine is "${this.state.status}", expected "ready"`);
|
|
606
638
|
}
|
|
607
639
|
try {
|
|
640
|
+
const warmCapture = this.capture && this.capture.stream.getTracks().every((t) => t.readyState === "live");
|
|
608
641
|
this.emitDebug(
|
|
609
|
-
`[STT] start() \u2014 streaming: ${this.config.streaming.enabled}, lang: "${this.config.language}"`
|
|
642
|
+
`[STT] start() \u2014 streaming: ${this.config.streaming.enabled}, lang: "${this.config.language}", warm: ${!!warmCapture}`
|
|
610
643
|
);
|
|
611
644
|
if (this.config.streaming.enabled) {
|
|
612
|
-
await this.speechStreaming.start(this.config.language);
|
|
613
|
-
|
|
645
|
+
await this.speechStreaming.start(this.config.language, !!warmCapture);
|
|
646
|
+
if (!warmCapture) {
|
|
647
|
+
this.emitDebug("[STT] Speech API mic claim complete \u2014 starting getUserMedia");
|
|
648
|
+
}
|
|
649
|
+
}
|
|
650
|
+
if (warmCapture) {
|
|
651
|
+
await resumeCapture(this.capture);
|
|
652
|
+
this.emitDebug("[STT] warm mic resumed \u2014 skipped getUserMedia");
|
|
653
|
+
} else {
|
|
654
|
+
this.capture = await startCapture();
|
|
614
655
|
}
|
|
615
|
-
this.capture = await startCapture();
|
|
616
656
|
this.updateStatus("recording");
|
|
617
657
|
this.correctionOrchestrator.start();
|
|
618
658
|
} catch (err) {
|
|
@@ -622,16 +662,49 @@ var STTEngine = class extends TypedEventEmitter {
|
|
|
622
662
|
);
|
|
623
663
|
}
|
|
624
664
|
}
|
|
625
|
-
/** Stop recording, run final transcription, return text.
|
|
665
|
+
/** Stop recording, run final transcription, return text.
|
|
666
|
+
* Mic and AudioContext stay alive for fast restart — call destroy() to fully release. */
|
|
626
667
|
async stop() {
|
|
627
668
|
if (!this.capture) return "";
|
|
669
|
+
this._stopping = true;
|
|
628
670
|
this.correctionOrchestrator.stop();
|
|
629
671
|
this.speechStreaming.stop();
|
|
630
|
-
this.workerManager.cancel();
|
|
631
672
|
this.updateStatus("processing");
|
|
673
|
+
if (this.workerManager.isTranscribing) {
|
|
674
|
+
try {
|
|
675
|
+
const [audio, inFlightText] = await Promise.all([
|
|
676
|
+
pauseCapture(this.capture),
|
|
677
|
+
this.workerManager.awaitCurrentTranscription()
|
|
678
|
+
]);
|
|
679
|
+
this._stopping = false;
|
|
680
|
+
const text = inFlightText.trim();
|
|
681
|
+
if (text) {
|
|
682
|
+
this.emit("correction", text);
|
|
683
|
+
this.updateStatus("ready");
|
|
684
|
+
return text;
|
|
685
|
+
}
|
|
686
|
+
if (audio.length > 0) {
|
|
687
|
+
const freshText = await this.workerManager.transcribe(audio);
|
|
688
|
+
this.emit("correction", freshText);
|
|
689
|
+
this.updateStatus("ready");
|
|
690
|
+
return freshText;
|
|
691
|
+
}
|
|
692
|
+
this.updateStatus("ready");
|
|
693
|
+
return "";
|
|
694
|
+
} catch (err) {
|
|
695
|
+
this._stopping = false;
|
|
696
|
+
this.emitError(
|
|
697
|
+
"TRANSCRIPTION_FAILED",
|
|
698
|
+
err instanceof Error ? err.message : "Final transcription failed."
|
|
699
|
+
);
|
|
700
|
+
this.updateStatus("ready");
|
|
701
|
+
return "";
|
|
702
|
+
}
|
|
703
|
+
}
|
|
704
|
+
this.workerManager.cancel();
|
|
705
|
+
this._stopping = false;
|
|
632
706
|
try {
|
|
633
|
-
const audio = await
|
|
634
|
-
this.capture = null;
|
|
707
|
+
const audio = await pauseCapture(this.capture);
|
|
635
708
|
if (audio.length === 0) {
|
|
636
709
|
this.updateStatus("ready");
|
|
637
710
|
return "";
|
|
@@ -654,6 +727,10 @@ var STTEngine = class extends TypedEventEmitter {
|
|
|
654
727
|
this.correctionOrchestrator.stop();
|
|
655
728
|
this.speechStreaming.destroy();
|
|
656
729
|
if (this.capture) {
|
|
730
|
+
try {
|
|
731
|
+
this.capture._processor.disconnect();
|
|
732
|
+
} catch {
|
|
733
|
+
}
|
|
657
734
|
for (const track of this.capture.stream.getTracks()) {
|
|
658
735
|
track.stop();
|
|
659
736
|
}
|
|
@@ -682,7 +759,7 @@ var STTEngine = class extends TypedEventEmitter {
|
|
|
682
759
|
const audio = await resampleAudio(samples, nativeSr);
|
|
683
760
|
if (audio.length === 0) return;
|
|
684
761
|
const text = await this.workerManager.transcribe(audio);
|
|
685
|
-
if (text.trim() && this.capture) {
|
|
762
|
+
if (text.trim() && this.capture && !this._stopping) {
|
|
686
763
|
this.emit("correction", text);
|
|
687
764
|
}
|
|
688
765
|
} catch (err) {
|
|
@@ -739,8 +816,10 @@ export {
|
|
|
739
816
|
SpeechStreamingManager,
|
|
740
817
|
TypedEventEmitter,
|
|
741
818
|
WorkerManager,
|
|
819
|
+
pauseCapture,
|
|
742
820
|
resampleAudio,
|
|
743
821
|
resolveConfig,
|
|
822
|
+
resumeCapture,
|
|
744
823
|
snapshotAudio,
|
|
745
824
|
startCapture,
|
|
746
825
|
stopCapture
|