speech-to-speech 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -300,38 +300,123 @@ import {
300
300
 
301
301
  #### `STTLogic`
302
302
 
303
- Main speech recognition controller with session management.
303
+ Main speech recognition controller. Wraps the browser's Web Speech API with:
304
+
305
+ - **Silent session rotation.** Chromium ends Web Speech sessions on its own (typically after ~60s). `STTLogic` detects the browser's `end` event, commits the current session into an in-memory transcript, and transparently starts a fresh session — all without notifying the consumer. `onTranscript` is never fired during a rotation.
306
+ - **Dedup-safe transcript model.** A high-water-mark (`processedFinalCount`) ensures each `isFinal` result is ingested exactly once across rotations, eliminating the duplicate-word artifacts typical of naive `results` concatenation.
307
+ - **Two delivery modes.** Pick when the final transcript is emitted via the `continueOnSilence` option:
308
+
309
+ | `continueOnSilence` | Behaviour |
310
+ | ------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------- |
311
+ | `true` *(default)* | **Continuous / manual-stop.** Listening keeps running across all silent restarts until the consumer calls `stt.stop()`. `onTranscript` fires exactly once, on stop. |
312
+ | `false` | **Silence-triggered.** When the user has been silent for `silenceThresholdMs`, `onTranscript` fires with the final transcript and recognition auto-stops. |
313
+
314
+ In **both** modes, `onInterimTranscript` streams the live transcript (committed sessions + current-session finals + in-flight partial) continuously, including during silent rotations — so the UI never goes blank.
304
315
 
305
316
  ```typescript
306
317
  const stt = new STTLogic(
307
318
  // Log callback
308
319
  (message: string, level?: "info" | "warning" | "error") => void,
309
- // Transcript callback
320
+ // Final transcript callback — fires ONCE (see modes above)
310
321
  (transcript: string) => void,
311
322
  // Options
312
323
  {
313
- sessionDurationMs?: number, // Session duration (default: 30000)
314
- interimSaveIntervalMs?: number, // Interim save interval (default: 5000)
315
- preserveTranscriptOnStart?: boolean,
324
+ // --- Delivery mode (new) ---
325
+ continueOnSilence?: boolean, // default: true (manual stop). false => silence-triggered.
326
+ silenceThresholdMs?: number, // default: 1500. Only used when continueOnSilence=false.
327
+
328
+ // --- Live UI streaming ---
329
+ onInterimTranscript?: (text: string) => void, // fires on every result, both interim & final
330
+
331
+ // --- Misc ---
332
+ preserveTranscriptOnStart?: boolean, // keep the previous transcript when start() is called again
333
+
334
+ // --- Deprecated (accepted for backward compat, ignored) ---
335
+ sessionDurationMs?: number, // silent rotation is now browser-driven, not timer-driven
336
+ interimSaveIntervalMs?: number,
316
337
  }
317
338
  );
318
339
 
319
340
  // Core methods
320
341
  stt.start(); // Start listening
321
- stt.stop(); // Stop listening
342
+ stt.stop(); // Stop listening AND emit onTranscript
322
343
  stt.destroy(); // Cleanup resources
323
- stt.getFullTranscript(); // Get accumulated transcript
324
- stt.clearTranscript(); // Clear transcript
344
+ stt.getFullTranscript(); // Live transcript: committed + current session + in-flight interim
345
+ stt.clearTranscript(); // Clear all accumulated transcript
325
346
 
326
347
  // Callbacks
327
- stt.setWordsUpdateCallback((words: string[]) => {}); // Word-by-word updates
348
+ stt.setWordsUpdateCallback((words: string[]) => {}); // Word stream of the live transcript
328
349
  stt.setMicTimeUpdateCallback((ms: number) => {}); // Mic active time
329
350
  stt.setVadCallbacks(
330
- () => console.log("Speech started"), // onSpeechStart
331
- () => console.log("Speech ended") // onSpeechEnd
351
+ () => console.log("Speech started"), // onSpeechStart (heuristic)
352
+ () => console.log("Speech ended") // onSpeechEnd (heuristic)
332
353
  );
333
354
  ```
334
355
 
356
+ ##### Mode 1 — Continuous (manual stop)
357
+
358
+ Use this for long-form dictation, note-taking, or chat inputs where the user decides when they are done.
359
+
360
+ ```typescript
361
+ const stt = new STTLogic(
362
+ (msg, level) => console.log(`[${level}]`, msg),
363
+ (finalText) => {
364
+ // Fires ONCE, when stt.stop() is called by you.
365
+ saveToDB(finalText);
366
+ },
367
+ {
368
+ continueOnSilence: true, // (default)
369
+ onInterimTranscript: (liveText) => {
370
+ // Fires continuously — render the growing text as the user speaks.
371
+ liveCaption.textContent = liveText;
372
+ },
373
+ },
374
+ );
375
+
376
+ stt.start();
377
+ // ... user keeps talking for 5 minutes; Web Speech silently rotates several times ...
378
+ stopButton.onclick = () => stt.stop(); // only here does onTranscript fire
379
+ ```
380
+
381
+ ##### Mode 2 — Silence-triggered auto-stop
382
+
383
+ Use this for turn-taking conversational UIs (voice assistants, STS loops), where "user stopped talking" is the signal to act.
384
+
385
+ ```typescript
386
+ const stt = new STTLogic(
387
+ (msg, level) => console.log(`[${level}]`, msg),
388
+ (finalText) => {
389
+ // Fires automatically once the user has been silent for silenceThresholdMs.
390
+ sendToLLM(finalText);
391
+ },
392
+ {
393
+ continueOnSilence: false,
394
+ silenceThresholdMs: 1500, // 1.5s of silence => auto-emit & auto-stop
395
+ onInterimTranscript: (liveText) => {
396
+ liveCaption.textContent = liveText;
397
+ },
398
+ },
399
+ );
400
+
401
+ stt.start();
402
+ // User speaks, pauses 1.5s, onTranscript fires and listening stops on its own.
403
+ // To begin the next turn, call stt.start() again.
404
+ ```
405
+
406
+ ##### Observing silent session rotations
407
+
408
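+ When `continueOnSilence: true`, the library silently restarts the underlying recognition session whenever the browser ends it. You can observe this in the browser DevTools console — `STTLogic` prints three clearly-prefixed markers for each rotation (🔴 session ended, 🔄 restart requested, 🟢 session restarted), plus a fourth (⏹️) when listening is stopped explicitly: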
409
+
410
+ ```text
411
+ [STT] 🔴 Session ENDED by Web Speech (sessionId=1) — will silently restart
412
+ [STT] 🔄 Silent restart requested (newSessionId=2, restartCount=1) — committing 3 final segment(s) + interim into memory
413
+ [STT] 🟢 Session RESTARTED silently (sessionId=2) in 180ms — committed="hello there how are you doing today"
414
+ ...
415
+ [STT] ⏹️ Explicit STOP — emitting onTranscript once (len=284, silent restarts during session=2)
416
+ ```
417
+
418
+ The `onTranscript` callback only fires on the final `⏹️ Explicit STOP` line (or when the silence threshold hits in mode 2). If your application observes nothing between a 🔴 / 🟢 pair — no `onTranscript` call and no gap in the live text delivered to `onInterimTranscript` — the rotation was fully transparent, which is the intended behaviour.
419
+
335
420
  ### TTS (Text-to-Speech)
336
421
 
337
422
  #### `TTSLogic`
@@ -471,36 +556,43 @@ await player.close();
471
556
  ```typescript
472
557
  import { STTLogic } from "speech-to-speech";
473
558
 
559
+ const liveEl = document.getElementById("live")!;
560
+ const finalEl = document.getElementById("final")!;
561
+
474
562
  const stt = new STTLogic(
475
563
  (message, level) => console.log(`[STT ${level}] ${message}`),
476
- (transcript) => {
477
- document.getElementById("output")!.textContent = transcript;
564
+ (finalTranscript) => {
565
+ // Fires exactly once — when stt.stop() is called (manual mode)
566
+ // or when silence >= silenceThresholdMs is detected (silence mode).
567
+ finalEl.textContent = finalTranscript;
478
568
  },
479
569
  {
480
- sessionDurationMs: 30000,
481
- interimSaveIntervalMs: 5000,
570
+ continueOnSilence: true, // manual-stop mode — swap to false for silence auto-stop
571
+ // silenceThresholdMs: 1500, // only used when continueOnSilence=false
572
+ onInterimTranscript: (liveText) => {
573
+ // Streams continuously — even across silent session rotations.
574
+ liveEl.textContent = liveText;
575
+ },
482
576
  }
483
577
  );
484
578
 
485
- // Listen for individual words
579
+ // Optional: word-by-word stream of the live transcript
486
580
  stt.setWordsUpdateCallback((words) => {
487
- console.log("Heard words:", words);
581
+ console.log("Words so far:", words);
488
582
  });
489
583
 
490
- // Detect speech start/end
584
+ // Optional: rough VAD based on Web Speech interim/final transitions
491
585
  stt.setVadCallbacks(
492
586
  () => console.log("User started speaking"),
493
587
  () => console.log("User stopped speaking")
494
588
  );
495
589
 
496
- // Start listening
590
+ // Start listening — silent restarts happen under the hood if Web Speech
591
+ // ends its session; you do nothing.
497
592
  stt.start();
498
593
 
499
- // Stop after 10 seconds
500
- setTimeout(() => {
501
- stt.stop();
502
- console.log("Final transcript:", stt.getFullTranscript());
503
- }, 10000);
594
+ // Stop whenever the user decides. Final transcript arrives via onTranscript.
595
+ stopButton.addEventListener("click", () => stt.stop());
504
596
 
505
597
  // Cleanup on page unload
506
598
  window.addEventListener("beforeunload", () => stt.destroy());
@@ -562,22 +654,22 @@ async function init() {
562
654
  tts = new TTSLogic({ voiceId: "en_US-hfc_female-medium" });
563
655
  await tts.initialize();
564
656
 
565
- // Initialize STT
657
+ // Initialize STT in silence-triggered mode — the library itself decides
658
+ // when the user is done and fires `onTranscript` automatically.
566
659
  stt = new STTLogic(
567
660
  (msg, level) => console.log(`[STT] ${msg}`),
568
- (transcript) => console.log("Transcript:", transcript),
569
- { sessionDurationMs: 60000 }
570
- );
571
-
572
- // Process speech when user stops talking
573
- stt.setVadCallbacks(
574
- () => console.log("Listening..."),
575
- async () => {
576
- const transcript = stt.getFullTranscript();
577
- if (transcript.trim().length > 3) {
578
- await processSpeech(transcript);
579
- stt.clearTranscript();
661
+ async (finalTranscript) => {
662
+ // Fires once per turn, when silence >= silenceThresholdMs is detected.
663
+ if (finalTranscript.trim().length > 3) {
664
+ await processSpeech(finalTranscript);
580
665
  }
666
+ stt.clearTranscript();
667
+ stt.start(); // start next turn
668
+ },
669
+ {
670
+ continueOnSilence: false,
671
+ silenceThresholdMs: 1500,
672
+ onInterimTranscript: (live) => (liveCaption.textContent = live),
581
673
  }
582
674
  );
583
675
  }
@@ -668,7 +760,9 @@ service.stopSpeaking();
668
760
 
669
761
  ## Interim Transcript Streaming
670
762
 
671
- Get real-time partial results while the user is still speaking. Pass `onInterimTranscript` directly to `initializeSTT()`:
763
+ Get real-time partial results while the user is still speaking. `onInterimTranscript` fires on **every** recognition update (both interim and final results) with the full live transcript — including the text committed from prior silent session rotations — so you can render a continuously-growing caption without any gaps when the browser rotates the underlying Web Speech session.
764
+
765
+ Pass `onInterimTranscript` directly to `initializeSTT()`:
672
766
 
673
767
  ```ts
674
768
  import { createSpeechService } from "speech-to-speech";
@@ -677,9 +771,10 @@ const service = createSpeechService();
677
771
 
678
772
  service.initializeSTT({
679
773
  onTranscript: (finalText) => console.log("Final:", finalText),
680
- onInterimTranscript: (partialText) => {
681
- // Called on every interim result great for live captions
682
- liveCaption.textContent = partialText;
774
+ onInterimTranscript: (liveText) => {
775
+ // Full live text: committed sessions + current-session finals + in-flight partial.
776
+ // Never empties mid-session due to Web Speech's internal timeouts.
777
+ liveCaption.textContent = liveText;
683
778
  },
684
779
  });
685
780
 
@@ -805,7 +900,9 @@ See [Piper Voices](https://rhasspy.github.io/piper-samples/) for the complete li
805
900
  | ---------------------------------- | ------------------------------------------------------------------------------------------ |
806
901
  | "Speech Recognition not supported" | Use Chrome, Safari, or Edge. Firefox doesn't support Web Speech API. |
807
902
  | No transcript | Check microphone permissions. Ensure `stt.start()` was called. |
808
- | Transcript stops | Browser sessions timeout after ~30s. Library auto-restarts, but check `sessionDurationMs`. |
903
+ | Transcript stops | The library silently restarts the recognition session whenever the browser ends it — nothing to configure. Open DevTools and look for the `[STT] 🔴 … 🟢` log pair to confirm a rotation happened. |
904
+ | `onTranscript` never fires | In `continueOnSilence: true` (default) it only fires on `stt.stop()`. Call `stop()` to get the final transcript, or switch to `continueOnSilence: false` + `silenceThresholdMs` for automatic delivery. |
905
+ | Duplicated words in final | Fixed in v0.1.5. If you still see duplicates, ensure you are on ≥ 0.1.5 — the old `sessionDurationMs` / `interimSaveIntervalMs` timer path no longer runs. |
809
906
 
810
907
  ### Dev Server Issues (Vite)
811
908
 
@@ -842,6 +939,18 @@ npm run clean # Remove dist/
842
939
 
843
940
  ## Changelog
844
941
 
942
+ ### v0.1.5
943
+
944
+ - **`STTLogic` — silent session rotation.** Web Speech's internal session end (the ~60s browser timeout, error retries, any spontaneous `end` event) now triggers a fully-silent restart: the library commits the current session into an in-memory transcript and starts a fresh recognition session. `onTranscript` is **not** emitted during rotations, so the consumer sees one uninterrupted listening session.
945
+ - **`STTLogic` — dedup-safe transcript model.** The previous `results` concatenation + `collapseRepeats` safety net is replaced by a high-water-mark (`processedFinalCount`) that ingests each `isFinal` result exactly once. This eliminates the duplicate-word/line artifacts that could previously appear in the final transcript.
946
+ - **`STTLogic` — new option `continueOnSilence` (default `true`).**
947
+ - `true` → manual-stop mode. `onTranscript` fires only when the consumer calls `stt.stop()`.
948
+ - `false` → silence-triggered mode. `onTranscript` fires (and listening auto-stops) when the user has been silent for `silenceThresholdMs`.
949
+ - **`STTLogic` — new option `silenceThresholdMs` (default `1500`).** Silence window used when `continueOnSilence: false`.
950
+ - **`onInterimTranscript`** now fires on every recognition update (interim AND final), and always includes the committed transcript from prior silent rotations — UI captions stay gap-free.
951
+ - **Deprecated options (accepted for backward compatibility, now no-ops):** `sessionDurationMs`, `interimSaveIntervalMs`. Session rotation is browser-driven, not timer-driven.
952
+ - **Observability.** `STTLogic` emits emoji-prefixed `[STT]` console markers (🔴 session ended, 🔄 silent restart requested, 🟢 session restarted, ⏹️ explicit stop), so you can verify behaviour from DevTools without any extra wiring.
953
+
845
954
  ### v0.1.4
846
955
 
847
956
  - **`createSpeechService()`** — Unified service wrapper that wires STT + TTS together with a single ergonomic API. Supports `initializeSTT`, `initializeTTS`, `startListening`, `stopListening`, `speak`, `stopSpeaking`, and `getCompatibilityInfo`.
@@ -128,7 +128,18 @@ type VadCallbacks = {
128
128
  type LogCallback = (message: string, type?: "info" | "error" | "warning") => void;
129
129
  type TranscriptCallback = (transcript: string) => void;
130
130
  interface ResetSTTOptions {
131
+ /**
132
+ * @deprecated No longer used. Silent session rotation is now driven purely
133
+ * by the browser's own `end` event — the library never forces a
134
+ * rotation on a timer. This option is accepted for API
135
+ * compatibility and otherwise ignored.
136
+ */
131
137
  sessionDurationMs?: number;
138
+ /**
139
+ * @deprecated No longer used. The interim → final promotion is now handled
140
+ * per-result via the Web Speech `isFinal` flag. Kept for API
141
+ * compatibility.
142
+ */
132
143
  interimSaveIntervalMs?: number;
133
144
  preserveTranscriptOnStart?: boolean;
134
145
  /** Enable short filler (default: false) */
@@ -156,18 +167,44 @@ interface ResetSTTOptions {
156
167
  /** Language hint for LLM (e.g., "English", "Hindi") */
157
168
  languageHint?: string;
158
169
  /**
159
- * Called on every interim (non-final) recognition result with the current
160
- * partial transcript text. Useful for real‑time UI updates.
161
- * Does not affect the final transcript or setWordsUpdateCallback.
170
+ * Called on every recognition update (interim AND final) with the current
171
+ * full live transcript (committed + current session finals + in-flight
172
+ * partial). Useful for real‑time UI updates.
173
+ *
174
+ * This fires continuously in BOTH modes (continueOnSilence true or false)
175
+ * and is NEVER suppressed during silent session rotations, so the UI
176
+ * always reflects the actual text the user has spoken so far.
162
177
  */
163
178
  onInterimTranscript?: (text: string) => void;
179
+ /**
180
+ * Controls when the final `onTranscript` callback fires.
181
+ *
182
+ * - `true` (default) — "continuous / manual-stop" mode. The library keeps
183
+ * listening until the consumer explicitly calls `stop()`. `onTranscript`
184
+ * is fired exactly ONCE, on stop, with the full accumulated transcript.
185
+ * Internal Web Speech session rotations (the ~60s browser timeout or any
186
+ * forced restart) are fully silent: `onTranscript` is NOT fired and the
187
+ * in-memory transcript is preserved across the restart.
188
+ *
189
+ * - `false` — "silence-triggered" mode. The library watches for user
190
+ * silence; once the user has been silent for `silenceThresholdMs`, the
191
+ * final transcript is emitted via `onTranscript` and listening is
192
+ * stopped automatically.
193
+ *
194
+ * In BOTH modes, `onInterimTranscript` is invoked live throughout.
195
+ */
196
+ continueOnSilence?: boolean;
197
+ /**
198
+ * Silence duration (ms) after which the transcript is auto-emitted and
199
+ * listening is stopped. Only used when `continueOnSilence` is `false`.
200
+ * Default: 1500ms.
201
+ */
202
+ silenceThresholdMs?: number;
164
203
  }
165
204
  type STTLogicOptions = ResetSTTOptions;
166
205
  declare class ResetSTTLogic {
167
206
  private recognition;
168
207
  private isListening;
169
- private fullTranscript;
170
- private heardWords;
171
208
  private onLog;
172
209
  private onTranscript;
173
210
  private onWordsUpdate;
@@ -181,14 +218,14 @@ declare class ResetSTTLogic {
181
218
  private restartCount;
182
219
  private isRestarting;
183
220
  private isRecognitionRunning;
184
- private lastInterimTranscript;
185
221
  private onInterimTranscriptCallback?;
186
- private lastInterimSaveTime;
187
- private interimSaveInterval;
188
- private lastInterimResultTime;
189
- private lastSavedLength;
190
- private transcriptBeforeRestart;
191
- private sessionStartTranscript;
222
+ private committedTranscript;
223
+ private currentSessionFinals;
224
+ private currentInterim;
225
+ private processedFinalCount;
226
+ private heardWords;
227
+ private lastResultTime;
228
+ private silenceAutoStopScheduled;
192
229
  private resultHandler?;
193
230
  private errorHandler?;
194
231
  private endHandler?;
@@ -210,13 +247,34 @@ declare class ResetSTTLogic {
210
247
  isInAutoRestart(): boolean;
211
248
  getFullTranscript(): string;
212
249
  clearTranscript(): void;
250
+ /**
251
+ * Build the current full transcript from the three buckets.
252
+ * @param includeInterim whether to include the in-flight interim partial.
253
+ */
254
+ private composeTranscript;
255
+ /**
256
+ * Fold the active session's finals + interim into the committed transcript
257
+ * and reset per-session state. Intended to be called right before a silent
258
+ * recognition rotation (either our performRestart or an implicit `end`
259
+ * from Web Speech). Never emits onTranscript.
260
+ */
261
+ private commitCurrentSession;
213
262
  private setupRecognition;
214
263
  private waitForEventOnce;
215
264
  private startMicTimer;
216
265
  private stopMicTimer;
217
- private saveInterimToFinal;
218
266
  private getSuffixToAppend;
219
267
  private collapseRepeats;
268
+ /**
269
+ * Silent restart. Called only when Web Speech has ended the current
270
+ * session on its own (`end` event) while the consumer is still listening
271
+ * and silence has NOT yet fired. The active session is folded into the
272
+ * committed transcript and a fresh recognition session is started.
273
+ *
274
+ * `onTranscript` is never invoked from this path — to the consumer the
275
+ * session looks uninterrupted. `onInterimTranscript` continues to receive
276
+ * live updates once the new session produces results.
277
+ */
220
278
  private performRestart;
221
279
  start(): void;
222
280
  stop(): void;
@@ -128,7 +128,18 @@ type VadCallbacks = {
128
128
  type LogCallback = (message: string, type?: "info" | "error" | "warning") => void;
129
129
  type TranscriptCallback = (transcript: string) => void;
130
130
  interface ResetSTTOptions {
131
+ /**
132
+ * @deprecated No longer used. Silent session rotation is now driven purely
133
+ * by the browser's own `end` event — the library never forces a
134
+ * rotation on a timer. This option is accepted for API
135
+ * compatibility and otherwise ignored.
136
+ */
131
137
  sessionDurationMs?: number;
138
+ /**
139
+ * @deprecated No longer used. The interim → final promotion is now handled
140
+ * per-result via the Web Speech `isFinal` flag. Kept for API
141
+ * compatibility.
142
+ */
132
143
  interimSaveIntervalMs?: number;
133
144
  preserveTranscriptOnStart?: boolean;
134
145
  /** Enable short filler (default: false) */
@@ -156,18 +167,44 @@ interface ResetSTTOptions {
156
167
  /** Language hint for LLM (e.g., "English", "Hindi") */
157
168
  languageHint?: string;
158
169
  /**
159
- * Called on every interim (non-final) recognition result with the current
160
- * partial transcript text. Useful for real‑time UI updates.
161
- * Does not affect the final transcript or setWordsUpdateCallback.
170
+ * Called on every recognition update (interim AND final) with the current
171
+ * full live transcript (committed + current session finals + in-flight
172
+ * partial). Useful for real‑time UI updates.
173
+ *
174
+ * This fires continuously in BOTH modes (continueOnSilence true or false)
175
+ * and is NEVER suppressed during silent session rotations, so the UI
176
+ * always reflects the actual text the user has spoken so far.
162
177
  */
163
178
  onInterimTranscript?: (text: string) => void;
179
+ /**
180
+ * Controls when the final `onTranscript` callback fires.
181
+ *
182
+ * - `true` (default) — "continuous / manual-stop" mode. The library keeps
183
+ * listening until the consumer explicitly calls `stop()`. `onTranscript`
184
+ * is fired exactly ONCE, on stop, with the full accumulated transcript.
185
+ * Internal Web Speech session rotations (the ~60s browser timeout or any
186
+ * forced restart) are fully silent: `onTranscript` is NOT fired and the
187
+ * in-memory transcript is preserved across the restart.
188
+ *
189
+ * - `false` — "silence-triggered" mode. The library watches for user
190
+ * silence; once the user has been silent for `silenceThresholdMs`, the
191
+ * final transcript is emitted via `onTranscript` and listening is
192
+ * stopped automatically.
193
+ *
194
+ * In BOTH modes, `onInterimTranscript` is invoked live throughout.
195
+ */
196
+ continueOnSilence?: boolean;
197
+ /**
198
+ * Silence duration (ms) after which the transcript is auto-emitted and
199
+ * listening is stopped. Only used when `continueOnSilence` is `false`.
200
+ * Default: 1500ms.
201
+ */
202
+ silenceThresholdMs?: number;
164
203
  }
165
204
  type STTLogicOptions = ResetSTTOptions;
166
205
  declare class ResetSTTLogic {
167
206
  private recognition;
168
207
  private isListening;
169
- private fullTranscript;
170
- private heardWords;
171
208
  private onLog;
172
209
  private onTranscript;
173
210
  private onWordsUpdate;
@@ -181,14 +218,14 @@ declare class ResetSTTLogic {
181
218
  private restartCount;
182
219
  private isRestarting;
183
220
  private isRecognitionRunning;
184
- private lastInterimTranscript;
185
221
  private onInterimTranscriptCallback?;
186
- private lastInterimSaveTime;
187
- private interimSaveInterval;
188
- private lastInterimResultTime;
189
- private lastSavedLength;
190
- private transcriptBeforeRestart;
191
- private sessionStartTranscript;
222
+ private committedTranscript;
223
+ private currentSessionFinals;
224
+ private currentInterim;
225
+ private processedFinalCount;
226
+ private heardWords;
227
+ private lastResultTime;
228
+ private silenceAutoStopScheduled;
192
229
  private resultHandler?;
193
230
  private errorHandler?;
194
231
  private endHandler?;
@@ -210,13 +247,34 @@ declare class ResetSTTLogic {
210
247
  isInAutoRestart(): boolean;
211
248
  getFullTranscript(): string;
212
249
  clearTranscript(): void;
250
+ /**
251
+ * Build the current full transcript from the three buckets.
252
+ * @param includeInterim whether to include the in-flight interim partial.
253
+ */
254
+ private composeTranscript;
255
+ /**
256
+ * Fold the active session's finals + interim into the committed transcript
257
+ * and reset per-session state. Intended to be called right before a silent
258
+ * recognition rotation (either our performRestart or an implicit `end`
259
+ * from Web Speech). Never emits onTranscript.
260
+ */
261
+ private commitCurrentSession;
213
262
  private setupRecognition;
214
263
  private waitForEventOnce;
215
264
  private startMicTimer;
216
265
  private stopMicTimer;
217
- private saveInterimToFinal;
218
266
  private getSuffixToAppend;
219
267
  private collapseRepeats;
268
+ /**
269
+ * Silent restart. Called only when Web Speech has ended the current
270
+ * session on its own (`end` event) while the consumer is still listening
271
+ * and silence has NOT yet fired. The active session is folded into the
272
+ * committed transcript and a fresh recognition session is started.
273
+ *
274
+ * `onTranscript` is never invoked from this path — to the consumer the
275
+ * session looks uninterrupted. `onInterimTranscript` continues to receive
276
+ * live updates once the new session produces results.
277
+ */
220
278
  private performRestart;
221
279
  start(): void;
222
280
  stop(): void;