npm - @untemps/vocal - Versions diffs - 2.0.0-beta.19 → 2.0.0-beta.20 - Mend

@untemps/vocal 2.0.0-beta.19 → 2.0.0-beta.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,20 @@
+# [2.0.0-beta.20](https://github.com/untemps/vocal/compare/v2.0.0-beta.19...v2.0.0-beta.20) (2026-05-20)
+### Features
+* Auto-restart recognition on silence in continuous mode ([#84](https://github.com/untemps/vocal/issues/84)) ([79a55f5](https://github.com/untemps/vocal/commit/79a55f5e295d2027a1473ce59872e6a09b4655c1))
+### BREAKING CHANGES
+* continuous mode now keeps the session alive across silence and aggregates results — semantics that callers using `continuous: true` must adapt to:
+- Recording no longer ends after ~7s of silence; call `stop()` or `abort()` explicitly to terminate the session.
+- A synthetic `result` event is emitted just before `end` on `stop()`, carrying the joined final transcripts. `event instanceof SpeechRecognitionEvent` returns `false` for this event — read the transcript through the listener's second argument (`(event, bestAlternative, alternatives) => ...`).
+- Intermediate `end` and `start` events fired by the browser during silent restart cycles are no longer forwarded to user listeners. `isRecording` stays `true` across the cycle.
+- `abort()` discards the aggregated buffer without emitting.
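+`continuous: false` consumers are not affected by the auto-restart. Note that the aggregated `result` event is not gated on `continuous`: a non-continuous session that received a final result and is later stopped explicitly with `stop()` (including through `cleanup()`, which calls `stop()`) also receives the synthetic event.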
 # [2.0.0-beta.19](https://github.com/untemps/vocal/compare/v2.0.0-beta.18...v2.0.0-beta.19) (2026-05-17)
 # [2.0.0-beta.18](https://github.com/untemps/vocal/compare/v2.0.0-beta.17...v2.0.0-beta.18) (2026-05-16)

package/README.md CHANGED Viewed

@@ -62,10 +62,22 @@ Please refer to [this section](https://developer.mozilla.org/en-US/docs/Web/API/
 | ---------------- | ----------------- | ---------- | ----------------------------------------------------------------------------------------------------------------- |
 | grammars         | SpeechGrammarList | null       | Grammars understood by the recognition [JSpeech Grammar Format](https://www.w3.org/TR/jsgf/)                      |
 | lang             | string            | 'en-US'    | Language understood by the recognition [BCP 47 language tag](https://tools.ietf.org/html/bcp47)                   |
-| continuous       | boolean           | false      | Whether continuous results are returned for each recognition, or only a single result                             |
+| continuous       | boolean           | false      | Whether continuous results are returned for each recognition, or only a single result (see [Continuous mode](#continuous-mode)) |
 | interimResults   | boolean           | false      | Whether interim results should be returned or not. Interim results are results that are not yet final             |
 | maxAlternatives  | number            | 1          | Maximum number of SpeechRecognitionAlternatives provided per result                                               |
+### Continuous mode
+Browsers (notably Chrome) automatically end a recognition session after a few seconds of silence, even when `continuous` is `true`. Vocal transparently restarts the underlying engine after such a silence-induced `end`, so recording keeps running until `stop()` or `abort()` is explicitly called. The intermediate `end` and `start` events triggered by the restart are not forwarded to user listeners — `isRecording` stays `true` across the restart, and the cycle is throttled to at most one restart per second to avoid `InvalidStateError`.
+The restart is disabled automatically, and `isRecording` becomes `false`, when the recognition emits a fatal error (`not-allowed`, `service-not-allowed`, `audio-capture`).
+#### Aggregated result on stop
+To compensate for results being split across silent restart cycles, Vocal accumulates the best alternative of every final result (`isFinal: true`) received during a session. On explicit `stop()`, an extra `result` event is emitted just before `end`, carrying the accumulated transcripts joined with a single space as one string. Interim results are never accumulated, and `abort()` discards the buffer without emitting anything.
+The aggregated event is a synthetic `Event` shaped to match `SpeechRecognitionEvent` (`resultIndex` + `results[0][0].transcript`); it is not a real `SpeechRecognitionEvent` instance, so `event instanceof SpeechRecognitionEvent` returns `false`. Read the transcript through the second argument of the listener (`bestAlternative`).
 ## Events
 Events described below are those from the `SpeechRecognition` Web API.

package/dist/index.cjs CHANGED Viewed

@@ -1,2 +1,2 @@
-Object.defineProperty(exports,Symbol.toStringTag,{value:`Module`});let e=require(`@untemps/user-permissions-utils`);var t=class t{static defaultOptions={grammars:null,lang:`en-US`,continuous:!1,interimResults:!1,maxAlternatives:1};static eventTypes={AUDIO_END:`audioend`,AUDIO_START:`audiostart`,END:`end`,ERROR:`error`,NO_MATCH:`nomatch`,RESULT:`result`,SOUND_END:`soundend`,SOUND_START:`soundstart`,SPEECH_END:`speechend`,SPEECH_START:`speechstart`,START:`start`};static get isSupported(){return!!t._resolveSpeechRecognition()&&!!(0,e.isNavigatorPermissionsSupported)()&&!!(0,e.isNavigatorMediaDevicesSupported)()}static set isSupported(e){throw Error(`You cannot set isSupported directly.`)}_instance=null;_listeners={};_isRecording=!1;_onEnd=()=>{this._isRecording=!1};constructor(e){let n=t._resolveSpeechRecognition();if(!n)throw new DOMException(`SpeechRecognition not supported`,`NOT_SUPPORTED_ERR`);this._instance=new n;let{grammars:r,...i}={...t.defaultOptions,...e??{}},a=this._instance;if(Object.assign(a,i),r)a.grammars=r;else{let e=t._resolveSpeechGrammarList();a.grammars=e?new e:null}this._instance.addEventListener(`end`,this._onEnd)}get isRecording(){return this._isRecording}set isRecording(e){throw Error(`You cannot set isRecording directly.`)}async start({signal:t}={}){if(this._instance)try{if(!await(0,e.getUserMediaStream)(`microphone`,{audio:!0},{signal:t}))throw Error(`Unable to retrieve the stream from media device`);this._instance.start(),this._isRecording=!0}catch(e){if(e instanceof Error&&e.name===`AbortError`)return this;throw e}return this}stop(){return this._instance&&(this._instance.stop(),this._isRecording=!1),this}abort(){return this._instance&&(this._instance.abort(),this._isRecording=!1),this}addEventListener(e,n){if(!this._includesEventType(e))throw Error(this._unknownEventTypeMessage(e));if(this._instance){let r=r=>{let i=[];if(e===t.eventTypes.RESULT){let e=r;if(e.results?.length>0&&e.resultIndex<e.results.length){let 
t=Array.from(e.results[e.resultIndex]),n=t.reduce((e,t)=>(t.confidence??0)>(e.confidence??0)?t:e);i.push(n.transcript,t.map(e=>e.transcript))}}n.call(this,r,...i)};this._instance.addEventListener(e,r),this._listeners[e]||(this._listeners[e]=[]),this._listeners[e].push({callback:n,handler:r})}return this}removeEventListener(e,t){if(!this._includesEventType(e))throw Error(this._unknownEventTypeMessage(e));let n=this._instance;if(n&&this._listeners[e])if(t!==void 0){let r=this._listeners[e].findIndex(e=>e.callback===t);r!==-1&&(n.removeEventListener(e,this._listeners[e][r].handler),this._listeners[e].splice(r,1),this._listeners[e].length===0&&delete this._listeners[e])}else this._listeners[e].forEach(({handler:t})=>n.removeEventListener(e,t)),delete this._listeners[e];return this}once(e,t){let n=(...r)=>{t.call(this,...r),this.removeEventListener(e,n)};return this.addEventListener(e,n)}cleanup(){return this.stop(),Object.keys(this._listeners).forEach(e=>this.removeEventListener(e)),this._instance?.removeEventListener(`end`,this._onEnd),this._instance=null,this}_includesEventType(e){return Object.values(t.eventTypes).includes(e)}_unknownEventTypeMessage(e){return`Unknown event type "${e}". Valid types are: ${Object.values(t.eventTypes).join(`, `)}.`}static _resolveSpeechRecognition(){if(!(typeof window>`u`))return window.SpeechRecognition??window.webkitSpeechRecognition??window.mozSpeechRecognition??window.msSpeechRecognition}static _resolveSpeechGrammarList(){return window.SpeechGrammarList??window.webkitSpeechGrammarList??window.mozSpeechGrammarList??window.msSpeechGrammarList}};exports.Vocal=t;
+Object.defineProperty(exports,Symbol.toStringTag,{value:`Module`});let e=require(`@untemps/user-permissions-utils`);var t=1e3,n=new Set([`not-allowed`,`service-not-allowed`,`audio-capture`]),r=class r{static defaultOptions={grammars:null,lang:`en-US`,continuous:!1,interimResults:!1,maxAlternatives:1};static eventTypes={AUDIO_END:`audioend`,AUDIO_START:`audiostart`,END:`end`,ERROR:`error`,NO_MATCH:`nomatch`,RESULT:`result`,SOUND_END:`soundend`,SOUND_START:`soundstart`,SPEECH_END:`speechend`,SPEECH_START:`speechstart`,START:`start`};static get isSupported(){return!!r._resolveSpeechRecognition()&&!!(0,e.isNavigatorPermissionsSupported)()&&!!(0,e.isNavigatorMediaDevicesSupported)()}static set isSupported(e){throw Error(`You cannot set isSupported directly.`)}_instance=null;_listeners={};_isRecording=!1;_explicitStop=!1;_lastStartedAt=0;_restartTimeoutId=null;_isRestarting=!1;_finalTranscripts=[];_onEnd=e=>{if(this._shouldAutoRestart()){let n=Math.max(0,t-(Date.now()-this._lastStartedAt));this._isRestarting=!0,this._restartTimeoutId=setTimeout(()=>this._restart(),n),e.stopImmediatePropagation();return}this._isRecording=!1};_onStart=e=>{this._isRestarting&&(e.stopImmediatePropagation(),queueMicrotask(()=>{this._isRestarting=!1}))};_onError=e=>{n.has(e.error)&&(this._explicitStop=!0,this._clearRestartTimeout(),this._isRecording=!1)};_onResult=e=>{let t=e,n=t.results?.[t.resultIndex];n?.isFinal&&this._finalTranscripts.push(r._pickBestAlternative(Array.from(n)).transcript)};constructor(e){let t=r._resolveSpeechRecognition();if(!t)throw new DOMException(`SpeechRecognition not supported`,`NOT_SUPPORTED_ERR`);this._instance=new t;let{grammars:n,...i}={...r.defaultOptions,...e??{}},a=this._instance;if(Object.assign(a,i),n)a.grammars=n;else{let e=r._resolveSpeechGrammarList();a.grammars=e?new 
e:null}this._instance.addEventListener(r.eventTypes.END,this._onEnd),this._instance.addEventListener(r.eventTypes.START,this._onStart),this._instance.addEventListener(r.eventTypes.ERROR,this._onError),this._instance.addEventListener(r.eventTypes.RESULT,this._onResult)}get isRecording(){return this._isRecording}set isRecording(e){throw Error(`You cannot set isRecording directly.`)}async start({signal:t}={}){if(this._instance)try{if(!await(0,e.getUserMediaStream)(`microphone`,{audio:!0},{signal:t}))throw Error(`Unable to retrieve the stream from media device`);this._explicitStop=!1,this._finalTranscripts=[],this._instance.start(),this._isRecording=!0,this._lastStartedAt=Date.now()}catch(e){if(e instanceof Error&&e.name===`AbortError`)return this;throw e}return this}stop(){return this._instance&&(this._explicitStop=!0,this._clearRestartTimeout(),this._emitAggregatedResult(),this._instance.stop(),this._isRecording=!1),this}abort(){return this._instance&&(this._explicitStop=!0,this._clearRestartTimeout(),this._instance.abort(),this._isRecording=!1,this._finalTranscripts=[]),this}addEventListener(e,t){if(!this._includesEventType(e))throw Error(this._unknownEventTypeMessage(e));if(this._instance){let n=n=>{if(this._isRestarting&&(e===r.eventTypes.END||e===r.eventTypes.START))return;let i=[];if(e===r.eventTypes.RESULT){let e=n;if(e.results?.length>0&&e.resultIndex<e.results.length){let t=Array.from(e.results[e.resultIndex]);i.push(r._pickBestAlternative(t).transcript,t.map(e=>e.transcript))}}t.call(this,n,...i)};this._instance.addEventListener(e,n),this._listeners[e]||(this._listeners[e]=[]),this._listeners[e].push({callback:t,handler:n})}return this}removeEventListener(e,t){if(!this._includesEventType(e))throw Error(this._unknownEventTypeMessage(e));let n=this._instance;if(n&&this._listeners[e])if(t!==void 0){let 
r=this._listeners[e].findIndex(e=>e.callback===t);r!==-1&&(n.removeEventListener(e,this._listeners[e][r].handler),this._listeners[e].splice(r,1),this._listeners[e].length===0&&delete this._listeners[e])}else this._listeners[e].forEach(({handler:t})=>n.removeEventListener(e,t)),delete this._listeners[e];return this}once(e,t){let n=(...r)=>{t.call(this,...r),this.removeEventListener(e,n)};return this.addEventListener(e,n)}cleanup(){return this.stop(),Object.keys(this._listeners).forEach(e=>this.removeEventListener(e)),this._instance?.removeEventListener(r.eventTypes.END,this._onEnd),this._instance?.removeEventListener(r.eventTypes.START,this._onStart),this._instance?.removeEventListener(r.eventTypes.ERROR,this._onError),this._instance?.removeEventListener(r.eventTypes.RESULT,this._onResult),this._instance=null,this}_restart=()=>{this._restartTimeoutId=null;try{this._instance.start(),this._lastStartedAt=Date.now()}catch{this._isRestarting=!1,this._isRecording=!1}};_emitAggregatedResult(){let e=this._finalTranscripts;if(this._finalTranscripts=[],e.length===0)return;let t=e.join(` `).trim(),n=Object.assign([{transcript:t,confidence:1}],{isFinal:!0}),i=Object.assign(new Event(r.eventTypes.RESULT),{resultIndex:0,results:[n]});[...this._listeners[r.eventTypes.RESULT]??[]].forEach(({handler:e})=>e(i))}static _pickBestAlternative(e){return e.reduce((e,t)=>(t.confidence??0)>(e.confidence??0)?t:e)}_shouldAutoRestart(){return!!this._instance&&!this._explicitStop&&this._instance.continuous}_clearRestartTimeout(){this._restartTimeoutId!==null&&(clearTimeout(this._restartTimeoutId),this._restartTimeoutId=null),this._isRestarting=!1}_includesEventType(e){return Object.values(r.eventTypes).includes(e)}_unknownEventTypeMessage(e){return`Unknown event type "${e}". 
Valid types are: ${Object.values(r.eventTypes).join(`, `)}.`}static _resolveSpeechRecognition(){if(!(typeof window>`u`))return window.SpeechRecognition??window.webkitSpeechRecognition??window.mozSpeechRecognition??window.msSpeechRecognition}static _resolveSpeechGrammarList(){return window.SpeechGrammarList??window.webkitSpeechGrammarList??window.mozSpeechGrammarList??window.msSpeechGrammarList}};exports.Vocal=r;
 //# sourceMappingURL=index.cjs.map

package/dist/index.es.js CHANGED Viewed

@@ -1,6 +1,10 @@
 import { getUserMediaStream as e, isNavigatorMediaDevicesSupported as t, isNavigatorPermissionsSupported as n } from "@untemps/user-permissions-utils";
 //#region src/Vocal.ts
-var r = class r {
+var r = 1e3, i = new Set([
+	"not-allowed",
+	"service-not-allowed",
+	"audio-capture"
+]), a = class a {
 	static defaultOptions = {
 		grammars: null,
 		lang: "en-US",
@@ -22,7 +26,7 @@ var r = class r {
 		START: "start"
 	};
 	static get isSupported() {
-		return !!r._resolveSpeechRecognition() && !!n() && !!t();
+		return !!a._resolveSpeechRecognition() && !!n() && !!t();
 	}
 	static set isSupported(e) {
 		throw Error("You cannot set isSupported directly.");
@@ -30,23 +34,45 @@ var r = class r {
 	_instance = null;
 	_listeners = {};
 	_isRecording = !1;
-	_onEnd = () => {
+	_explicitStop = !1;
+	_lastStartedAt = 0;
+	_restartTimeoutId = null;
+	_isRestarting = !1;
+	_finalTranscripts = [];
+	_onEnd = (e) => {
+		if (this._shouldAutoRestart()) {
+			let t = Math.max(0, r - (Date.now() - this._lastStartedAt));
+			this._isRestarting = !0, this._restartTimeoutId = setTimeout(() => this._restart(), t), e.stopImmediatePropagation();
+			return;
+		}
 		this._isRecording = !1;
 	};
+	_onStart = (e) => {
+		this._isRestarting && (e.stopImmediatePropagation(), queueMicrotask(() => {
+			this._isRestarting = !1;
+		}));
+	};
+	_onError = (e) => {
+		i.has(e.error) && (this._explicitStop = !0, this._clearRestartTimeout(), this._isRecording = !1);
+	};
+	_onResult = (e) => {
+		let t = e, n = t.results?.[t.resultIndex];
+		n?.isFinal && this._finalTranscripts.push(a._pickBestAlternative(Array.from(n)).transcript);
+	};
 	constructor(e) {
-		let t = r._resolveSpeechRecognition();
+		let t = a._resolveSpeechRecognition();
 		if (!t) throw new DOMException("SpeechRecognition not supported", "NOT_SUPPORTED_ERR");
 		this._instance = new t();
-		let { grammars: n, ...i } = {
-			...r.defaultOptions,
+		let { grammars: n, ...r } = {
+			...a.defaultOptions,
 			...e ?? {}
-		}, a = this._instance;
-		if (Object.assign(a, i), n) a.grammars = n;
+		}, i = this._instance;
+		if (Object.assign(i, r), n) i.grammars = n;
 		else {
-			let e = r._resolveSpeechGrammarList();
-			a.grammars = e ? new e() : null;
+			let e = a._resolveSpeechGrammarList();
+			i.grammars = e ? new e() : null;
 		}
-		this._instance.addEventListener("end", this._onEnd);
+		this._instance.addEventListener(a.eventTypes.END, this._onEnd), this._instance.addEventListener(a.eventTypes.START, this._onStart), this._instance.addEventListener(a.eventTypes.ERROR, this._onError), this._instance.addEventListener(a.eventTypes.RESULT, this._onResult);
 	}
 	get isRecording() {
 		return this._isRecording;
@@ -57,7 +83,7 @@ var r = class r {
 	async start({ signal: t } = {}) {
 		if (this._instance) try {
 			if (!await e("microphone", { audio: !0 }, { signal: t })) throw Error("Unable to retrieve the stream from media device");
-			this._instance.start(), this._isRecording = !0;
+			this._explicitStop = !1, this._finalTranscripts = [], this._instance.start(), this._isRecording = !0, this._lastStartedAt = Date.now();
 		} catch (e) {
 			if (e instanceof Error && e.name === "AbortError") return this;
 			throw e;
@@ -65,24 +91,25 @@ var r = class r {
 		return this;
 	}
 	stop() {
-		return this._instance && (this._instance.stop(), this._isRecording = !1), this;
+		return this._instance && (this._explicitStop = !0, this._clearRestartTimeout(), this._emitAggregatedResult(), this._instance.stop(), this._isRecording = !1), this;
 	}
 	abort() {
-		return this._instance && (this._instance.abort(), this._isRecording = !1), this;
+		return this._instance && (this._explicitStop = !0, this._clearRestartTimeout(), this._instance.abort(), this._isRecording = !1, this._finalTranscripts = []), this;
 	}
 	addEventListener(e, t) {
 		if (!this._includesEventType(e)) throw Error(this._unknownEventTypeMessage(e));
 		if (this._instance) {
 			let n = (n) => {
-				let i = [];
-				if (e === r.eventTypes.RESULT) {
+				if (this._isRestarting && (e === a.eventTypes.END || e === a.eventTypes.START)) return;
+				let r = [];
+				if (e === a.eventTypes.RESULT) {
 					let e = n;
 					if (e.results?.length > 0 && e.resultIndex < e.results.length) {
-						let t = Array.from(e.results[e.resultIndex]), n = t.reduce((e, t) => (t.confidence ?? 0) > (e.confidence ?? 0) ? t : e);
-						i.push(n.transcript, t.map((e) => e.transcript));
+						let t = Array.from(e.results[e.resultIndex]);
+						r.push(a._pickBestAlternative(t).transcript, t.map((e) => e.transcript));
 					}
 				}
-				t.call(this, n, ...i);
+				t.call(this, n, ...r);
 			};
 			this._instance.addEventListener(e, n), this._listeners[e] || (this._listeners[e] = []), this._listeners[e].push({
 				callback: t,
@@ -107,13 +134,42 @@ var r = class r {
 		return this.addEventListener(e, n);
 	}
 	cleanup() {
-		return this.stop(), Object.keys(this._listeners).forEach((e) => this.removeEventListener(e)), this._instance?.removeEventListener("end", this._onEnd), this._instance = null, this;
+		return this.stop(), Object.keys(this._listeners).forEach((e) => this.removeEventListener(e)), this._instance?.removeEventListener(a.eventTypes.END, this._onEnd), this._instance?.removeEventListener(a.eventTypes.START, this._onStart), this._instance?.removeEventListener(a.eventTypes.ERROR, this._onError), this._instance?.removeEventListener(a.eventTypes.RESULT, this._onResult), this._instance = null, this;
+	}
+	_restart = () => {
+		this._restartTimeoutId = null;
+		try {
+			this._instance.start(), this._lastStartedAt = Date.now();
+		} catch {
+			this._isRestarting = !1, this._isRecording = !1;
+		}
+	};
+	_emitAggregatedResult() {
+		let e = this._finalTranscripts;
+		if (this._finalTranscripts = [], e.length === 0) return;
+		let t = e.join(" ").trim(), n = Object.assign([{
+			transcript: t,
+			confidence: 1
+		}], { isFinal: !0 }), r = Object.assign(new Event(a.eventTypes.RESULT), {
+			resultIndex: 0,
+			results: [n]
+		});
+		[...this._listeners[a.eventTypes.RESULT] ?? []].forEach(({ handler: e }) => e(r));
+	}
+	static _pickBestAlternative(e) {
+		return e.reduce((e, t) => (t.confidence ?? 0) > (e.confidence ?? 0) ? t : e);
+	}
+	_shouldAutoRestart() {
+		return !!this._instance && !this._explicitStop && this._instance.continuous;
+	}
+	_clearRestartTimeout() {
+		this._restartTimeoutId !== null && (clearTimeout(this._restartTimeoutId), this._restartTimeoutId = null), this._isRestarting = !1;
 	}
 	_includesEventType(e) {
-		return Object.values(r.eventTypes).includes(e);
+		return Object.values(a.eventTypes).includes(e);
 	}
 	_unknownEventTypeMessage(e) {
-		return `Unknown event type "${e}". Valid types are: ${Object.values(r.eventTypes).join(", ")}.`;
+		return `Unknown event type "${e}". Valid types are: ${Object.values(a.eventTypes).join(", ")}.`;
 	}
 	static _resolveSpeechRecognition() {
 		if (!(typeof window > "u")) return window.SpeechRecognition ?? window.webkitSpeechRecognition ?? window.mozSpeechRecognition ?? window.msSpeechRecognition;
@@ -123,6 +179,6 @@ var r = class r {
 	}
 };
 //#endregion
-export { r as Vocal };
+export { a as Vocal };
 //# sourceMappingURL=index.es.js.map

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "@untemps/vocal",
-	"version": "2.0.0-beta.19",
+	"version": "2.0.0-beta.20",
 	"description": "Class wrapped around the SpeechRecognition Web API",
 	"repository": "git@github.com:untemps/vocal.git",
 	"keywords": [