@speechos/core 0.2.0 → 0.2.2

This diff shows the changes between publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
package/dist/index.js CHANGED
@@ -12,8 +12,6 @@ const defaultConfig = {
12
12
  apiKey: "",
13
13
  userId: "",
14
14
  host: DEFAULT_HOST,
15
- position: "bottom-center",
16
- zIndex: 999999,
17
15
  debug: false
18
16
  };
19
17
  /**
@@ -21,31 +19,19 @@ const defaultConfig = {
21
19
  * @param userConfig - User-provided configuration
22
20
  * @returns Validated and merged configuration
23
21
  */
24
- function validateConfig(userConfig = {}) {
22
+ function validateConfig(userConfig) {
25
23
  if (!userConfig.apiKey) throw new Error("SpeechOS requires an apiKey. Get one from your team dashboard at /a/<team-slug>/.");
26
- const config = {
27
- ...defaultConfig,
28
- ...userConfig
24
+ return {
25
+ apiKey: userConfig.apiKey,
26
+ userId: userConfig.userId ?? defaultConfig.userId,
27
+ host: userConfig.host ?? defaultConfig.host,
28
+ debug: userConfig.debug ?? defaultConfig.debug
29
29
  };
30
- const validPositions = [
31
- "bottom-center",
32
- "bottom-right",
33
- "bottom-left"
34
- ];
35
- if (!validPositions.includes(config.position)) {
36
- console.warn(`Invalid position "${config.position}". Using default "bottom-center".`);
37
- config.position = "bottom-center";
38
- }
39
- if (typeof config.zIndex !== "number" || config.zIndex < 0) {
40
- console.warn(`Invalid zIndex "${config.zIndex}". Using default ${defaultConfig.zIndex}.`);
41
- config.zIndex = defaultConfig.zIndex;
42
- }
43
- return config;
44
30
  }
45
31
  /**
46
32
  * Current active configuration (singleton)
47
33
  */
48
- let currentConfig = defaultConfig;
34
+ let currentConfig = { ...defaultConfig };
49
35
  /**
50
36
  * Get the current configuration
51
37
  */
@@ -168,33 +154,38 @@ const initialState = {
168
154
  var StateManager = class {
169
155
  state;
170
156
  subscribers = /* @__PURE__ */ new Set();
157
+ /** Cached immutable snapshot for useSyncExternalStore compatibility */
158
+ snapshot;
171
159
  constructor(initialState$1) {
172
160
  this.state = { ...initialState$1 };
161
+ this.snapshot = Object.freeze({ ...this.state });
173
162
  }
174
163
  /**
175
- * Get the current state (returns a copy to prevent mutations)
164
+ * Get the current state snapshot (returns a stable reference for React)
165
+ * This returns an immutable frozen object that only changes when setState is called.
176
166
  */
177
167
  getState() {
178
- return { ...this.state };
168
+ return this.snapshot;
179
169
  }
180
170
  /**
181
171
  * Update state with partial values
182
172
  * @param partial - Partial state to merge with current state
183
173
  */
184
174
  setState(partial) {
185
- const prevState = { ...this.state };
175
+ const prevState = this.snapshot;
186
176
  this.state = {
187
177
  ...this.state,
188
178
  ...partial
189
179
  };
180
+ this.snapshot = Object.freeze({ ...this.state });
190
181
  this.subscribers.forEach((callback) => {
191
182
  try {
192
- callback(this.state, prevState);
183
+ callback(this.snapshot, prevState);
193
184
  } catch (error) {
194
185
  console.error("Error in state change callback:", error);
195
186
  }
196
187
  });
197
- events.emit("state:change", { state: this.state });
188
+ events.emit("state:change", { state: this.snapshot });
198
189
  }
199
190
  /**
200
191
  * Subscribe to state changes
@@ -211,7 +202,17 @@ var StateManager = class {
211
202
  * Reset state to initial values
212
203
  */
213
204
  reset() {
214
- this.setState(initialState);
205
+ const prevState = this.snapshot;
206
+ this.state = { ...initialState };
207
+ this.snapshot = Object.freeze({ ...this.state });
208
+ this.subscribers.forEach((callback) => {
209
+ try {
210
+ callback(this.snapshot, prevState);
211
+ } catch (error) {
212
+ console.error("Error in state change callback:", error);
213
+ }
214
+ });
215
+ events.emit("state:change", { state: this.snapshot });
215
216
  }
216
217
  /**
217
218
  * Show the widget
@@ -346,12 +347,15 @@ function createStateManager(initial) {
346
347
 
347
348
  //#endregion
348
349
  //#region src/livekit.ts
349
- const MESSAGE_TYPE_REQUEST_TRANSCRIPT = "request_transcript";
350
- const MESSAGE_TYPE_TRANSCRIPT = "transcript";
351
- const MESSAGE_TYPE_EDIT_TEXT = "edit_text";
352
- const MESSAGE_TYPE_EDITED_TEXT = "edited_text";
353
- const MESSAGE_TYPE_ERROR = "error";
350
+ const MESSAGE_TYPE_REQUEST_TRANSCRIPT$1 = "request_transcript";
351
+ const MESSAGE_TYPE_TRANSCRIPT$1 = "transcript";
352
+ const MESSAGE_TYPE_EDIT_TEXT$1 = "edit_text";
353
+ const MESSAGE_TYPE_EDITED_TEXT$1 = "edited_text";
354
+ const MESSAGE_TYPE_EXECUTE_COMMAND$1 = "execute_command";
355
+ const MESSAGE_TYPE_COMMAND_RESULT$1 = "command_result";
356
+ const MESSAGE_TYPE_ERROR$1 = "error";
354
357
  const TOPIC_SPEECHOS = "speechos";
358
+ const TOKEN_CACHE_TTL_MS = 4 * 60 * 1e3;
355
359
  /**
356
360
  * A deferred promise with timeout support.
357
361
  * Encapsulates resolve/reject/timeout in a single object for cleaner async handling.
@@ -415,53 +419,116 @@ var LiveKitManager = class {
415
419
  room = null;
416
420
  tokenData = null;
417
421
  micTrack = null;
422
+ cachedTokenData = null;
423
+ tokenCacheTimestamp = null;
424
+ tokenPrefetchPromise = null;
425
+ tokenRefreshTimer = null;
426
+ autoRefreshEnabled = false;
418
427
  pendingTranscript = null;
419
428
  pendingEditText = null;
429
+ pendingCommand = null;
420
430
  pendingTrackSubscribed = null;
421
- preWarmPromise = null;
422
431
  editOriginalText = null;
432
+ sessionSettings = {};
423
433
  /**
424
- * Pre-warm resources for faster connection
425
- * Call this when user shows intent (e.g., expands widget)
426
- * Only fetches token - mic permission is requested when user clicks Dictate
434
+ * Check if the cached token is still valid (within TTL)
427
435
  */
428
- async preWarm() {
429
- if (this.tokenData || this.preWarmPromise || this.room?.state === "connected") {
430
- const config$1 = getConfig();
431
- if (config$1.debug) console.log("[SpeechOS] Pre-warm skipped - token already available");
432
- return;
433
- }
436
+ isCachedTokenValid() {
437
+ if (!this.cachedTokenData || !this.tokenCacheTimestamp) return false;
438
+ const age = Date.now() - this.tokenCacheTimestamp;
439
+ return age < TOKEN_CACHE_TTL_MS;
440
+ }
441
+ /**
442
+ * Pre-fetch a LiveKit token for later use
443
+ * Call this early (e.g., when widget expands) to reduce latency when starting a voice session.
444
+ * If a prefetch is already in progress, returns the existing promise.
445
+ * If a valid cached token exists, returns it immediately.
446
+ */
447
+ async prefetchToken() {
434
448
  const config = getConfig();
435
- if (config.debug) console.log("[SpeechOS] Pre-warming: fetching token...");
436
- this.preWarmPromise = (async () => {
437
- try {
438
- await this.fetchToken();
439
- if (config.debug) console.log("[SpeechOS] Pre-warm complete - token ready");
440
- } catch (error) {
441
- if (config.debug) console.warn("[SpeechOS] Pre-warm failed:", error);
442
- this.preWarmPromise = null;
443
- }
444
- })();
445
- await this.preWarmPromise;
449
+ if (this.isCachedTokenValid() && this.cachedTokenData) {
450
+ if (config.debug) console.log("[SpeechOS] Using cached token (prefetch hit)");
451
+ return this.cachedTokenData;
452
+ }
453
+ if (this.tokenPrefetchPromise) {
454
+ if (config.debug) console.log("[SpeechOS] Prefetch already in progress, awaiting...");
455
+ return this.tokenPrefetchPromise;
456
+ }
457
+ if (config.debug) console.log("[SpeechOS] Starting token prefetch...");
458
+ this.tokenPrefetchPromise = this.fetchTokenFromServer().then((data) => {
459
+ this.cachedTokenData = data;
460
+ this.tokenCacheTimestamp = Date.now();
461
+ this.tokenPrefetchPromise = null;
462
+ return data;
463
+ }).catch((error) => {
464
+ this.tokenPrefetchPromise = null;
465
+ throw error;
466
+ });
467
+ return this.tokenPrefetchPromise;
446
468
  }
447
469
  /**
448
470
  * Fetch a LiveKit token from the backend
471
+ * Uses cached token if valid, otherwise fetches a fresh one.
472
+ * Includes language settings and user vocabulary which are stored in the VoiceSession.
449
473
  */
450
474
  async fetchToken() {
475
+ const config = getConfig();
476
+ if (this.isCachedTokenValid() && this.cachedTokenData) {
477
+ if (config.debug) console.log("[SpeechOS] Using cached token");
478
+ this.tokenData = this.cachedTokenData;
479
+ return this.cachedTokenData;
480
+ }
481
+ if (this.tokenPrefetchPromise) {
482
+ if (config.debug) console.log("[SpeechOS] Waiting for prefetch to complete...");
483
+ const data$1 = await this.tokenPrefetchPromise;
484
+ this.tokenData = data$1;
485
+ return data$1;
486
+ }
487
+ const data = await this.fetchTokenFromServer();
488
+ this.cachedTokenData = data;
489
+ this.tokenCacheTimestamp = Date.now();
490
+ this.tokenData = data;
491
+ return data;
492
+ }
493
+ /**
494
+ * Internal method to fetch a fresh token from the server
495
+ */
496
+ async fetchTokenFromServer() {
451
497
  const config = getConfig();
452
498
  const url = `${config.host}/livekit/api/token/`;
453
- if (config.debug) console.log("[SpeechOS] Fetching LiveKit token from:", url);
499
+ const settings = this.sessionSettings;
500
+ const inputLanguage = settings.inputLanguageCode ?? "en-US";
501
+ const outputLanguage = settings.outputLanguageCode ?? "en-US";
502
+ const smartFormat = settings.smartFormat ?? true;
503
+ const vocabulary = settings.vocabulary ?? [];
504
+ const snippets = settings.snippets ?? [];
505
+ if (config.debug) {
506
+ console.log("[SpeechOS] Fetching LiveKit token from:", url);
507
+ console.log("[SpeechOS] Session settings:", {
508
+ inputLanguage,
509
+ outputLanguage,
510
+ smartFormat,
511
+ snippetsCount: snippets.length,
512
+ vocabularyCount: vocabulary.length
513
+ });
514
+ }
454
515
  const response = await fetch(url, {
455
516
  method: "POST",
456
517
  headers: {
457
518
  "Content-Type": "application/json",
458
519
  ...config.apiKey ? { Authorization: `Api-Key ${config.apiKey}` } : {}
459
520
  },
460
- body: JSON.stringify({ user_id: config.userId || null })
521
+ body: JSON.stringify({
522
+ user_id: config.userId || null,
523
+ input_language: inputLanguage,
524
+ output_language: outputLanguage,
525
+ smart_format: smartFormat,
526
+ custom_vocabulary: vocabulary,
527
+ custom_snippets: snippets
528
+ })
461
529
  });
462
530
  if (!response.ok) throw new Error(`Failed to fetch LiveKit token: ${response.status} ${response.statusText}`);
463
531
  const data = await response.json();
464
- this.tokenData = data;
465
532
  if (config.debug) console.log("[SpeechOS] LiveKit token received:", {
466
533
  room: data.room,
467
534
  identity: data.identity,
@@ -474,8 +541,7 @@ var LiveKitManager = class {
474
541
  */
475
542
  async connect() {
476
543
  const config = getConfig();
477
- if (!this.tokenData) await this.fetchToken();
478
- else if (config.debug) console.log("[SpeechOS] Using pre-fetched token");
544
+ await this.fetchToken();
479
545
  if (!this.tokenData) throw new Error("No token available for LiveKit connection");
480
546
  this.room = new Room({
481
547
  adaptiveStream: true,
@@ -539,7 +605,7 @@ var LiveKitManager = class {
539
605
  try {
540
606
  const message = JSON.parse(new TextDecoder().decode(data));
541
607
  if (config.debug) console.log("[SpeechOS] Data received:", message);
542
- if (message.type === MESSAGE_TYPE_TRANSCRIPT) {
608
+ if (message.type === MESSAGE_TYPE_TRANSCRIPT$1) {
543
609
  const transcript = message.transcript || "";
544
610
  if (config.debug) console.log("[SpeechOS] Transcript received:", transcript);
545
611
  events.emit("transcription:complete", { text: transcript });
@@ -547,7 +613,7 @@ var LiveKitManager = class {
547
613
  this.pendingTranscript.resolve(transcript);
548
614
  this.pendingTranscript = null;
549
615
  }
550
- } else if (message.type === MESSAGE_TYPE_EDITED_TEXT) {
616
+ } else if (message.type === MESSAGE_TYPE_EDITED_TEXT$1) {
551
617
  const editedText = message.text || "";
552
618
  if (config.debug) console.log("[SpeechOS] Edited text received:", editedText);
553
619
  events.emit("edit:complete", {
@@ -559,7 +625,15 @@ var LiveKitManager = class {
559
625
  this.pendingEditText = null;
560
626
  }
561
627
  this.editOriginalText = null;
562
- } else if (message.type === MESSAGE_TYPE_ERROR) {
628
+ } else if (message.type === MESSAGE_TYPE_COMMAND_RESULT$1) {
629
+ const commandResult = message.command || null;
630
+ if (config.debug) console.log("[SpeechOS] Command result received:", commandResult);
631
+ events.emit("command:complete", { command: commandResult });
632
+ if (this.pendingCommand) {
633
+ this.pendingCommand.resolve(commandResult);
634
+ this.pendingCommand = null;
635
+ }
636
+ } else if (message.type === MESSAGE_TYPE_ERROR$1) {
563
637
  const serverError = message;
564
638
  const errorCode = serverError.code || "server_error";
565
639
  const errorMessage = serverError.message || "A server error occurred";
@@ -579,6 +653,10 @@ var LiveKitManager = class {
579
653
  this.pendingEditText.reject(error);
580
654
  this.pendingEditText = null;
581
655
  }
656
+ if (this.pendingCommand) {
657
+ this.pendingCommand.reject(error);
658
+ this.pendingCommand = null;
659
+ }
582
660
  }
583
661
  } catch (error) {
584
662
  console.error("[SpeechOS] Failed to parse data message:", error);
@@ -586,16 +664,34 @@ var LiveKitManager = class {
586
664
  }
587
665
  /**
588
666
  * Publish microphone audio track
667
+ * Uses the device ID from session settings if set
589
668
  */
590
669
  async enableMicrophone() {
591
670
  if (!this.room || this.room.state !== "connected") throw new Error("Not connected to room");
592
671
  const config = getConfig();
593
672
  if (!this.micTrack) {
594
673
  if (config.debug) console.log("[SpeechOS] Creating microphone track...");
595
- this.micTrack = await createLocalAudioTrack({
674
+ const deviceId = this.sessionSettings.audioDeviceId;
675
+ const trackOptions = {
596
676
  echoCancellation: true,
597
677
  noiseSuppression: true
598
- });
678
+ };
679
+ if (deviceId) {
680
+ trackOptions.deviceId = { exact: deviceId };
681
+ if (config.debug) console.log("[SpeechOS] Using audio device:", deviceId);
682
+ }
683
+ try {
684
+ this.micTrack = await createLocalAudioTrack(trackOptions);
685
+ } catch (error) {
686
+ if (deviceId && error instanceof Error) {
687
+ console.warn("[SpeechOS] Selected audio device unavailable, falling back to default:", error.message);
688
+ this.micTrack = await createLocalAudioTrack({
689
+ echoCancellation: true,
690
+ noiseSuppression: true
691
+ });
692
+ } else throw error;
693
+ }
694
+ this.logMicrophoneInfo();
599
695
  }
600
696
  const existingPub = this.room.localParticipant.getTrackPublication(Track.Source.Microphone);
601
697
  if (!existingPub) {
@@ -605,6 +701,24 @@ var LiveKitManager = class {
605
701
  }
606
702
  }
607
703
  /**
704
+ * Log information about the current microphone track
705
+ */
706
+ logMicrophoneInfo() {
707
+ if (!this.micTrack) return;
708
+ const config = getConfig();
709
+ const mediaTrack = this.micTrack.mediaStreamTrack;
710
+ const settings = mediaTrack.getSettings();
711
+ console.log("[SpeechOS] Microphone active:", {
712
+ deviceId: settings.deviceId || "unknown",
713
+ label: mediaTrack.label || "Unknown device",
714
+ sampleRate: settings.sampleRate,
715
+ channelCount: settings.channelCount,
716
+ echoCancellation: settings.echoCancellation,
717
+ noiseSuppression: settings.noiseSuppression
718
+ });
719
+ if (config.debug) console.log("[SpeechOS] Full audio track settings:", settings);
720
+ }
721
+ /**
608
722
  * Disable microphone audio track
609
723
  */
610
724
  async disableMicrophone() {
@@ -636,30 +750,85 @@ var LiveKitManager = class {
636
750
  });
637
751
  }
638
752
  /**
639
- * Start a voice session
640
- * Connects to room, enables microphone, and waits for agent to subscribe to our track
753
+ * Start a voice session with pre-connect audio buffering
754
+ * Fetches a fresh token, then enables mic with preConnectBuffer to capture audio while connecting.
755
+ * Agent subscription happens in the background - we don't block on it.
756
+ *
757
+ * @param options - Session options including action type and parameters
641
758
  */
642
- async startVoiceSession() {
759
+ async startVoiceSession(options) {
643
760
  const config = getConfig();
644
761
  if (config.debug) console.log("[SpeechOS] Starting voice session...");
645
- if (this.preWarmPromise) {
646
- if (config.debug) console.log("[SpeechOS] Waiting for pre-warm to complete...");
647
- await this.preWarmPromise;
648
- }
649
- if (this.tokenData) {
650
- if (config.debug) console.log("[SpeechOS] Using cached token from init");
651
- } else {
652
- if (config.debug) console.log("[SpeechOS] Fetching fresh token for session...");
653
- await this.fetchToken();
654
- }
762
+ this.sessionSettings = options?.settings || {};
763
+ await this.fetchToken();
764
+ if (!this.tokenData) throw new Error("No token available for LiveKit connection");
655
765
  this.pendingTrackSubscribed = new Deferred();
656
766
  this.pendingTrackSubscribed.setTimeout(15e3, "Connection timed out - agent not available", "connection_timeout", "connection");
657
- await this.connect();
658
- await this.enableMicrophone();
659
- if (config.debug) console.log("[SpeechOS] Microphone published, waiting for LocalTrackSubscribed event...");
660
- await this.pendingTrackSubscribed.promise;
661
- this.pendingTrackSubscribed = null;
662
- if (config.debug) console.log("[SpeechOS] Voice session ready - agent subscribed to audio");
767
+ this.room = new Room({
768
+ adaptiveStream: true,
769
+ dynacast: true
770
+ });
771
+ this.setupRoomEvents();
772
+ if (config.debug) console.log("[SpeechOS] Connecting to LiveKit room:", this.tokenData.room, "at", this.tokenData.ws_url);
773
+ await this.room.connect(this.tokenData.ws_url, this.tokenData.token);
774
+ if (config.debug) console.log("[SpeechOS] Connected, enabling microphone with preConnectBuffer...");
775
+ await this.enableMicrophoneWithPreConnectBuffer();
776
+ if (options?.onMicReady) options.onMicReady();
777
+ state.setConnected(true);
778
+ if (config.debug) console.log("[SpeechOS] Voice session ready - microphone active");
779
+ this.waitForAgentSubscription();
780
+ }
781
+ /**
782
+ * Wait for the agent to subscribe to our audio track in the background
783
+ * Handles timeout errors without blocking the main flow
784
+ */
785
+ waitForAgentSubscription() {
786
+ const config = getConfig();
787
+ if (!this.pendingTrackSubscribed) return;
788
+ this.pendingTrackSubscribed.promise.then(() => {
789
+ if (config.debug) console.log("[SpeechOS] Agent subscribed to audio track - full duplex established");
790
+ this.pendingTrackSubscribed = null;
791
+ }).catch((error) => {
792
+ console.warn("[SpeechOS] Agent subscription timeout:", error.message);
793
+ this.pendingTrackSubscribed = null;
794
+ });
795
+ }
796
+ /**
797
+ * Enable microphone with pre-connect buffering
798
+ * This starts capturing audio locally before the room is connected,
799
+ * buffering it until the connection is established.
800
+ */
801
+ async enableMicrophoneWithPreConnectBuffer() {
802
+ if (!this.room) throw new Error("Room not initialized");
803
+ const config = getConfig();
804
+ const deviceId = this.sessionSettings.audioDeviceId;
805
+ const constraints = {
806
+ echoCancellation: true,
807
+ noiseSuppression: true
808
+ };
809
+ if (deviceId) {
810
+ constraints.deviceId = { exact: deviceId };
811
+ if (config.debug) console.log("[SpeechOS] Using audio device:", deviceId);
812
+ }
813
+ try {
814
+ await this.room.localParticipant.setMicrophoneEnabled(true, constraints, { preConnectBuffer: true });
815
+ state.setMicEnabled(true);
816
+ const micPub = this.room.localParticipant.getTrackPublication(Track.Source.Microphone);
817
+ if (micPub?.track) {
818
+ this.micTrack = micPub.track;
819
+ this.logMicrophoneInfo();
820
+ }
821
+ if (config.debug) console.log("[SpeechOS] Microphone enabled with pre-connect buffer - audio is being captured");
822
+ } catch (error) {
823
+ if (deviceId && error instanceof Error) {
824
+ console.warn("[SpeechOS] Selected audio device unavailable, falling back to default:", error.message);
825
+ await this.room.localParticipant.setMicrophoneEnabled(true, {
826
+ echoCancellation: true,
827
+ noiseSuppression: true
828
+ }, { preConnectBuffer: true });
829
+ state.setMicEnabled(true);
830
+ } else throw error;
831
+ }
663
832
  }
664
833
  /**
665
834
  * Stop the voice session and request the transcript
@@ -668,12 +837,19 @@ var LiveKitManager = class {
668
837
  */
669
838
  async stopVoiceSession() {
670
839
  const config = getConfig();
840
+ const settings = this.sessionSettings;
841
+ const inputLanguage = settings.inputLanguageCode ?? "en-US";
842
+ const outputLanguage = settings.outputLanguageCode ?? "en-US";
843
+ console.log("[SpeechOS] Dictate command:", {
844
+ inputLanguage,
845
+ outputLanguage
846
+ });
671
847
  if (config.debug) console.log("[SpeechOS] Stopping voice session, requesting transcript...");
672
848
  await this.disableMicrophone();
673
849
  if (config.debug) console.log("[SpeechOS] Requesting transcript from agent...");
674
850
  this.pendingTranscript = new Deferred();
675
851
  this.pendingTranscript.setTimeout(1e4, "Transcription timed out. Please try again.", "transcription_timeout", "timeout");
676
- await this.sendDataMessage({ type: MESSAGE_TYPE_REQUEST_TRANSCRIPT });
852
+ await this.sendDataMessage({ type: MESSAGE_TYPE_REQUEST_TRANSCRIPT$1 });
677
853
  const result = await this.pendingTranscript.promise;
678
854
  this.pendingTranscript = null;
679
855
  return result;
@@ -692,6 +868,14 @@ var LiveKitManager = class {
692
868
  */
693
869
  async requestEditText(originalText) {
694
870
  const config = getConfig();
871
+ const settings = this.sessionSettings;
872
+ const inputLanguage = settings.inputLanguageCode ?? "en-US";
873
+ const outputLanguage = settings.outputLanguageCode ?? "en-US";
874
+ console.log("[SpeechOS] Edit command:", {
875
+ inputLanguage,
876
+ outputLanguage,
877
+ originalTextLength: originalText.length
878
+ });
695
879
  if (config.debug) console.log("[SpeechOS] Requesting text edit...");
696
880
  this.editOriginalText = originalText;
697
881
  await this.disableMicrophone();
@@ -699,7 +883,7 @@ var LiveKitManager = class {
699
883
  this.pendingEditText = new Deferred();
700
884
  this.pendingEditText.setTimeout(15e3, "Edit request timed out. Please try again.", "edit_timeout", "timeout");
701
885
  await this.sendDataMessage({
702
- type: MESSAGE_TYPE_EDIT_TEXT,
886
+ type: MESSAGE_TYPE_EDIT_TEXT$1,
703
887
  text: originalText
704
888
  });
705
889
  const result = await this.pendingEditText.promise;
@@ -713,6 +897,39 @@ var LiveKitManager = class {
713
897
  return this.requestEditText(originalText);
714
898
  }
715
899
  /**
900
+ * Request command matching using the transcript as input
901
+ * Sends command definitions to the backend, which matches the user's speech against them
902
+ * Returns a promise that resolves with the matched command or null if no match
903
+ * @throws Error if timeout occurs waiting for command result
904
+ */
905
+ async requestCommand(commands) {
906
+ const config = getConfig();
907
+ const settings = this.sessionSettings;
908
+ const inputLanguage = settings.inputLanguageCode ?? "en-US";
909
+ console.log("[SpeechOS] Command request:", {
910
+ inputLanguage,
911
+ commandCount: commands.length
912
+ });
913
+ if (config.debug) console.log("[SpeechOS] Requesting command match...");
914
+ await this.disableMicrophone();
915
+ if (config.debug) console.log("[SpeechOS] Sending execute_command request to agent...");
916
+ this.pendingCommand = new Deferred();
917
+ this.pendingCommand.setTimeout(15e3, "Command request timed out. Please try again.", "command_timeout", "timeout");
918
+ await this.sendDataMessage({
919
+ type: MESSAGE_TYPE_EXECUTE_COMMAND$1,
920
+ commands
921
+ });
922
+ const result = await this.pendingCommand.promise;
923
+ this.pendingCommand = null;
924
+ return result;
925
+ }
926
+ /**
927
+ * Alias for requestCommand - granular API naming
928
+ */
929
+ async stopAndCommand(commands) {
930
+ return this.requestCommand(commands);
931
+ }
932
+ /**
716
933
  * Disconnect from the current room
717
934
  * Clears the token so a fresh one is fetched for the next session
718
935
  */
@@ -735,16 +952,110 @@ var LiveKitManager = class {
735
952
  this.pendingEditText.reject(new Error("Disconnected"));
736
953
  this.pendingEditText = null;
737
954
  }
955
+ if (this.pendingCommand) {
956
+ this.pendingCommand.reject(new Error("Disconnected"));
957
+ this.pendingCommand = null;
958
+ }
738
959
  if (this.pendingTrackSubscribed) {
739
960
  this.pendingTrackSubscribed.reject(new Error("Disconnected"));
740
961
  this.pendingTrackSubscribed = null;
741
962
  }
742
963
  this.tokenData = null;
743
- this.preWarmPromise = null;
744
964
  this.editOriginalText = null;
965
+ this.sessionSettings = {};
745
966
  if (config.debug) console.log("[SpeechOS] Session state cleared");
746
967
  }
747
968
  /**
969
+ * Invalidate the cached token
970
+ * Call this when settings change that would affect the token (language, vocabulary)
971
+ */
972
+ invalidateTokenCache() {
973
+ const config = getConfig();
974
+ if (config.debug) console.log("[SpeechOS] Token cache invalidated");
975
+ this.cachedTokenData = null;
976
+ this.tokenCacheTimestamp = null;
977
+ }
978
+ /**
979
+ * Start auto-refreshing the token while the widget is expanded.
980
+ * Call this after a voice session completes to immediately fetch a fresh token
981
+ * (since each command requires its own token) and keep it fresh for subsequent commands.
982
+ */
983
+ startAutoRefresh() {
984
+ const config = getConfig();
985
+ this.autoRefreshEnabled = true;
986
+ if (config.debug) console.log("[SpeechOS] Token auto-refresh enabled");
987
+ this.invalidateTokenCache();
988
+ this.prefetchToken().then(() => {
989
+ this.scheduleTokenRefresh();
990
+ }).catch((error) => {
991
+ if (config.debug) console.warn("[SpeechOS] Failed to prefetch token after command:", error);
992
+ if (this.autoRefreshEnabled) this.tokenRefreshTimer = setTimeout(() => {
993
+ this.performAutoRefresh();
994
+ }, 5 * 1e3);
995
+ });
996
+ }
997
+ /**
998
+ * Stop auto-refreshing the token.
999
+ * Call this when the widget collapses or user navigates away.
1000
+ */
1001
+ stopAutoRefresh() {
1002
+ const config = getConfig();
1003
+ this.autoRefreshEnabled = false;
1004
+ if (this.tokenRefreshTimer) {
1005
+ clearTimeout(this.tokenRefreshTimer);
1006
+ this.tokenRefreshTimer = null;
1007
+ }
1008
+ if (config.debug) console.log("[SpeechOS] Token auto-refresh disabled");
1009
+ }
1010
+ /**
1011
+ * Schedule a token refresh before the current cache expires.
1012
+ * Handles computer sleep by checking elapsed time on each refresh attempt.
1013
+ */
1014
+ scheduleTokenRefresh() {
1015
+ if (!this.autoRefreshEnabled) return;
1016
+ if (this.tokenRefreshTimer) {
1017
+ clearTimeout(this.tokenRefreshTimer);
1018
+ this.tokenRefreshTimer = null;
1019
+ }
1020
+ const config = getConfig();
1021
+ const refreshBuffer = 30 * 1e3;
1022
+ let timeUntilRefresh;
1023
+ if (this.tokenCacheTimestamp) {
1024
+ const age = Date.now() - this.tokenCacheTimestamp;
1025
+ const timeRemaining = TOKEN_CACHE_TTL_MS - age;
1026
+ timeUntilRefresh = Math.max(0, timeRemaining - refreshBuffer);
1027
+ } else timeUntilRefresh = 0;
1028
+ if (config.debug) console.log(`[SpeechOS] Scheduling token refresh in ${Math.round(timeUntilRefresh / 1e3)}s`);
1029
+ this.tokenRefreshTimer = setTimeout(() => {
1030
+ this.performAutoRefresh();
1031
+ }, timeUntilRefresh);
1032
+ }
1033
+ /**
1034
+ * Perform the auto-refresh, handling computer sleep scenarios.
1035
+ */
1036
+ async performAutoRefresh() {
1037
+ if (!this.autoRefreshEnabled) return;
1038
+ const config = getConfig();
1039
+ if (this.isCachedTokenValid()) {
1040
+ if (config.debug) console.log("[SpeechOS] Token still valid on refresh check, rescheduling");
1041
+ this.scheduleTokenRefresh();
1042
+ return;
1043
+ }
1044
+ if (config.debug) console.log("[SpeechOS] Auto-refreshing token...");
1045
+ try {
1046
+ const data = await this.fetchTokenFromServer();
1047
+ this.cachedTokenData = data;
1048
+ this.tokenCacheTimestamp = Date.now();
1049
+ if (config.debug) console.log("[SpeechOS] Token auto-refreshed successfully");
1050
+ this.scheduleTokenRefresh();
1051
+ } catch (error) {
1052
+ console.warn("[SpeechOS] Token auto-refresh failed:", error);
1053
+ if (this.autoRefreshEnabled) this.tokenRefreshTimer = setTimeout(() => {
1054
+ this.performAutoRefresh();
1055
+ }, 30 * 1e3);
1056
+ }
1057
+ }
1058
+ /**
748
1059
  * Get the current room instance
749
1060
  */
750
1061
  getRoom() {
@@ -768,88 +1079,747 @@ var LiveKitManager = class {
768
1079
  isMicrophoneEnabled() {
769
1080
  return this.micTrack !== null;
770
1081
  }
771
- /**
772
- * Clear the cached token
773
- * Used when user identity changes to ensure next session gets a fresh token
774
- */
775
- clearToken() {
776
- const config = getConfig();
777
- if (config.debug) console.log("[SpeechOS] Clearing cached token");
778
- this.tokenData = null;
779
- this.preWarmPromise = null;
780
- }
781
1082
  };
782
1083
  const livekit = new LiveKitManager();
1084
+ events.on("settings:changed", () => {
1085
+ livekit.invalidateTokenCache();
1086
+ });
783
1087
 
784
1088
  //#endregion
785
- //#region src/transcript-store.ts
786
- const STORAGE_KEY = "speechos_transcripts";
787
- const MAX_ENTRIES = 50;
1089
+ //#region src/audio-capture.ts
788
1090
  /**
789
- * Generate a unique ID for transcript entries
1091
+ * Detect if running in Safari.
790
1092
  */
791
- function generateId() {
792
- return `${Date.now()}-${Math.random().toString(36).slice(2, 9)}`;
1093
+ function isSafari() {
1094
+ const ua = navigator.userAgent.toLowerCase();
1095
+ const vendor = navigator.vendor?.toLowerCase() || "";
1096
+ const hasSafariUA = ua.includes("safari") && !ua.includes("chrome") && !ua.includes("chromium");
1097
+ const isAppleVendor = vendor.includes("apple");
1098
+ return hasSafariUA && isAppleVendor;
793
1099
  }
794
1100
  /**
795
- * Get all transcripts from localStorage
1101
+ * Detect the best supported audio format for the current browser.
1102
+ *
1103
+ * IMPORTANT: Safari must use MP4/AAC. Its WebM/Opus implementation is buggy
1104
+ * and produces truncated/incomplete audio.
796
1105
  */
797
- function getTranscripts() {
798
- try {
799
- const stored = localStorage.getItem(STORAGE_KEY);
800
- if (!stored) return [];
801
- const entries = JSON.parse(stored);
802
- return entries.sort((a, b) => b.timestamp - a.timestamp);
803
- } catch {
804
- return [];
1106
+ function getSupportedAudioFormat() {
1107
+ if (isSafari()) {
1108
+ if (MediaRecorder.isTypeSupported("audio/mp4")) return {
1109
+ mimeType: "audio/mp4",
1110
+ format: "mp4",
1111
+ needsEncodingParams: false
1112
+ };
1113
+ return {
1114
+ mimeType: "",
1115
+ format: "mp4",
1116
+ needsEncodingParams: true
1117
+ };
805
1118
  }
1119
+ if (MediaRecorder.isTypeSupported("audio/webm;codecs=opus")) return {
1120
+ mimeType: "audio/webm;codecs=opus",
1121
+ format: "webm",
1122
+ needsEncodingParams: false
1123
+ };
1124
+ if (MediaRecorder.isTypeSupported("audio/webm")) return {
1125
+ mimeType: "audio/webm",
1126
+ format: "webm",
1127
+ needsEncodingParams: false
1128
+ };
1129
+ if (MediaRecorder.isTypeSupported("audio/mp4")) return {
1130
+ mimeType: "audio/mp4",
1131
+ format: "mp4",
1132
+ needsEncodingParams: false
1133
+ };
1134
+ return {
1135
+ mimeType: "",
1136
+ format: "webm",
1137
+ needsEncodingParams: true
1138
+ };
806
1139
  }
807
1140
  /**
808
- * Save a new transcript entry
1141
+ * Audio capture manager with buffering support.
1142
+ *
1143
+ * Usage:
1144
+ * 1. Create instance with onChunk callback
1145
+ * 2. Call start() - immediately begins capturing
1146
+ * 3. Call setReady() when connection is established - flushes buffer
1147
+ * 4. Call stop() when done
809
1148
  */
810
- function saveTranscript(text, action, originalText) {
811
- const entry = {
812
- id: generateId(),
813
- text,
814
- timestamp: Date.now(),
815
- action,
816
- ...originalText && { originalText }
817
- };
818
- const entries = getTranscripts();
819
- entries.unshift(entry);
820
- const pruned = entries.slice(0, MAX_ENTRIES);
821
- try {
822
- localStorage.setItem(STORAGE_KEY, JSON.stringify(pruned));
823
- } catch {}
824
- return entry;
825
- }
1149
+ var AudioCapture = class AudioCapture {
1150
+ mediaStream = null;
1151
+ recorder = null;
1152
+ buffer = [];
1153
+ isReady = false;
1154
+ isRecording = false;
1155
+ onChunk;
1156
+ audioFormat;
1157
+ deviceId;
1158
+ /**
1159
+ * Time slice for MediaRecorder in milliseconds.
1160
+ *
1161
+ * Safari requires a larger timeslice (1000ms) to properly flush its internal
1162
+ * audio buffers. Smaller values cause Safari to drop or truncate audio data.
1163
+ * See: https://community.openai.com/t/whisper-problem-with-audio-mp4-blobs-from-safari/
1164
+ *
1165
+ * Other browsers (Chrome, Firefox, Edge) work well with smaller timeslices
1166
+ * which provide lower latency for real-time transcription.
1167
+ */
1168
+ static TIME_SLICE_MS = 100;
1169
+ static SAFARI_TIME_SLICE_MS = 1e3;
1170
+ /**
1171
+ * @param onChunk - Callback for receiving audio chunks
1172
+ * @param deviceId - Optional audio device ID (empty string or undefined for system default)
1173
+ */
1174
+ constructor(onChunk, deviceId) {
1175
+ this.onChunk = onChunk;
1176
+ this.audioFormat = getSupportedAudioFormat();
1177
+ this.deviceId = deviceId;
1178
+ }
1179
+ /**
1180
+ * Get the appropriate timeslice for the current browser.
1181
+ * Safari needs a larger timeslice to avoid dropping audio data.
1182
+ */
1183
+ getTimeSlice() {
1184
+ return isSafari() ? AudioCapture.SAFARI_TIME_SLICE_MS : AudioCapture.TIME_SLICE_MS;
1185
+ }
1186
+ /**
1187
+ * Get the timeslice being used (in milliseconds).
1188
+ * Useful for callers that need to wait for audio processing.
1189
+ */
1190
+ getTimeSliceMs() {
1191
+ return this.getTimeSlice();
1192
+ }
1193
+ /**
1194
+ * Get the audio format being used.
1195
+ */
1196
+ getFormat() {
1197
+ return this.audioFormat;
1198
+ }
1199
+ /**
1200
+ * Start capturing audio immediately.
1201
+ *
1202
+ * Audio chunks will be buffered until setReady() is called.
1203
+ */
1204
+ async start() {
1205
+ const config = getConfig();
1206
+ if (this.isRecording) {
1207
+ if (config.debug) console.log("[SpeechOS] AudioCapture already recording");
1208
+ return;
1209
+ }
1210
+ this.buffer = [];
1211
+ this.isReady = false;
1212
+ const constraints = { audio: {
1213
+ echoCancellation: true,
1214
+ noiseSuppression: true,
1215
+ ...this.deviceId ? { deviceId: { exact: this.deviceId } } : {}
1216
+ } };
1217
+ if (config.debug) {
1218
+ console.log("[SpeechOS] AudioCapture starting with format:", this.audioFormat.mimeType);
1219
+ console.log("[SpeechOS] Detected Safari:", isSafari());
1220
+ if (this.deviceId) console.log("[SpeechOS] Using audio device:", this.deviceId);
1221
+ }
1222
+ try {
1223
+ this.mediaStream = await navigator.mediaDevices.getUserMedia(constraints);
1224
+ const recorderOptions = {};
1225
+ if (this.audioFormat.mimeType) recorderOptions.mimeType = this.audioFormat.mimeType;
1226
+ this.recorder = new MediaRecorder(this.mediaStream, recorderOptions);
1227
+ this.recorder.ondataavailable = (event) => {
1228
+ if (event.data && event.data.size > 0) this.handleChunk(event.data);
1229
+ };
1230
+ this.recorder.onerror = (event) => {
1231
+ console.error("[SpeechOS] MediaRecorder error:", event);
1232
+ };
1233
+ const timeSlice = this.getTimeSlice();
1234
+ this.recorder.start(timeSlice);
1235
+ this.isRecording = true;
1236
+ if (config.debug) console.log(`[SpeechOS] AudioCapture started with ${timeSlice}ms timeslice, buffering until ready`);
1237
+ } catch (error) {
1238
+ if (this.deviceId && error instanceof Error) {
1239
+ console.warn("[SpeechOS] Selected device unavailable, trying default:", error.message);
1240
+ this.mediaStream = await navigator.mediaDevices.getUserMedia({ audio: {
1241
+ echoCancellation: true,
1242
+ noiseSuppression: true
1243
+ } });
1244
+ const recorderOptions = {};
1245
+ if (this.audioFormat.mimeType) recorderOptions.mimeType = this.audioFormat.mimeType;
1246
+ this.recorder = new MediaRecorder(this.mediaStream, recorderOptions);
1247
+ this.recorder.ondataavailable = (event) => {
1248
+ if (event.data && event.data.size > 0) this.handleChunk(event.data);
1249
+ };
1250
+ this.recorder.start(this.getTimeSlice());
1251
+ this.isRecording = true;
1252
+ } else throw error;
1253
+ }
1254
+ }
1255
+ /**
1256
+ * Handle an audio chunk with atomic buffer swap pattern.
1257
+ *
1258
+ * If not ready: buffer the chunk.
1259
+ * If ready: send directly via callback.
1260
+ */
1261
+ handleChunk(chunk) {
1262
+ if (this.isReady) this.onChunk(chunk);
1263
+ else this.buffer.push(chunk);
1264
+ }
1265
+ /**
1266
+ * Mark the capture as ready (connection established).
1267
+ *
1268
+ * This flushes any buffered chunks and switches to direct mode.
1269
+ * Uses atomic swap to prevent chunk reordering.
1270
+ */
1271
+ setReady() {
1272
+ const config = getConfig();
1273
+ if (this.isReady) return;
1274
+ const toFlush = this.buffer;
1275
+ this.buffer = [];
1276
+ for (const chunk of toFlush) this.onChunk(chunk);
1277
+ this.isReady = true;
1278
+ if (config.debug) console.log(`[SpeechOS] AudioCapture ready, flushed ${toFlush.length} buffered chunks`);
1279
+ }
1280
+ /**
1281
+ * Stop capturing audio and wait for final chunk.
1282
+ *
1283
+ * Uses requestData() before stop() to force the MediaRecorder to flush
1284
+ * any buffered audio immediately. This is critical for Safari which
1285
+ * may hold audio data in internal buffers.
1286
+ *
1287
+ * Safari requires an additional delay after stopping to ensure all audio
1288
+ * from its internal encoding pipeline has been fully processed and emitted.
1289
+ */
1290
+ async stop() {
1291
+ const config = getConfig();
1292
+ const safari = isSafari();
1293
+ if (this.recorder && this.recorder.state !== "inactive") {
1294
+ if (this.recorder.state === "recording") try {
1295
+ const dataPromise = new Promise((resolve) => {
1296
+ const handler = (event) => {
1297
+ this.recorder?.removeEventListener("dataavailable", handler);
1298
+ if (config.debug) console.log(`[SpeechOS] requestData flush received: ${event.data.size} bytes`);
1299
+ resolve();
1300
+ };
1301
+ this.recorder?.addEventListener("dataavailable", handler);
1302
+ });
1303
+ this.recorder.requestData();
1304
+ if (config.debug) console.log("[SpeechOS] Requested data flush before stop");
1305
+ await dataPromise;
1306
+ } catch (e) {
1307
+ if (config.debug) console.log("[SpeechOS] requestData() not supported or failed:", e);
1308
+ }
1309
+ const stopPromise = new Promise((resolve) => {
1310
+ if (!this.recorder) {
1311
+ resolve();
1312
+ return;
1313
+ }
1314
+ this.recorder.onstop = () => {
1315
+ if (config.debug) console.log("[SpeechOS] MediaRecorder onstop fired");
1316
+ resolve();
1317
+ };
1318
+ });
1319
+ this.recorder.stop();
1320
+ await stopPromise;
1321
+ if (safari) {
1322
+ if (config.debug) console.log("[SpeechOS] Safari: waiting 2s for encoding pipeline to flush");
1323
+ await new Promise((resolve) => setTimeout(resolve, 2e3));
1324
+ }
1325
+ }
1326
+ if (this.mediaStream) {
1327
+ for (const track of this.mediaStream.getTracks()) track.stop();
1328
+ this.mediaStream = null;
1329
+ }
1330
+ this.recorder = null;
1331
+ this.isRecording = false;
1332
+ this.isReady = false;
1333
+ this.buffer = [];
1334
+ if (config.debug) console.log("[SpeechOS] AudioCapture stopped");
1335
+ }
1336
+ /**
1337
+ * Check if currently recording.
1338
+ */
1339
+ get recording() {
1340
+ return this.isRecording;
1341
+ }
1342
+ /**
1343
+ * Check if ready (connection established, direct mode active).
1344
+ */
1345
+ get ready() {
1346
+ return this.isReady;
1347
+ }
1348
+ /**
1349
+ * Get the number of buffered chunks waiting to be sent.
1350
+ */
1351
+ get bufferedChunks() {
1352
+ return this.buffer.length;
1353
+ }
1354
+ };
826
1355
  /**
827
- * Clear all transcript history
1356
+ * Factory function to create an AudioCapture instance.
1357
+ * @param onChunk - Callback for receiving audio chunks
1358
+ * @param deviceId - Optional audio device ID (empty string or undefined for system default)
828
1359
  */
829
- function clearTranscripts() {
830
- try {
831
- localStorage.removeItem(STORAGE_KEY);
832
- } catch {}
1360
+ function createAudioCapture(onChunk, deviceId) {
1361
+ return new AudioCapture(onChunk, deviceId);
833
1362
  }
1363
+
1364
+ //#endregion
1365
+ //#region src/websocket.ts
1366
+ const MESSAGE_TYPE_AUTH = "auth";
1367
+ const MESSAGE_TYPE_READY = "ready";
1368
+ const MESSAGE_TYPE_TRANSCRIPTION = "transcription";
1369
+ const MESSAGE_TYPE_REQUEST_TRANSCRIPT = "request_transcript";
1370
+ const MESSAGE_TYPE_TRANSCRIPT = "transcript";
1371
+ const MESSAGE_TYPE_EDIT_TEXT = "edit_text";
1372
+ const MESSAGE_TYPE_EDITED_TEXT = "edited_text";
1373
+ const MESSAGE_TYPE_EXECUTE_COMMAND = "execute_command";
1374
+ const MESSAGE_TYPE_COMMAND_RESULT = "command_result";
1375
+ const MESSAGE_TYPE_ERROR = "error";
834
1376
  /**
835
- * Delete a single transcript by ID
1377
+ * Response timeout in milliseconds.
836
1378
  */
837
- function deleteTranscript(id) {
838
- const entries = getTranscripts().filter((e) => e.id !== id);
839
- try {
840
- localStorage.setItem(STORAGE_KEY, JSON.stringify(entries));
841
- } catch {}
842
- }
843
- const transcriptStore = {
844
- getTranscripts,
845
- saveTranscript,
846
- clearTranscripts,
847
- deleteTranscript
1379
+ const RESPONSE_TIMEOUT_MS = 15e3;
1380
+ /**
1381
+ * A deferred promise with timeout support.
1382
+ */
1383
+ var Deferred$1 = class {
1384
+ promise;
1385
+ _resolve;
1386
+ _reject;
1387
+ _timeoutId = null;
1388
+ _settled = false;
1389
+ constructor() {
1390
+ this.promise = new Promise((resolve, reject) => {
1391
+ this._resolve = resolve;
1392
+ this._reject = reject;
1393
+ });
1394
+ }
1395
+ setTimeout(ms, errorMessage, errorCode, errorSource) {
1396
+ this._timeoutId = setTimeout(() => {
1397
+ if (!this._settled) {
1398
+ console.error(`[SpeechOS] Error: ${errorMessage} (${errorCode})`);
1399
+ events.emit("error", {
1400
+ code: errorCode,
1401
+ message: errorMessage,
1402
+ source: errorSource
1403
+ });
1404
+ this.reject(new Error(errorMessage));
1405
+ }
1406
+ }, ms);
1407
+ }
1408
+ resolve(value) {
1409
+ if (!this._settled) {
1410
+ this._settled = true;
1411
+ this.clearTimeout();
1412
+ this._resolve(value);
1413
+ }
1414
+ }
1415
+ reject(error) {
1416
+ if (!this._settled) {
1417
+ this._settled = true;
1418
+ this.clearTimeout();
1419
+ this._reject(error);
1420
+ }
1421
+ }
1422
+ clearTimeout() {
1423
+ if (this._timeoutId !== null) {
1424
+ clearTimeout(this._timeoutId);
1425
+ this._timeoutId = null;
1426
+ }
1427
+ }
1428
+ get isSettled() {
1429
+ return this._settled;
1430
+ }
1431
+ };
1432
+ /**
1433
+ * Maximum time to wait for WebSocket buffer to drain.
1434
+ */
1435
+ const BUFFER_DRAIN_TIMEOUT_MS = 5e3;
1436
+ /**
1437
+ * Polling interval for checking WebSocket buffer.
1438
+ */
1439
+ const BUFFER_CHECK_INTERVAL_MS = 50;
1440
+ /**
1441
+ * WebSocket connection manager for voice sessions.
1442
+ */
1443
+ var WebSocketManager = class {
1444
+ ws = null;
1445
+ audioCapture = null;
1446
+ sessionId = null;
1447
+ pendingAuth = null;
1448
+ pendingTranscript = null;
1449
+ pendingEditText = null;
1450
+ pendingCommand = null;
1451
+ pendingAudioSends = /* @__PURE__ */ new Set();
1452
+ editOriginalText = null;
1453
+ lastInputText = void 0;
1454
+ sessionAction = "dictate";
1455
+ sessionInputText = "";
1456
+ sessionCommands = [];
1457
+ sessionSettings = {};
1458
+ /**
1459
+ * Get the WebSocket URL for voice sessions.
1460
+ */
1461
+ getWebSocketUrl() {
1462
+ const config = getConfig();
1463
+ const host = config.host || "https://app.speechos.ai";
1464
+ const wsUrl = host.replace(/^http/, "ws");
1465
+ return `${wsUrl}/ws/voice/`;
1466
+ }
1467
+ /**
1468
+ * Start a voice session with the WebSocket backend.
1469
+ *
1470
+ * This method:
1471
+ * 1. Starts audio capture immediately (buffering)
1472
+ * 2. Opens WebSocket connection
1473
+ * 3. Authenticates with API key and action parameters
1474
+ * 4. Flushes buffered audio and continues streaming
1475
+ *
1476
+ * @param options - Session options including action type and parameters
1477
+ */
1478
+ async startVoiceSession(options) {
1479
+ const config = getConfig();
1480
+ this.sessionAction = options?.action || "dictate";
1481
+ this.sessionInputText = options?.inputText || "";
1482
+ this.sessionCommands = options?.commands || [];
1483
+ this.sessionSettings = options?.settings || {};
1484
+ if (this.sessionAction === "edit") this.editOriginalText = this.sessionInputText;
1485
+ if (config.debug) console.log("[SpeechOS] Starting WebSocket voice session...");
1486
+ this.audioCapture = createAudioCapture((chunk) => {
1487
+ this.sendAudioChunk(chunk);
1488
+ }, this.sessionSettings.audioDeviceId);
1489
+ await this.audioCapture.start();
1490
+ if (options?.onMicReady) options.onMicReady();
1491
+ state.setMicEnabled(true);
1492
+ const wsUrl = this.getWebSocketUrl();
1493
+ if (config.debug) console.log("[SpeechOS] Connecting to WebSocket:", wsUrl);
1494
+ this.ws = new WebSocket(wsUrl);
1495
+ this.ws.onopen = () => {
1496
+ if (config.debug) console.log("[SpeechOS] WebSocket connected, authenticating...");
1497
+ this.authenticate();
1498
+ };
1499
+ this.ws.onmessage = (event) => {
1500
+ this.handleMessage(event.data);
1501
+ };
1502
+ this.ws.onerror = (event) => {
1503
+ console.error("[SpeechOS] WebSocket error:", event);
1504
+ events.emit("error", {
1505
+ code: "websocket_error",
1506
+ message: "WebSocket connection error",
1507
+ source: "connection"
1508
+ });
1509
+ };
1510
+ this.ws.onclose = (event) => {
1511
+ if (config.debug) console.log("[SpeechOS] WebSocket closed:", event.code, event.reason);
1512
+ state.setConnected(false);
1513
+ };
1514
+ this.pendingAuth = new Deferred$1();
1515
+ this.pendingAuth.setTimeout(RESPONSE_TIMEOUT_MS, "Connection timed out", "connection_timeout", "connection");
1516
+ await this.pendingAuth.promise;
1517
+ this.pendingAuth = null;
1518
+ if (this.audioCapture) this.audioCapture.setReady();
1519
+ state.setConnected(true);
1520
+ if (config.debug) console.log("[SpeechOS] WebSocket voice session ready");
1521
+ }
1522
+ /**
1523
+ * Send authentication message with action parameters.
1524
+ * All session parameters are now sent upfront in the auth message.
1525
+ */
1526
+ authenticate() {
1527
+ const config = getConfig();
1528
+ const audioFormat = getSupportedAudioFormat();
1529
+ const settings = this.sessionSettings;
1530
+ const authMessage = {
1531
+ type: MESSAGE_TYPE_AUTH,
1532
+ api_key: config.apiKey,
1533
+ user_id: config.userId || null,
1534
+ input_language: settings.inputLanguageCode ?? "en-US",
1535
+ output_language: settings.outputLanguageCode ?? "en-US",
1536
+ smart_format: settings.smartFormat ?? true,
1537
+ custom_vocabulary: settings.vocabulary ?? [],
1538
+ custom_snippets: settings.snippets ?? [],
1539
+ audio_format: audioFormat.format,
1540
+ action: this.sessionAction,
1541
+ input_text: this.sessionInputText,
1542
+ commands: this.sessionCommands
1543
+ };
1544
+ if (config.debug) console.log("[SpeechOS] Sending auth message with action:", this.sessionAction);
1545
+ this.ws?.send(JSON.stringify(authMessage));
1546
+ }
1547
+ /**
1548
+ * Send an audio chunk over the WebSocket.
1549
+ * Tracks the promise so we can wait for all sends to complete.
1550
+ */
1551
+ sendAudioChunk(chunk) {
1552
+ const sendPromise = this.doSendAudioChunk(chunk);
1553
+ this.pendingAudioSends.add(sendPromise);
1554
+ sendPromise.finally(() => {
1555
+ this.pendingAudioSends.delete(sendPromise);
1556
+ });
1557
+ }
1558
+ /**
1559
+ * Actually send the audio chunk (async operation).
1560
+ */
1561
+ async doSendAudioChunk(chunk) {
1562
+ if (this.ws && this.ws.readyState === WebSocket.OPEN) {
1563
+ const arrayBuffer = await chunk.arrayBuffer();
1564
+ this.ws.send(arrayBuffer);
1565
+ }
1566
+ }
1567
+ /**
1568
+ * Handle incoming WebSocket messages.
1569
+ */
1570
+ handleMessage(data) {
1571
+ const config = getConfig();
1572
+ try {
1573
+ const message = JSON.parse(data);
1574
+ if (config.debug) console.log("[SpeechOS] WebSocket message:", message);
1575
+ switch (message.type) {
1576
+ case MESSAGE_TYPE_READY:
1577
+ this.handleReady(message);
1578
+ break;
1579
+ case MESSAGE_TYPE_TRANSCRIPTION:
1580
+ this.handleIntermediateTranscription(message);
1581
+ break;
1582
+ case MESSAGE_TYPE_TRANSCRIPT:
1583
+ this.handleFinalTranscript(message);
1584
+ break;
1585
+ case MESSAGE_TYPE_EDITED_TEXT:
1586
+ this.handleEditedText(message);
1587
+ break;
1588
+ case MESSAGE_TYPE_COMMAND_RESULT:
1589
+ this.handleCommandResult(message);
1590
+ break;
1591
+ case MESSAGE_TYPE_ERROR:
1592
+ this.handleError(message);
1593
+ break;
1594
+ default: if (config.debug) console.log("[SpeechOS] Unknown message type:", message.type);
1595
+ }
1596
+ } catch (error) {
1597
+ console.error("[SpeechOS] Failed to parse message:", error);
1598
+ }
1599
+ }
1600
+ handleReady(message) {
1601
+ const config = getConfig();
1602
+ this.sessionId = message.session_id;
1603
+ if (config.debug) console.log("[SpeechOS] Session ready:", this.sessionId);
1604
+ if (this.pendingAuth) this.pendingAuth.resolve();
1605
+ }
1606
+ handleIntermediateTranscription(message) {
1607
+ const config = getConfig();
1608
+ if (config.debug) console.log("[SpeechOS] Intermediate transcription:", message.transcript, "final:", message.is_final);
1609
+ }
1610
+ handleFinalTranscript(message) {
1611
+ const transcript = message.transcript || "";
1612
+ events.emit("transcription:complete", { text: transcript });
1613
+ if (this.pendingTranscript) {
1614
+ this.pendingTranscript.resolve(transcript);
1615
+ this.pendingTranscript = null;
1616
+ }
1617
+ }
1618
+ handleEditedText(message) {
1619
+ const editedText = message.text || "";
1620
+ events.emit("edit:complete", {
1621
+ text: editedText,
1622
+ originalText: this.editOriginalText || ""
1623
+ });
1624
+ if (this.pendingEditText) {
1625
+ this.pendingEditText.resolve(editedText);
1626
+ this.pendingEditText = null;
1627
+ }
1628
+ this.editOriginalText = null;
1629
+ }
1630
+ handleCommandResult(message) {
1631
+ const commandResult = message.command || null;
1632
+ this.lastInputText = message.transcript;
1633
+ events.emit("command:complete", { command: commandResult });
1634
+ if (this.pendingCommand) {
1635
+ this.pendingCommand.resolve(commandResult);
1636
+ this.pendingCommand = null;
1637
+ }
1638
+ }
1639
+ handleError(message) {
1640
+ const errorCode = message.code || "server_error";
1641
+ const errorMessage = message.message || "A server error occurred";
1642
+ console.error(`[SpeechOS] Error: ${errorMessage} (${errorCode})`);
1643
+ events.emit("error", {
1644
+ code: errorCode,
1645
+ message: errorMessage,
1646
+ source: "server"
1647
+ });
1648
+ const error = new Error(errorMessage);
1649
+ if (this.pendingAuth) {
1650
+ this.pendingAuth.reject(error);
1651
+ this.pendingAuth = null;
1652
+ }
1653
+ if (this.pendingTranscript) {
1654
+ this.pendingTranscript.reject(error);
1655
+ this.pendingTranscript = null;
1656
+ }
1657
+ if (this.pendingEditText) {
1658
+ this.pendingEditText.reject(error);
1659
+ this.pendingEditText = null;
1660
+ }
1661
+ if (this.pendingCommand) {
1662
+ this.pendingCommand.reject(error);
1663
+ this.pendingCommand = null;
1664
+ }
1665
+ }
1666
+ /**
1667
+ * Stop the voice session and request the transcript.
1668
+ */
1669
+ async stopVoiceSession() {
1670
+ const config = getConfig();
1671
+ if (config.debug) console.log("[SpeechOS] Stopping voice session, requesting transcript...");
1672
+ await this.stopAudioCapture();
1673
+ this.pendingTranscript = new Deferred$1();
1674
+ this.pendingTranscript.setTimeout(RESPONSE_TIMEOUT_MS, "Transcription timed out. Please try again.", "transcription_timeout", "timeout");
1675
+ this.sendMessage({ type: MESSAGE_TYPE_REQUEST_TRANSCRIPT });
1676
+ const result = await this.pendingTranscript.promise;
1677
+ this.pendingTranscript = null;
1678
+ return result;
1679
+ }
1680
+ /**
1681
+ * Request text editing using the transcript as instructions.
1682
+ * Note: The input text was already sent in the auth message via startVoiceSession.
1683
+ */
1684
+ async requestEditText(_originalText) {
1685
+ const config = getConfig();
1686
+ if (config.debug) console.log("[SpeechOS] Requesting text edit...");
1687
+ await this.stopAudioCapture();
1688
+ this.pendingEditText = new Deferred$1();
1689
+ this.pendingEditText.setTimeout(RESPONSE_TIMEOUT_MS, "Edit request timed out. Please try again.", "edit_timeout", "timeout");
1690
+ this.sendMessage({ type: MESSAGE_TYPE_EDIT_TEXT });
1691
+ const result = await this.pendingEditText.promise;
1692
+ this.pendingEditText = null;
1693
+ return result;
1694
+ }
1695
+ /**
1696
+ * Request command matching using the transcript as input.
1697
+ * Note: The command definitions were already sent in the auth message via startVoiceSession.
1698
+ */
1699
+ async requestCommand(_commands) {
1700
+ const config = getConfig();
1701
+ if (config.debug) console.log("[SpeechOS] Requesting command match...");
1702
+ await this.stopAudioCapture();
1703
+ this.pendingCommand = new Deferred$1();
1704
+ this.pendingCommand.setTimeout(RESPONSE_TIMEOUT_MS, "Command request timed out. Please try again.", "command_timeout", "timeout");
1705
+ this.sendMessage({ type: MESSAGE_TYPE_EXECUTE_COMMAND });
1706
+ const result = await this.pendingCommand.promise;
1707
+ this.pendingCommand = null;
1708
+ return result;
1709
+ }
1710
+ /**
1711
+ * Stop audio capture and wait for all data to be sent.
1712
+ *
1713
+ * Waits for:
1714
+ * 1. All pending sendAudioChunk calls to complete (arrayBuffer conversion)
1715
+ * 2. WebSocket buffer to drain (all data transmitted)
1716
+ *
1717
+ * WebSocket message ordering ensures server receives all audio before transcript request.
1718
+ */
1719
+ async stopAudioCapture() {
1720
+ const config = getConfig();
1721
+ const startTime = Date.now();
1722
+ if (config.debug) console.log("[SpeechOS] stopAudioCapture: starting...");
1723
+ if (this.audioCapture) {
1724
+ await this.audioCapture.stop();
1725
+ this.audioCapture = null;
1726
+ if (config.debug) console.log(`[SpeechOS] stopAudioCapture: recorder stopped after ${Date.now() - startTime}ms`);
1727
+ }
1728
+ state.setMicEnabled(false);
1729
+ if (this.pendingAudioSends.size > 0) {
1730
+ if (config.debug) console.log(`[SpeechOS] stopAudioCapture: waiting for ${this.pendingAudioSends.size} pending audio sends...`);
1731
+ await Promise.all(this.pendingAudioSends);
1732
+ if (config.debug) console.log(`[SpeechOS] stopAudioCapture: all sends complete after ${Date.now() - startTime}ms`);
1733
+ } else if (config.debug) console.log("[SpeechOS] stopAudioCapture: no pending sends");
1734
+ await this.waitForBufferDrain();
1735
+ if (config.debug) console.log(`[SpeechOS] stopAudioCapture: complete after ${Date.now() - startTime}ms`);
1736
+ }
1737
+ /**
1738
+ * Wait for the WebSocket send buffer to drain.
1739
+ *
1740
+ * This ensures all audio data has been transmitted before we request
1741
+ * the transcript. Uses the same pattern as LiveKit's ReadableStream approach.
1742
+ */
1743
+ async waitForBufferDrain() {
1744
+ if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
1745
+ const config = getConfig();
1746
+ const startTime = Date.now();
1747
+ while (this.ws.bufferedAmount > 0) {
1748
+ if (Date.now() - startTime > BUFFER_DRAIN_TIMEOUT_MS) {
1749
+ console.warn(`[SpeechOS] Buffer drain timeout, ${this.ws.bufferedAmount} bytes still pending`);
1750
+ break;
1751
+ }
1752
+ await new Promise((resolve) => setTimeout(resolve, BUFFER_CHECK_INTERVAL_MS));
1753
+ }
1754
+ if (config.debug) console.log(`[SpeechOS] Buffer drained in ${Date.now() - startTime}ms`);
1755
+ }
1756
+ /**
1757
+ * Send a JSON message over the WebSocket.
1758
+ */
1759
+ sendMessage(message) {
1760
+ if (this.ws && this.ws.readyState === WebSocket.OPEN) this.ws.send(JSON.stringify(message));
1761
+ }
1762
+ /**
1763
+ * Disconnect from the WebSocket.
1764
+ */
1765
+ async disconnect() {
1766
+ const config = getConfig();
1767
+ if (config.debug) console.log("[SpeechOS] Disconnecting WebSocket...");
1768
+ await this.stopAudioCapture();
1769
+ if (this.ws) {
1770
+ this.ws.close();
1771
+ this.ws = null;
1772
+ }
1773
+ const error = new Error("Disconnected");
1774
+ if (this.pendingAuth) {
1775
+ this.pendingAuth.reject(error);
1776
+ this.pendingAuth = null;
1777
+ }
1778
+ if (this.pendingTranscript) {
1779
+ this.pendingTranscript.reject(error);
1780
+ this.pendingTranscript = null;
1781
+ }
1782
+ if (this.pendingEditText) {
1783
+ this.pendingEditText.reject(error);
1784
+ this.pendingEditText = null;
1785
+ }
1786
+ if (this.pendingCommand) {
1787
+ this.pendingCommand.reject(error);
1788
+ this.pendingCommand = null;
1789
+ }
1790
+ this.sessionId = null;
1791
+ this.editOriginalText = null;
1792
+ this.lastInputText = void 0;
1793
+ this.sessionSettings = {};
1794
+ state.setConnected(false);
1795
+ state.setMicEnabled(false);
1796
+ if (config.debug) console.log("[SpeechOS] WebSocket disconnected");
1797
+ }
1798
+ /**
1799
+ * Check if connected to WebSocket.
1800
+ */
1801
+ isConnected() {
1802
+ return this.ws !== null && this.ws.readyState === WebSocket.OPEN;
1803
+ }
1804
+ /**
1805
+ * Get the last input text from a command result.
1806
+ * This is the raw transcript of what the user said.
1807
+ */
1808
+ getLastInputText() {
1809
+ return this.lastInputText;
1810
+ }
848
1811
  };
1812
+ const websocket = new WebSocketManager();
849
1813
 
850
1814
  //#endregion
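As context for the buffered-send handling above: the browser WebSocket API exposes no "drain" event, so waitForBufferDrain polls bufferedAmount until it reaches zero before the transcript/edit/command request is sent. Below is a minimal standalone sketch of the same idea; the function name and the timeout/interval values are illustrative placeholders, not the package's BUFFER_DRAIN_TIMEOUT_MS / BUFFER_CHECK_INTERVAL_MS constants.

// Illustrative sketch only (not part of the published bundle): poll until the
// socket's send buffer is empty, giving up after a placeholder timeout.
async function waitForSocketDrain(ws, timeoutMs = 5000, intervalMs = 50) {
  if (!ws || ws.readyState !== WebSocket.OPEN) return;
  const startTime = Date.now();
  // bufferedAmount counts bytes queued by send() but not yet handed off to the network.
  while (ws.bufferedAmount > 0) {
    if (Date.now() - startTime > timeoutMs) {
      console.warn(`Drain timeout, ${ws.bufferedAmount} bytes still pending`);
      break;
    }
    await new Promise((resolve) => setTimeout(resolve, intervalMs));
  }
}

Once bufferedAmount reaches zero, every queued audio frame has left the client, so the request message sent next is guaranteed to reach the server after the audio it depends on.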
  //#region src/speechos.ts
  /**
+ * Get the active voice backend (always websocket now)
+ */
+ function getBackend$1() {
+ return websocket;
+ }
+ /**
  * SpeechOS Core SDK
  *
  * Provides two API layers:
@@ -868,7 +1838,6 @@ var SpeechOSCore = class {
  const currentConfig$1 = getConfig();
  if (currentConfig$1.debug) console.log("[SpeechOS] Initialized with config:", {
  host: currentConfig$1.host,
- position: currentConfig$1.position,
  debug: currentConfig$1.debug
  });
  }
@@ -908,7 +1877,6 @@ var SpeechOSCore = class {
  state.setRecordingState("processing");
  try {
  const transcript = await livekit.stopAndGetTranscript();
- transcriptStore.saveTranscript(transcript, "dictate");
  state.completeRecording();
  return transcript;
  } catch (error) {
@@ -925,7 +1893,6 @@ var SpeechOSCore = class {
  state.setRecordingState("processing");
  try {
  const editedText = await livekit.stopAndEdit(originalText);
- transcriptStore.saveTranscript(editedText, "edit", originalText);
  state.completeRecording();
  return editedText;
  } catch (error) {
@@ -951,8 +1918,13 @@ var SpeechOSCore = class {
  state.setActiveAction("dictate");
  state.startRecording();
  try {
- await livekit.startVoiceSession();
- state.setRecordingState("recording");
+ const backend = getBackend$1();
+ await backend.startVoiceSession({
+ action: "dictate",
+ onMicReady: () => {
+ state.setRecordingState("recording");
+ }
+ });
  return new Promise((resolve, reject) => {
  this._dictateResolve = resolve;
  this._dictateReject = reject;
@@ -972,8 +1944,8 @@ var SpeechOSCore = class {
  async stopDictation() {
  state.setRecordingState("processing");
  try {
- const transcript = await livekit.stopVoiceSession();
- transcriptStore.saveTranscript(transcript, "dictate");
+ const backend = getBackend$1();
+ const transcript = await backend.stopVoiceSession();
  state.completeRecording();
  if (this._dictateResolve) {
  this._dictateResolve(transcript);
@@ -1007,8 +1979,14 @@ var SpeechOSCore = class {
  state.startRecording();
  this._editOriginalText = originalText;
  try {
- await livekit.startVoiceSession();
- state.setRecordingState("recording");
+ const backend = getBackend$1();
+ await backend.startVoiceSession({
+ action: "edit",
+ inputText: originalText,
+ onMicReady: () => {
+ state.setRecordingState("recording");
+ }
+ });
  return new Promise((resolve, reject) => {
  this._editResolve = resolve;
  this._editReject = reject;
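The call sites above (and the command() method added further down) show that startVoiceSession now takes an options object rather than no arguments. The package's own type declaration is not part of this diff, so the typedef below is only a reading aid reconstructed from those call sites; the name StartVoiceSessionOptions is invented here.

// Reading aid only - inferred from the call sites in this diff, not the package's typings.
/**
 * @typedef {Object} StartVoiceSessionOptions
 * @property {"dictate" | "edit" | "command"} action  which flow the session drives
 * @property {string} [inputText]     original text, passed for the "edit" action
 * @property {Array} [commands]       command definitions, passed for the "command" action
 * @property {() => void} [onMicReady]  fired once the mic is live; used to flip state to "recording"
 */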
@@ -1029,9 +2007,9 @@ var SpeechOSCore = class {
  async stopEdit() {
  state.setRecordingState("processing");
  try {
+ const backend = getBackend$1();
  const originalText = this._editOriginalText || "";
- const editedText = await livekit.requestEditText(originalText);
- transcriptStore.saveTranscript(editedText, "edit", originalText);
+ const editedText = await backend.requestEditText(originalText);
  state.completeRecording();
  if (this._editResolve) {
  this._editResolve(editedText);
@@ -1054,6 +2032,71 @@ var SpeechOSCore = class {
  }
  }
  /**
+ * One-shot command: connect, wait for agent, record voice, match against commands
+ * Automatically handles the full voice session lifecycle
+ *
+ * @param commands - Array of command definitions to match against
+ * @returns The matched command result or null if no match
+ */
+ async command(commands) {
+ this.ensureInitialized();
+ state.setActiveAction("command");
+ state.startRecording();
+ this._commandCommands = commands;
+ try {
+ const backend = getBackend$1();
+ await backend.startVoiceSession({
+ action: "command",
+ commands,
+ onMicReady: () => {
+ state.setRecordingState("recording");
+ }
+ });
+ return new Promise((resolve, reject) => {
+ this._commandResolve = resolve;
+ this._commandReject = reject;
+ });
+ } catch (error) {
+ state.setError(error instanceof Error ? error.message : "Failed to start command");
+ await this.cleanup();
+ throw error;
+ }
+ }
+ _commandCommands;
+ _commandResolve;
+ _commandReject;
+ /**
+ * Stop command recording and get the matched command
+ * Call this after command() when user stops speaking
+ */
+ async stopCommand() {
+ state.setRecordingState("processing");
+ try {
+ const backend = getBackend$1();
+ const commands = this._commandCommands || [];
+ const result = await backend.requestCommand(commands);
+ state.completeRecording();
+ if (this._commandResolve) {
+ this._commandResolve(result);
+ this._commandResolve = void 0;
+ this._commandReject = void 0;
+ }
+ return result;
+ } catch (error) {
+ const err = error instanceof Error ? error : new Error("Command request failed");
+ state.setError(err.message);
+ if (this._commandReject) {
+ this._commandReject(err);
+ this._commandResolve = void 0;
+ this._commandReject = void 0;
+ }
+ throw err;
+ } finally {
+ this._commandCommands = void 0;
+ await this.cleanup();
+ }
+ }
+ /**
  * Cancel the current operation
  */
  async cancel() {
@@ -1068,7 +2111,13 @@ var SpeechOSCore = class {
  this._editResolve = void 0;
  this._editReject = void 0;
  }
+ if (this._commandReject) {
+ this._commandReject(err);
+ this._commandResolve = void 0;
+ this._commandReject = void 0;
+ }
  this._editOriginalText = void 0;
+ this._commandCommands = void 0;
  await this.cleanup();
  state.cancelRecording();
  }
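The command()/stopCommand() pair added above follows the same resolve-later pattern as dictation and editing: command() returns a promise that only settles once stopCommand() (or cancel()) resolves it. A usage sketch follows, assuming a push-to-talk style trigger; the fields of a command definition and of the matched result are not specified in this diff, so the ones shown are placeholders.

// Usage sketch only (push-to-talk style). The command-definition fields and the
// matched-result shape below are placeholders - they are not specified in this diff.
const commands = [
  { id: "open_settings", description: "Open the settings page" },
  { id: "sign_out", description: "Sign the user out" }
];

let matchPromise = null;

// On push-to-talk press: start listening. command() resolves later,
// once stopCommand() or cancel() settles it.
function onTalkKeyDown() {
  matchPromise = speechOS.command(commands);
}

// On push-to-talk release: stop recording and wait for the match
// (null when nothing matched).
async function onTalkKeyUp() {
  await speechOS.stopCommand();
  const result = await matchPromise;
  console.log(result ? "Matched command:" : "No command matched", result);
}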
@@ -1095,7 +2144,8 @@ var SpeechOSCore = class {
  }
  async cleanup() {
  try {
- await livekit.disconnect();
+ const backend = getBackend$1();
+ await backend.disconnect();
  } catch (error) {
  const config = getConfig();
  if (config.debug) console.warn("[SpeechOS] Cleanup disconnect error:", error);
@@ -1111,6 +2161,9 @@ var SpeechOSCore = class {
  this._editResolve = void 0;
  this._editReject = void 0;
  this._editOriginalText = void 0;
+ this._commandResolve = void 0;
+ this._commandReject = void 0;
+ this._commandCommands = void 0;
  resetConfig();
  state.reset();
  events.clear();
@@ -1118,10 +2171,38 @@
  };
  const speechOS = new SpeechOSCore();
 
+ //#endregion
+ //#region src/backend.ts
+ /**
+ * WebSocket backend adapter - wraps the websocket module to match the VoiceBackend interface
+ */
+ const websocketBackend = {
+ startVoiceSession: (options) => websocket.startVoiceSession(options),
+ stopVoiceSession: () => websocket.stopVoiceSession(),
+ requestEditText: (text) => websocket.requestEditText(text),
+ requestCommand: (commands) => websocket.requestCommand(commands),
+ disconnect: () => websocket.disconnect(),
+ isConnected: () => websocket.isConnected(),
+ getLastInputText: () => websocket.getLastInputText(),
+ prefetchToken: () => Promise.resolve({}),
+ startAutoRefresh: () => {},
+ stopAutoRefresh: () => {},
+ invalidateTokenCache: () => {}
+ };
+ /**
+ * Get the active voice backend.
+ * Always returns WebSocket backend (LiveKit is legacy).
+ *
+ * @returns The websocket backend
+ */
+ function getBackend() {
+ return websocketBackend;
+ }
+
  //#endregion
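The adapter above is said to match a VoiceBackend interface, but that declaration is not included in this diff. The typedef below is only a reconstruction from the adapter's members, with return types inferred from how the methods are used elsewhere in this file; the token-related members are no-ops for the WebSocket backend, presumably retained for parity with the legacy LiveKit backend.

// Reconstruction only - the actual VoiceBackend declaration is not part of this diff.
/**
 * @typedef {Object} VoiceBackend
 * @property {(options) => Promise<void>} startVoiceSession
 * @property {() => Promise<string>} stopVoiceSession      resolves with the transcript
 * @property {(text) => Promise<string>} requestEditText   resolves with the edited text
 * @property {(commands) => Promise<any>} requestCommand   resolves with the match result or null
 * @property {() => Promise<void>} disconnect
 * @property {() => boolean} isConnected
 * @property {() => (string | undefined)} getLastInputText
 * @property {() => Promise<object>} prefetchToken         no-op here
 * @property {() => void} startAutoRefresh                 no-op here
 * @property {() => void} stopAutoRefresh                  no-op here
 * @property {() => void} invalidateTokenCache             no-op here
 */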
  //#region src/index.ts
  const VERSION = "0.1.0";
 
  //#endregion
- export { DEFAULT_HOST, Deferred, SpeechOSEventEmitter, VERSION, createStateManager, defaultConfig, events, getConfig, livekit, resetConfig, setConfig, speechOS, state, transcriptStore, updateUserId, validateConfig };
+ export { DEFAULT_HOST, Deferred, SpeechOSEventEmitter, VERSION, createStateManager, events, getBackend, getConfig, livekit, resetConfig, setConfig, speechOS, state, updateUserId, validateConfig, websocket };
  //# sourceMappingURL=index.js.map
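For quick reference, the 0.2.2 export surface can be consumed as named imports (assuming dist/index.js remains the package's module entry point). Note that transcriptStore and defaultConfig are no longer exported, getBackend and websocket are new, and the bundle's internal VERSION constant still reads "0.1.0", which does not track the published package version.

// Named imports available from the 0.2.2 bundle, taken from the export line above.
// Assumes dist/index.js is the package entry; transcriptStore and defaultConfig
// were dropped from the export list in this release.
import {
  DEFAULT_HOST, Deferred, SpeechOSEventEmitter, VERSION,
  createStateManager, events, getBackend, getConfig, livekit,
  resetConfig, setConfig, speechOS, state, updateUserId,
  validateConfig, websocket
} from "@speechos/core";

speechOS;     // singleton SDK instance (dictate/edit/command lifecycle)
getBackend(); // always the WebSocket-backed voice backend in this release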