npm - @speechos/core - Versions diffs - 0.2.9 → 0.2.11 - Mend

@speechos/core 0.2.9 → 0.2.11

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in their respective public registries.

Files changed (21) hide show

package/dist/index.js CHANGED Viewed

@@ -1,5 +1,3 @@
-import { Room, RoomEvent, Track, createLocalAudioTrack } from "livekit-client";
 //#region src/config.ts
 /**
 * Default host - can be overridden by SPEECHOS_HOST env var at build time
@@ -187,6 +185,8 @@ const initialState = {
 	isMicEnabled: false,
 	activeAction: null,
 	focusedElement: null,
+	selectionText: null,
+	selectionElement: null,
 	recordingState: "idle",
 	errorMessage: null
 };
@@ -288,6 +288,26 @@ var StateManager = class {
 		this.setState({ focusedElement: element });
 	}
 	/**
+	* Set the current text selection
+	* @param text - Selected text (null to clear)
+	* @param element - Element associated with selection
+	*/
+	setSelection(text, element) {
+		this.setState({
+			selectionText: text,
+			selectionElement: element
+		});
+	}
+	/**
+	* Clear the current text selection
+	*/
+	clearSelection() {
+		this.setState({
+			selectionText: null,
+			selectionElement: null
+		});
+	}
+	/**
 	* Set the active action
 	* @param action - The action to set as active
 	*/
@@ -303,7 +323,7 @@ var StateManager = class {
 	}
 	/**
 	* Set the connection state
-	* @param isConnected - Whether connected to LiveKit
+	* @param isConnected - Whether connected to the backend
 	*/
 	setConnected(isConnected) {
 		this.setState({ isConnected });
@@ -389,746 +409,6 @@ function createStateManager(initial) {
 	});
 }
-//#endregion
-//#region src/livekit.ts
-const MESSAGE_TYPE_REQUEST_TRANSCRIPT$1 = "request_transcript";
-const MESSAGE_TYPE_TRANSCRIPT$1 = "transcript";
-const MESSAGE_TYPE_EDIT_TEXT$1 = "edit_text";
-const MESSAGE_TYPE_EDITED_TEXT$1 = "edited_text";
-const MESSAGE_TYPE_EXECUTE_COMMAND$1 = "execute_command";
-const MESSAGE_TYPE_COMMAND_RESULT$1 = "command_result";
-const MESSAGE_TYPE_ERROR$1 = "error";
-const TOPIC_SPEECHOS = "speechos";
-const TOKEN_CACHE_TTL_MS = 4 * 60 * 1e3;
-/**
-* A deferred promise with timeout support.
-* Encapsulates resolve/reject/timeout in a single object for cleaner async handling.
-*/
-var Deferred = class {
-	promise;
-	_resolve;
-	_reject;
-	_timeoutId = null;
-	_settled = false;
-	constructor() {
-		this.promise = new Promise((resolve, reject) => {
-			this._resolve = resolve;
-			this._reject = reject;
-		});
-	}
-	/**
-	* Set a timeout that will reject the promise with the given error
-	*/
-	setTimeout(ms, errorMessage, errorCode, errorSource) {
-		this._timeoutId = setTimeout(() => {
-			if (!this._settled) {
-				console.error(`[SpeechOS] Error: ${errorMessage} (${errorCode})`);
-				events.emit("error", {
-					code: errorCode,
-					message: errorMessage,
-					source: errorSource
-				});
-				this.reject(new Error(errorMessage));
-			}
-		}, ms);
-	}
-	resolve(value) {
-		if (!this._settled) {
-			this._settled = true;
-			this.clearTimeout();
-			this._resolve(value);
-		}
-	}
-	reject(error) {
-		if (!this._settled) {
-			this._settled = true;
-			this.clearTimeout();
-			this._reject(error);
-		}
-	}
-	clearTimeout() {
-		if (this._timeoutId !== null) {
-			clearTimeout(this._timeoutId);
-			this._timeoutId = null;
-		}
-	}
-	get isSettled() {
-		return this._settled;
-	}
-};
-/**
-* LiveKit connection manager
-*/
-var LiveKitManager = class {
-	room = null;
-	tokenData = null;
-	micTrack = null;
-	cachedTokenData = null;
-	tokenCacheTimestamp = null;
-	tokenPrefetchPromise = null;
-	tokenRefreshTimer = null;
-	autoRefreshEnabled = false;
-	pendingTranscript = null;
-	pendingEditText = null;
-	pendingCommand = null;
-	pendingTrackSubscribed = null;
-	editOriginalText = null;
-	sessionSettings = {};
-	/**
-	* Check if the cached token is still valid (within TTL)
-	*/
-	isCachedTokenValid() {
-		if (!this.cachedTokenData || !this.tokenCacheTimestamp) return false;
-		const age = Date.now() - this.tokenCacheTimestamp;
-		return age < TOKEN_CACHE_TTL_MS;
-	}
-	/**
-	* Pre-fetch a LiveKit token for later use
-	* Call this early (e.g., when widget expands) to reduce latency when starting a voice session.
-	* If a prefetch is already in progress, returns the existing promise.
-	* If a valid cached token exists, returns it immediately.
-	*/
-	async prefetchToken() {
-		const config = getConfig();
-		if (this.isCachedTokenValid() && this.cachedTokenData) {
-			if (config.debug) console.log("[SpeechOS] Using cached token (prefetch hit)");
-			return this.cachedTokenData;
-		}
-		if (this.tokenPrefetchPromise) {
-			if (config.debug) console.log("[SpeechOS] Prefetch already in progress, awaiting...");
-			return this.tokenPrefetchPromise;
-		}
-		if (config.debug) console.log("[SpeechOS] Starting token prefetch...");
-		this.tokenPrefetchPromise = this.fetchTokenFromServer().then((data) => {
-			this.cachedTokenData = data;
-			this.tokenCacheTimestamp = Date.now();
-			this.tokenPrefetchPromise = null;
-			return data;
-		}).catch((error) => {
-			this.tokenPrefetchPromise = null;
-			throw error;
-		});
-		return this.tokenPrefetchPromise;
-	}
-	/**
-	* Fetch a LiveKit token from the backend
-	* Uses cached token if valid, otherwise fetches a fresh one.
-	* Includes language settings and user vocabulary which are stored in the VoiceSession.
-	*/
-	async fetchToken() {
-		const config = getConfig();
-		if (this.isCachedTokenValid() && this.cachedTokenData) {
-			if (config.debug) console.log("[SpeechOS] Using cached token");
-			this.tokenData = this.cachedTokenData;
-			return this.cachedTokenData;
-		}
-		if (this.tokenPrefetchPromise) {
-			if (config.debug) console.log("[SpeechOS] Waiting for prefetch to complete...");
-			const data$1 = await this.tokenPrefetchPromise;
-			this.tokenData = data$1;
-			return data$1;
-		}
-		const data = await this.fetchTokenFromServer();
-		this.cachedTokenData = data;
-		this.tokenCacheTimestamp = Date.now();
-		this.tokenData = data;
-		return data;
-	}
-	/**
-	* Internal method to fetch a fresh token from the server
-	*/
-	async fetchTokenFromServer() {
-		const config = getConfig();
-		const url = `${config.host}/livekit/api/token/`;
-		const settings = this.sessionSettings;
-		const inputLanguage = settings.inputLanguageCode ?? "en-US";
-		const outputLanguage = settings.outputLanguageCode ?? "en-US";
-		const smartFormat = settings.smartFormat ?? true;
-		const vocabulary = settings.vocabulary ?? [];
-		const snippets = settings.snippets ?? [];
-		if (config.debug) {
-			console.log("[SpeechOS] Fetching LiveKit token from:", url);
-			console.log("[SpeechOS] Session settings:", {
-				inputLanguage,
-				outputLanguage,
-				smartFormat,
-				snippetsCount: snippets.length,
-				vocabularyCount: vocabulary.length
-			});
-		}
-		const response = await fetch(url, {
-			method: "POST",
-			headers: {
-				"Content-Type": "application/json",
-				...config.apiKey ? { Authorization: `Api-Key ${config.apiKey}` } : {}
-			},
-			body: JSON.stringify({
-				user_id: config.userId || null,
-				input_language: inputLanguage,
-				output_language: outputLanguage,
-				smart_format: smartFormat,
-				custom_vocabulary: vocabulary,
-				custom_snippets: snippets
-			})
-		});
-		if (!response.ok) throw new Error(`Failed to fetch LiveKit token: ${response.status} ${response.statusText}`);
-		const data = await response.json();
-		if (config.debug) console.log("[SpeechOS] LiveKit token received:", {
-			room: data.room,
-			identity: data.identity,
-			ws_url: data.ws_url
-		});
-		return data;
-	}
-	/**
-	* Connect to a LiveKit room (fresh connection each time)
-	*/
-	async connect() {
-		const config = getConfig();
-		await this.fetchToken();
-		if (!this.tokenData) throw new Error("No token available for LiveKit connection");
-		this.room = new Room({
-			adaptiveStream: true,
-			dynacast: true
-		});
-		this.setupRoomEvents();
-		if (config.debug) console.log("[SpeechOS] Connecting to LiveKit room:", this.tokenData.room);
-		await this.room.connect(this.tokenData.ws_url, this.tokenData.token);
-		state.setConnected(true);
-		if (config.debug) console.log("[SpeechOS] Connected to LiveKit room:", this.room.name);
-		return this.room;
-	}
-	/**
-	* Wait until the agent is ready to receive audio
-	* Resolves when LocalTrackSubscribed event is received
-	*/
-	async waitUntilReady() {
-		if (!this.room || this.room.state !== "connected") throw new Error("Not connected to room");
-		if (this.pendingTrackSubscribed) return this.pendingTrackSubscribed.promise;
-		this.pendingTrackSubscribed = new Deferred();
-		this.pendingTrackSubscribed.setTimeout(15e3, "Connection timed out - agent not available", "connection_timeout", "connection");
-		return this.pendingTrackSubscribed.promise;
-	}
-	/**
-	* Set up LiveKit room event listeners
-	*/
-	setupRoomEvents() {
-		if (!this.room) return;
-		const config = getConfig();
-		this.room.on(RoomEvent.Connected, () => {
-			if (config.debug) console.log("[SpeechOS] Room connected");
-			state.setConnected(true);
-		});
-		this.room.on(RoomEvent.Disconnected, (reason) => {
-			if (config.debug) console.log("[SpeechOS] Room disconnected:", reason);
-			state.setConnected(false);
-			state.setMicEnabled(false);
-		});
-		this.room.on(RoomEvent.ParticipantConnected, (participant) => {
-			if (config.debug) console.log("[SpeechOS] Participant connected:", participant.identity);
-		});
-		this.room.on(RoomEvent.LocalTrackSubscribed, (publication) => {
-			if (config.debug) console.log("[SpeechOS] LocalTrackSubscribed event fired:", publication.trackSid);
-			if (this.pendingTrackSubscribed) {
-				this.pendingTrackSubscribed.resolve();
-				this.pendingTrackSubscribed = null;
-			}
-		});
-		this.room.on(RoomEvent.LocalTrackPublished, (publication) => {
-			if (config.debug) console.log("[SpeechOS] LocalTrackPublished:", publication.trackSid, publication.source);
-		});
-		this.room.on(RoomEvent.DataReceived, (data, participant) => {
-			this.handleDataMessage(data, participant);
-		});
-	}
-	/**
-	* Handle incoming data messages from the agent
-	*/
-	handleDataMessage(data, _participant) {
-		const config = getConfig();
-		try {
-			const message = JSON.parse(new TextDecoder().decode(data));
-			if (config.debug) console.log("[SpeechOS] Data received:", message);
-			if (message.type === MESSAGE_TYPE_TRANSCRIPT$1) {
-				const transcript = message.transcript || "";
-				if (config.debug) console.log("[SpeechOS] Transcript received:", transcript);
-				events.emit("transcription:complete", { text: transcript });
-				if (this.pendingTranscript) {
-					this.pendingTranscript.resolve(transcript);
-					this.pendingTranscript = null;
-				}
-			} else if (message.type === MESSAGE_TYPE_EDITED_TEXT$1) {
-				const editedText = message.text || "";
-				if (config.debug) console.log("[SpeechOS] Edited text received:", editedText);
-				events.emit("edit:complete", {
-					text: editedText,
-					originalText: this.editOriginalText || ""
-				});
-				if (this.pendingEditText) {
-					this.pendingEditText.resolve(editedText);
-					this.pendingEditText = null;
-				}
-				this.editOriginalText = null;
-			} else if (message.type === MESSAGE_TYPE_COMMAND_RESULT$1) {
-				const commandResult = message.command || null;
-				if (config.debug) console.log("[SpeechOS] Command result received:", commandResult);
-				events.emit("command:complete", { command: commandResult });
-				if (this.pendingCommand) {
-					this.pendingCommand.resolve(commandResult);
-					this.pendingCommand = null;
-				}
-			} else if (message.type === MESSAGE_TYPE_ERROR$1) {
-				const serverError = message;
-				const errorCode = serverError.code || "server_error";
-				const errorMessage = serverError.message || "A server error occurred";
-				console.error(`[SpeechOS] Error: ${errorMessage} (${errorCode})`);
-				if (config.debug && serverError.details) console.error("[SpeechOS] Error details:", serverError.details);
-				events.emit("error", {
-					code: errorCode,
-					message: errorMessage,
-					source: "server"
-				});
-				const error = new Error(errorMessage);
-				if (this.pendingTranscript) {
-					this.pendingTranscript.reject(error);
-					this.pendingTranscript = null;
-				}
-				if (this.pendingEditText) {
-					this.pendingEditText.reject(error);
-					this.pendingEditText = null;
-				}
-				if (this.pendingCommand) {
-					this.pendingCommand.reject(error);
-					this.pendingCommand = null;
-				}
-			}
-		} catch (error) {
-			console.error("[SpeechOS] Failed to parse data message:", error);
-		}
-	}
-	/**
-	* Publish microphone audio track
-	* Uses the device ID from session settings if set
-	*/
-	async enableMicrophone() {
-		if (!this.room || this.room.state !== "connected") throw new Error("Not connected to room");
-		const config = getConfig();
-		if (!this.micTrack) {
-			if (config.debug) console.log("[SpeechOS] Creating microphone track...");
-			const deviceId = this.sessionSettings.audioDeviceId;
-			const trackOptions = {
-				echoCancellation: true,
-				noiseSuppression: true
-			};
-			if (deviceId) {
-				trackOptions.deviceId = { exact: deviceId };
-				if (config.debug) console.log("[SpeechOS] Using audio device:", deviceId);
-			}
-			try {
-				this.micTrack = await createLocalAudioTrack(trackOptions);
-			} catch (error) {
-				if (deviceId && error instanceof Error) {
-					console.warn("[SpeechOS] Selected audio device unavailable, falling back to default:", error.message);
-					this.micTrack = await createLocalAudioTrack({
-						echoCancellation: true,
-						noiseSuppression: true
-					});
-				} else throw error;
-			}
-			this.logMicrophoneInfo();
-		}
-		const existingPub = this.room.localParticipant.getTrackPublication(Track.Source.Microphone);
-		if (!existingPub) {
-			await this.room.localParticipant.publishTrack(this.micTrack, { source: Track.Source.Microphone });
-			state.setMicEnabled(true);
-			if (config.debug) console.log("[SpeechOS] Microphone track published");
-		}
-	}
-	/**
-	* Log information about the current microphone track
-	*/
-	logMicrophoneInfo() {
-		if (!this.micTrack) return;
-		const config = getConfig();
-		const mediaTrack = this.micTrack.mediaStreamTrack;
-		const settings = mediaTrack.getSettings();
-		console.log("[SpeechOS] Microphone active:", {
-			deviceId: settings.deviceId || "unknown",
-			label: mediaTrack.label || "Unknown device",
-			sampleRate: settings.sampleRate,
-			channelCount: settings.channelCount,
-			echoCancellation: settings.echoCancellation,
-			noiseSuppression: settings.noiseSuppression
-		});
-		if (config.debug) console.log("[SpeechOS] Full audio track settings:", settings);
-	}
-	/**
-	* Disable microphone audio track
-	*/
-	async disableMicrophone() {
-		const config = getConfig();
-		if (this.micTrack) {
-			if (config.debug) console.log("[SpeechOS] Disabling microphone track...");
-			if (this.room?.state === "connected") try {
-				await this.room.localParticipant.unpublishTrack(this.micTrack);
-				if (config.debug) console.log("[SpeechOS] Microphone track unpublished");
-			} catch (error) {
-				console.warn("[SpeechOS] Error unpublishing track:", error);
-			}
-			this.micTrack.stop();
-			this.micTrack.detach();
-			this.micTrack = null;
-			state.setMicEnabled(false);
-			if (config.debug) console.log("[SpeechOS] Microphone track stopped and detached");
-		}
-	}
-	/**
-	* Send a data message to the room
-	*/
-	async sendDataMessage(message) {
-		if (!this.room || this.room.state !== "connected") throw new Error("Not connected to room");
-		const data = new TextEncoder().encode(JSON.stringify(message));
-		await this.room.localParticipant.publishData(data, {
-			reliable: true,
-			topic: TOPIC_SPEECHOS
-		});
-	}
-	/**
-	* Start a voice session with pre-connect audio buffering
-	* Fetches a fresh token, then enables mic with preConnectBuffer to capture audio while connecting.
-	* Agent subscription happens in the background - we don't block on it.
-	*
-	* @param options - Session options including action type and parameters
-	*/
-	async startVoiceSession(options) {
-		const config = getConfig();
-		if (config.debug) console.log("[SpeechOS] Starting voice session...");
-		this.sessionSettings = options?.settings || {};
-		await this.fetchToken();
-		if (!this.tokenData) throw new Error("No token available for LiveKit connection");
-		this.pendingTrackSubscribed = new Deferred();
-		this.pendingTrackSubscribed.setTimeout(15e3, "Connection timed out - agent not available", "connection_timeout", "connection");
-		this.room = new Room({
-			adaptiveStream: true,
-			dynacast: true
-		});
-		this.setupRoomEvents();
-		if (config.debug) console.log("[SpeechOS] Connecting to LiveKit room:", this.tokenData.room, "at", this.tokenData.ws_url);
-		await this.room.connect(this.tokenData.ws_url, this.tokenData.token);
-		if (config.debug) console.log("[SpeechOS] Connected, enabling microphone with preConnectBuffer...");
-		await this.enableMicrophoneWithPreConnectBuffer();
-		if (options?.onMicReady) options.onMicReady();
-		state.setConnected(true);
-		if (config.debug) console.log("[SpeechOS] Voice session ready - microphone active");
-		this.waitForAgentSubscription();
-	}
-	/**
-	* Wait for the agent to subscribe to our audio track in the background
-	* Handles timeout errors without blocking the main flow
-	*/
-	waitForAgentSubscription() {
-		const config = getConfig();
-		if (!this.pendingTrackSubscribed) return;
-		this.pendingTrackSubscribed.promise.then(() => {
-			if (config.debug) console.log("[SpeechOS] Agent subscribed to audio track - full duplex established");
-			this.pendingTrackSubscribed = null;
-		}).catch((error) => {
-			console.warn("[SpeechOS] Agent subscription timeout:", error.message);
-			this.pendingTrackSubscribed = null;
-		});
-	}
-	/**
-	* Enable microphone with pre-connect buffering
-	* This starts capturing audio locally before the room is connected,
-	* buffering it until the connection is established.
-	*/
-	async enableMicrophoneWithPreConnectBuffer() {
-		if (!this.room) throw new Error("Room not initialized");
-		const config = getConfig();
-		const deviceId = this.sessionSettings.audioDeviceId;
-		const constraints = {
-			echoCancellation: true,
-			noiseSuppression: true
-		};
-		if (deviceId) {
-			constraints.deviceId = { exact: deviceId };
-			if (config.debug) console.log("[SpeechOS] Using audio device:", deviceId);
-		}
-		try {
-			await this.room.localParticipant.setMicrophoneEnabled(true, constraints, { preConnectBuffer: true });
-			state.setMicEnabled(true);
-			const micPub = this.room.localParticipant.getTrackPublication(Track.Source.Microphone);
-			if (micPub?.track) {
-				this.micTrack = micPub.track;
-				this.logMicrophoneInfo();
-			}
-			if (config.debug) console.log("[SpeechOS] Microphone enabled with pre-connect buffer - audio is being captured");
-		} catch (error) {
-			if (deviceId && error instanceof Error) {
-				console.warn("[SpeechOS] Selected audio device unavailable, falling back to default:", error.message);
-				await this.room.localParticipant.setMicrophoneEnabled(true, {
-					echoCancellation: true,
-					noiseSuppression: true
-				}, { preConnectBuffer: true });
-				state.setMicEnabled(true);
-			} else throw error;
-		}
-	}
-	/**
-	* Stop the voice session and request the transcript
-	* Returns a promise that resolves with the transcript text
-	* @throws Error if timeout occurs waiting for transcript
-	*/
-	async stopVoiceSession() {
-		const config = getConfig();
-		const settings = this.sessionSettings;
-		const inputLanguage = settings.inputLanguageCode ?? "en-US";
-		const outputLanguage = settings.outputLanguageCode ?? "en-US";
-		console.log("[SpeechOS] Dictate command:", {
-			inputLanguage,
-			outputLanguage
-		});
-		if (config.debug) console.log("[SpeechOS] Stopping voice session, requesting transcript...");
-		await this.disableMicrophone();
-		if (config.debug) console.log("[SpeechOS] Requesting transcript from agent...");
-		this.pendingTranscript = new Deferred();
-		this.pendingTranscript.setTimeout(1e4, "Transcription timed out. Please try again.", "transcription_timeout", "timeout");
-		await this.sendDataMessage({ type: MESSAGE_TYPE_REQUEST_TRANSCRIPT$1 });
-		const result = await this.pendingTranscript.promise;
-		this.pendingTranscript = null;
-		return result;
-	}
-	/**
-	* Alias for stopVoiceSession - granular API naming
-	*/
-	async stopAndGetTranscript() {
-		return this.stopVoiceSession();
-	}
-	/**
-	* Request text editing using the transcript as instructions
-	* Sends the original text to the backend, which applies the spoken instructions
-	* Returns a promise that resolves with the edited text
-	* @throws Error if timeout occurs waiting for edited text
-	*/
-	async requestEditText(originalText) {
-		const config = getConfig();
-		const settings = this.sessionSettings;
-		const inputLanguage = settings.inputLanguageCode ?? "en-US";
-		const outputLanguage = settings.outputLanguageCode ?? "en-US";
-		console.log("[SpeechOS] Edit command:", {
-			inputLanguage,
-			outputLanguage,
-			originalTextLength: originalText.length
-		});
-		if (config.debug) console.log("[SpeechOS] Requesting text edit...");
-		this.editOriginalText = originalText;
-		await this.disableMicrophone();
-		if (config.debug) console.log("[SpeechOS] Sending edit_text request to agent...");
-		this.pendingEditText = new Deferred();
-		this.pendingEditText.setTimeout(15e3, "Edit request timed out. Please try again.", "edit_timeout", "timeout");
-		await this.sendDataMessage({
-			type: MESSAGE_TYPE_EDIT_TEXT$1,
-			text: originalText
-		});
-		const result = await this.pendingEditText.promise;
-		this.pendingEditText = null;
-		return result;
-	}
-	/**
-	* Alias for requestEditText - granular API naming
-	*/
-	async stopAndEdit(originalText) {
-		return this.requestEditText(originalText);
-	}
-	/**
-	* Request command matching using the transcript as input
-	* Sends command definitions to the backend, which matches the user's speech against them
-	* Returns a promise that resolves with the matched command or null if no match
-	* @throws Error if timeout occurs waiting for command result
-	*/
-	async requestCommand(commands) {
-		const config = getConfig();
-		const settings = this.sessionSettings;
-		const inputLanguage = settings.inputLanguageCode ?? "en-US";
-		console.log("[SpeechOS] Command request:", {
-			inputLanguage,
-			commandCount: commands.length
-		});
-		if (config.debug) console.log("[SpeechOS] Requesting command match...");
-		await this.disableMicrophone();
-		if (config.debug) console.log("[SpeechOS] Sending execute_command request to agent...");
-		this.pendingCommand = new Deferred();
-		this.pendingCommand.setTimeout(15e3, "Command request timed out. Please try again.", "command_timeout", "timeout");
-		await this.sendDataMessage({
-			type: MESSAGE_TYPE_EXECUTE_COMMAND$1,
-			commands
-		});
-		const result = await this.pendingCommand.promise;
-		this.pendingCommand = null;
-		return result;
-	}
-	/**
-	* Alias for requestCommand - granular API naming
-	*/
-	async stopAndCommand(commands) {
-		return this.requestCommand(commands);
-	}
-	/**
-	* Disconnect from the current room
-	* Clears the token so a fresh one is fetched for the next session
-	*/
-	async disconnect() {
-		const config = getConfig();
-		if (config.debug) console.log("[SpeechOS] Disconnecting from room...");
-		await this.disableMicrophone();
-		if (this.room) {
-			this.room.removeAllListeners();
-			await this.room.disconnect();
-			this.room = null;
-			state.setConnected(false);
-			if (config.debug) console.log("[SpeechOS] Room disconnected and cleaned up");
-		}
-		if (this.pendingTranscript) {
-			this.pendingTranscript.reject(new Error("Disconnected"));
-			this.pendingTranscript = null;
-		}
-		if (this.pendingEditText) {
-			this.pendingEditText.reject(new Error("Disconnected"));
-			this.pendingEditText = null;
-		}
-		if (this.pendingCommand) {
-			this.pendingCommand.reject(new Error("Disconnected"));
-			this.pendingCommand = null;
-		}
-		if (this.pendingTrackSubscribed) {
-			this.pendingTrackSubscribed.reject(new Error("Disconnected"));
-			this.pendingTrackSubscribed = null;
-		}
-		this.tokenData = null;
-		this.editOriginalText = null;
-		this.sessionSettings = {};
-		if (config.debug) console.log("[SpeechOS] Session state cleared");
-	}
-	/**
-	* Invalidate the cached token
-	* Call this when settings change that would affect the token (language, vocabulary)
-	*/
-	invalidateTokenCache() {
-		const config = getConfig();
-		if (config.debug) console.log("[SpeechOS] Token cache invalidated");
-		this.cachedTokenData = null;
-		this.tokenCacheTimestamp = null;
-	}
-	/**
-	* Start auto-refreshing the token while the widget is expanded.
-	* Call this after a voice session completes to immediately fetch a fresh token
-	* (since each command requires its own token) and keep it fresh for subsequent commands.
-	*/
-	startAutoRefresh() {
-		const config = getConfig();
-		this.autoRefreshEnabled = true;
-		if (config.debug) console.log("[SpeechOS] Token auto-refresh enabled");
-		this.invalidateTokenCache();
-		this.prefetchToken().then(() => {
-			this.scheduleTokenRefresh();
-		}).catch((error) => {
-			if (config.debug) console.warn("[SpeechOS] Failed to prefetch token after command:", error);
-			if (this.autoRefreshEnabled) this.tokenRefreshTimer = setTimeout(() => {
-				this.performAutoRefresh();
-			}, 5 * 1e3);
-		});
-	}
-	/**
-	* Stop auto-refreshing the token.
-	* Call this when the widget collapses or user navigates away.
-	*/
-	stopAutoRefresh() {
-		const config = getConfig();
-		this.autoRefreshEnabled = false;
-		if (this.tokenRefreshTimer) {
-			clearTimeout(this.tokenRefreshTimer);
-			this.tokenRefreshTimer = null;
-		}
-		if (config.debug) console.log("[SpeechOS] Token auto-refresh disabled");
-	}
-	/**
-	* Schedule a token refresh before the current cache expires.
-	* Handles computer sleep by checking elapsed time on each refresh attempt.
-	*/
-	scheduleTokenRefresh() {
-		if (!this.autoRefreshEnabled) return;
-		if (this.tokenRefreshTimer) {
-			clearTimeout(this.tokenRefreshTimer);
-			this.tokenRefreshTimer = null;
-		}
-		const config = getConfig();
-		const refreshBuffer = 30 * 1e3;
-		let timeUntilRefresh;
-		if (this.tokenCacheTimestamp) {
-			const age = Date.now() - this.tokenCacheTimestamp;
-			const timeRemaining = TOKEN_CACHE_TTL_MS - age;
-			timeUntilRefresh = Math.max(0, timeRemaining - refreshBuffer);
-		} else timeUntilRefresh = 0;
-		if (config.debug) console.log(`[SpeechOS] Scheduling token refresh in ${Math.round(timeUntilRefresh / 1e3)}s`);
-		this.tokenRefreshTimer = setTimeout(() => {
-			this.performAutoRefresh();
-		}, timeUntilRefresh);
-	}
-	/**
-	* Perform the auto-refresh, handling computer sleep scenarios.
-	*/
-	async performAutoRefresh() {
-		if (!this.autoRefreshEnabled) return;
-		const config = getConfig();
-		if (this.isCachedTokenValid()) {
-			if (config.debug) console.log("[SpeechOS] Token still valid on refresh check, rescheduling");
-			this.scheduleTokenRefresh();
-			return;
-		}
-		if (config.debug) console.log("[SpeechOS] Auto-refreshing token...");
-		try {
-			const data = await this.fetchTokenFromServer();
-			this.cachedTokenData = data;
-			this.tokenCacheTimestamp = Date.now();
-			if (config.debug) console.log("[SpeechOS] Token auto-refreshed successfully");
-			this.scheduleTokenRefresh();
-		} catch (error) {
-			console.warn("[SpeechOS] Token auto-refresh failed:", error);
-			if (this.autoRefreshEnabled) this.tokenRefreshTimer = setTimeout(() => {
-				this.performAutoRefresh();
-			}, 30 * 1e3);
-		}
-	}
-	/**
-	* Get the current room instance
-	*/
-	getRoom() {
-		return this.room;
-	}
-	/**
-	* Get the current token data
-	*/
-	getTokenData() {
-		return this.tokenData;
-	}
-	/**
-	* Check if connected to a room
-	*/
-	isConnected() {
-		return this.room?.state === "connected";
-	}
-	/**
-	* Check if microphone is enabled
-	*/
-	isMicrophoneEnabled() {
-		return this.micTrack !== null;
-	}
-};
-const livekit = new LiveKitManager();
-events.on("settings:changed", () => {
-	livekit.invalidateTokenCache();
-});
 //#endregion
 //#region src/audio-capture.ts
 /**
@@ -1426,7 +706,7 @@ const RESPONSE_TIMEOUT_MS = 15e3;
 /**
 * A deferred promise with timeout support.
 */
-var Deferred$1 = class {
+var Deferred = class {
 	promise;
 	_resolve;
 	_reject;
@@ -1537,7 +817,7 @@ var WebSocketManager = class {
 		state.setMicEnabled(true);
 		const wsUrl = this.getWebSocketUrl();
 		if (config.debug) console.log("[SpeechOS] Connecting to WebSocket:", wsUrl);
-		this.pendingAuth = new Deferred$1();
+		this.pendingAuth = new Deferred();
 		this.pendingAuth.setTimeout(RESPONSE_TIMEOUT_MS, "Connection timed out", "connection_timeout", "connection");
 		const factory = config.webSocketFactory ?? ((url) => new WebSocket(url));
 		this.ws = factory(wsUrl);
@@ -1685,11 +965,11 @@ var WebSocketManager = class {
 		this.editOriginalText = null;
 	}
 	handleCommandResult(message) {
-		const commandResult = message.command || null;
+		const commands = message.commands || [];
 		this.lastInputText = message.transcript;
-		events.emit("command:complete", { command: commandResult });
+		events.emit("command:complete", { commands });
 		if (this.pendingCommand) {
-			this.pendingCommand.resolve(commandResult);
+			this.pendingCommand.resolve(commands);
 			this.pendingCommand = null;
 		}
 	}
@@ -1727,7 +1007,7 @@ var WebSocketManager = class {
 		const config = getConfig();
 		if (config.debug) console.log("[SpeechOS] Stopping voice session, requesting transcript...");
 		await this.stopAudioCapture();
-		this.pendingTranscript = new Deferred$1();
+		this.pendingTranscript = new Deferred();
 		this.pendingTranscript.setTimeout(RESPONSE_TIMEOUT_MS, "Transcription timed out. Please try again.", "transcription_timeout", "timeout");
 		this.sendMessage({ type: MESSAGE_TYPE_REQUEST_TRANSCRIPT });
 		const result = await this.pendingTranscript.promise;
@@ -1742,7 +1022,7 @@ var WebSocketManager = class {
 		const config = getConfig();
 		if (config.debug) console.log("[SpeechOS] Requesting text edit...");
 		await this.stopAudioCapture();
-		this.pendingEditText = new Deferred$1();
+		this.pendingEditText = new Deferred();
 		this.pendingEditText.setTimeout(RESPONSE_TIMEOUT_MS, "Edit request timed out. Please try again.", "edit_timeout", "timeout");
 		this.sendMessage({ type: MESSAGE_TYPE_EDIT_TEXT });
 		const result = await this.pendingEditText.promise;
@@ -1752,12 +1032,13 @@ var WebSocketManager = class {
 	/**
 	* Request command matching using the transcript as input.
 	* Note: The command definitions were already sent in the auth message via startVoiceSession.
+	* Returns an array of matched commands (empty array if no matches).
 	*/
 	async requestCommand(_commands) {
 		const config = getConfig();
 		if (config.debug) console.log("[SpeechOS] Requesting command match...");
 		await this.stopAudioCapture();
-		this.pendingCommand = new Deferred$1();
+		this.pendingCommand = new Deferred();
 		this.pendingCommand.setTimeout(RESPONSE_TIMEOUT_MS, "Command request timed out. Please try again.", "command_timeout", "timeout");
 		this.sendMessage({ type: MESSAGE_TYPE_EXECUTE_COMMAND });
 		const result = await this.pendingCommand.promise;
@@ -1795,7 +1076,7 @@ var WebSocketManager = class {
 	* Wait for the WebSocket send buffer to drain.
 	*
 	* This ensures all audio data has been transmitted before we request
-	* the transcript. Uses the same pattern as LiveKit's ReadableStream approach.
+	* the transcript.
 	*/
 	async waitForBufferDrain() {
 		if (!this.ws || this.ws.readyState !== WS_OPEN) return;
@@ -1871,7 +1152,7 @@ const websocket = new WebSocketManager();
 //#endregion
 //#region src/speechos.ts
 /**
-* Get the active voice backend (always websocket now)
+* Get the active voice backend
 */
 function getBackend$1() {
 	return websocket;
@@ -1879,9 +1160,7 @@ function getBackend$1() {
 /**
 * SpeechOS Core SDK
 *
-* Provides two API layers:
-* 1. Low-level API: Granular control over LiveKit connection lifecycle
-* 2. High-level API: One-shot methods for common voice tasks
+* Provides a high-level API for common voice tasks.
 */
 var SpeechOSCore = class {
 	initialized = false;
@@ -1905,67 +1184,7 @@ var SpeechOSCore = class {
 		return this.initialized;
 	}
 	/**
-	* Connect to LiveKit (fetches token, establishes connection)
-	* Call this before other low-level methods
-	*/
-	async connect() {
-		this.ensureInitialized();
-		await livekit.connect();
-	}
-	/**
-	* Wait until the agent is ready to receive audio
-	* Resolves when the agent subscribes to our audio track
-	*/
-	async waitUntilReady() {
-		return livekit.waitUntilReady();
-	}
-	/**
-	* Enable microphone (user is now being recorded)
-	*/
-	async enableMicrophone() {
-		await livekit.enableMicrophone();
-		state.setRecordingState("recording");
-	}
-	/**
-	* Stop recording and get the transcript
-	* @returns The transcribed text
-	*/
-	async stopAndGetTranscript() {
-		state.setRecordingState("processing");
-		try {
-			const transcript = await livekit.stopAndGetTranscript();
-			state.completeRecording();
-			return transcript;
-		} catch (error) {
-			state.setError(error instanceof Error ? error.message : "Transcription failed");
-			throw error;
-		}
-	}
-	/**
-	* Stop recording and get edited text
-	* @param originalText - The original text to edit based on voice instructions
-	* @returns The edited text
-	*/
-	async stopAndEdit(originalText) {
-		state.setRecordingState("processing");
-		try {
-			const editedText = await livekit.stopAndEdit(originalText);
-			state.completeRecording();
-			return editedText;
-		} catch (error) {
-			state.setError(error instanceof Error ? error.message : "Edit request failed");
-			throw error;
-		}
-	}
-	/**
-	* Disconnect from LiveKit
-	*/
-	async disconnect() {
-		await livekit.disconnect();
-		state.completeRecording();
-	}
-	/**
-	* One-shot dictation: connect, wait for agent, record, and get transcript
+	* One-shot dictation: connect, record, and get transcript
 	* Automatically handles the full voice session lifecycle
 	*
 	* @returns The transcribed text
@@ -2024,7 +1243,7 @@ var SpeechOSCore = class {
 		}
 	}
 	/**
-	* One-shot edit: connect, wait for agent, record voice instructions, apply to text
+	* One-shot edit: connect, record voice instructions, apply to text
 	* Automatically handles the full voice session lifecycle
 	*
 	* @param originalText - The text to edit
@@ -2089,11 +1308,11 @@ var SpeechOSCore = class {
 		}
 	}
 	/**
-	* One-shot command: connect, wait for agent, record voice, match against commands
+	* One-shot command: connect, record voice, match against commands
 	* Automatically handles the full voice session lifecycle
 	*
 	* @param commands - Array of command definitions to match against
-	* @returns The matched command result or null if no match
+	* @returns Array of matched commands (empty array if no matches)
 	*/
 	async command(commands) {
 		this.ensureInitialized();
@@ -2123,22 +1342,24 @@ var SpeechOSCore = class {
 	_commandResolve;
 	_commandReject;
 	/**
-	* Stop command recording and get the matched command
+	* Stop command recording and get the matched commands
 	* Call this after command() when user stops speaking
+	*
+	* @returns Array of matched commands (empty array if no matches)
 	*/
 	async stopCommand() {
 		state.setRecordingState("processing");
 		try {
 			const backend = getBackend$1();
 			const commands = this._commandCommands || [];
-			const result = await backend.requestCommand(commands);
+			const results = await backend.requestCommand(commands);
 			state.completeRecording();
 			if (this._commandResolve) {
-				this._commandResolve(result);
+				this._commandResolve(results);
 				this._commandResolve = void 0;
 				this._commandReject = void 0;
 			}
-			return result;
+			return results;
 		} catch (error) {
 			const err = error instanceof Error ? error : new Error("Command request failed");
 			state.setError(err.message);
@@ -2240,15 +1461,10 @@ const websocketBackend = {
 	requestCommand: (commands) => websocket.requestCommand(commands),
 	disconnect: () => websocket.disconnect(),
 	isConnected: () => websocket.isConnected(),
-	getLastInputText: () => websocket.getLastInputText(),
-	prefetchToken: () => Promise.resolve({}),
-	startAutoRefresh: () => {},
-	stopAutoRefresh: () => {},
-	invalidateTokenCache: () => {}
+	getLastInputText: () => websocket.getLastInputText()
 };
 /**
 * Get the active voice backend.
-* Always returns WebSocket backend (LiveKit is legacy).
 *
 * @returns The websocket backend
 */
@@ -2256,10 +1472,197 @@ function getBackend() {
 	return websocketBackend;
 }
+//#endregion
+//#region src/tts.ts
+/**
+* Default TTS voice ID (matches server default).
+* The server validates voice IDs - pass any valid voice ID or omit to use default.
+*
+* Exported for callers that want to name the default explicitly. The TTS client
+* in this module does not fall back to it: the request body carries
+* `voice_id: options?.voiceId` unchanged, so the field is left out of the JSON
+* payload when no voice ID is given.
+*/
+const DEFAULT_TTS_VOICE_ID = "21m00Tcm4TlvDq8ikWAM";
+/**
+* Map HTTP status codes to TTS error codes
+*
+* @param status - HTTP response status code
+* @returns "invalid_request" (400), "usage_limit_exceeded" (402),
+* "authentication_failed" (401 or 403), otherwise "unknown_error"
+*/
+function mapHttpStatusToErrorCode(status) {
+	switch (status) {
+		case 400: return "invalid_request";
+		case 402: return "usage_limit_exceeded";
+		// 401 is the standard status for missing/invalid credentials, so report it
+		// the same way as 403 instead of letting it fall through to "unknown_error".
+		case 401:
+		case 403: return "authentication_failed";
+		default: return "unknown_error";
+	}
+}
+/**
+* TTS Client for synthesizing speech from text
+*
+* Both methods POST to `${config.host}/api/tts/` and report progress through the
+* shared `events` emitter: `tts:synthesize:start`, `tts:synthesize:complete` and
+* `tts:error` (payload `{ code, message, phase: "synthesize" }`).
+* Every failure emits exactly one `tts:error` before the method rejects.
+*/
+var TTSClient = class {
+	/**
+	* Synthesize text to speech and return audio bytes
+	*
+	* @param text - Text to synthesize (max 1000 chars)
+	* @param options - Optional synthesis options (`voiceId`, `language`, `signal`)
+	* @returns Audio data and content type (defaults to "audio/mpeg" when the
+	* response has no Content-Type header)
+	* @throws Error when the API key is missing, the server answers with a non-2xx
+	* status, or the request/body read fails. An AbortError caused by
+	* `options.signal` is rethrown unchanged and does not emit `tts:error`.
+	*
+	* @example
+	* ```typescript
+	* const result = await tts.synthesize('Hello world');
+	* console.log(result.audio); // ArrayBuffer
+	* console.log(result.contentType); // 'audio/mpeg'
+	* ```
+	*/
+	async synthesize(text, options) {
+		const response = await requestTtsAudio(text, options);
+		const contentType = response.headers.get("Content-Type") || "audio/mpeg";
+		let audio;
+		try {
+			audio = await response.arrayBuffer();
+		} catch (error) {
+			if (isTtsAbortError(error)) throw error;
+			throw reportTtsNetworkError(error);
+		}
+		events.emit("tts:synthesize:complete", { text });
+		return {
+			audio,
+			contentType
+		};
+	}
+	/**
+	* Stream TTS audio chunks as they arrive from the server
+	*
+	* Useful for progressive playback or processing large texts.
+	*
+	* @param text - Text to synthesize (max 1000 chars)
+	* @param options - Optional synthesis options (`voiceId`, `language`, `signal`)
+	* @yields Audio chunks as Uint8Array
+	* @throws Error when the API key is missing, the server answers with a non-2xx
+	* status, or the request/body read fails. When the request is aborted via
+	* `options.signal` the generator simply ends without throwing.
+	*
+	* @example
+	* ```typescript
+	* const chunks: Uint8Array[] = [];
+	* for await (const chunk of tts.stream('Hello world')) {
+	*   chunks.push(chunk);
+	* }
+	* ```
+	*/
+	async *stream(text, options) {
+		let response;
+		try {
+			response = await requestTtsAudio(text, options);
+		} catch (error) {
+			if (isTtsAbortError(error)) return;
+			throw error;
+		}
+		const reader = response.body?.getReader();
+		if (!reader) throw reportTtsError("network_error", "Response body is not readable");
+		let finished = false;
+		try {
+			while (true) {
+				const { done, value } = await reader.read();
+				if (done) {
+					finished = true;
+					break;
+				}
+				yield value;
+			}
+		} catch (error) {
+			if (isTtsAbortError(error)) return;
+			throw reportTtsNetworkError(error);
+		} finally {
+			// If the consumer stopped early or a read failed, cancel the body so the
+			// download is not left open. cancel() rejects on an already-errored
+			// stream; that rejection carries no new information, so it is ignored.
+			if (!finished) await reader.cancel().catch(() => {});
+			reader.releaseLock();
+		}
+		events.emit("tts:synthesize:complete", { text });
+	}
+};
+/**
+* Whether `error` is the AbortError raised when a request is aborted
+*/
+function isTtsAbortError(error) {
+	return error instanceof Error && error.name === "AbortError";
+}
+/**
+* Emit one `tts:error` event and build the Error the caller should throw
+*
+* @param code - TTS error code placed in the event payload
+* @param message - Message used for both the event and the Error
+* @param cause - Optional underlying error, attached as `cause`
+*/
+function reportTtsError(code, message, cause) {
+	events.emit("tts:error", {
+		code,
+		message,
+		phase: "synthesize"
+	});
+	return cause === void 0 ? new Error(message) : new Error(message, { cause });
+}
+/**
+* Report a transport-level failure as a `network_error`
+*/
+function reportTtsNetworkError(error) {
+	return reportTtsError("network_error", error instanceof Error ? error.message : "Network request failed", error);
+}
+/**
+* Validate configuration, send the TTS request and return the successful Response
+*
+* HTTP failures are reported here, outside the network try/catch, so they are
+* never mistaken for (or re-reported as) network errors regardless of what the
+* server's error message says.
+*/
+async function requestTtsAudio(text, options) {
+	const config = getConfig();
+	if (!config.apiKey) throw reportTtsError("authentication_failed", "API key not configured. Call SpeechOS.init() first.");
+	events.emit("tts:synthesize:start", { text });
+	let response;
+	try {
+		response = await fetch(`${config.host}/api/tts/`, {
+			method: "POST",
+			headers: {
+				"Authorization": `Api-Key ${config.apiKey}`,
+				"Content-Type": "application/json"
+			},
+			signal: options?.signal,
+			body: JSON.stringify({
+				text,
+				voice_id: options?.voiceId,
+				language: options?.language ?? "en",
+				user_id: config.userId || void 0
+			})
+		});
+	} catch (error) {
+		if (isTtsAbortError(error)) throw error;
+		throw reportTtsNetworkError(error);
+	}
+	if (response.ok) return response;
+	let errorMessage;
+	try {
+		const errorData = await response.json();
+		errorMessage = errorData.detail || errorData.message || `HTTP ${response.status}`;
+	} catch {
+		// Body was not JSON (or not an object); fall back to the status line.
+		errorMessage = `HTTP ${response.status}: ${response.statusText}`;
+	}
+	throw reportTtsError(mapHttpStatusToErrorCode(response.status), errorMessage);
+}
+/** Shared TTSClient instance; exported from this module as `tts`. */
+const tts = new TTSClient();
 //#endregion
 //#region src/index.ts
 // NOTE(review): this constant is unchanged context in a diff labelled 0.2.9 -> 0.2.11,
 // so it does not appear to track the published package version - confirm whether it should.
 const VERSION = "0.1.0";
 //#endregion
-export { DEFAULT_HOST, Deferred, SpeechOSEventEmitter, VERSION, clearSettingsToken, createStateManager, events, getBackend, getConfig, getSettingsToken, livekit, resetConfig, setConfig, speechOS, state, updateUserId, validateConfig, websocket };
+export { DEFAULT_HOST, DEFAULT_TTS_VOICE_ID, Deferred, SpeechOSEventEmitter, TTSClient, VERSION, clearSettingsToken, createStateManager, events, getBackend, getConfig, getSettingsToken, resetConfig, setConfig, speechOS, state, tts, updateUserId, validateConfig, websocket };
 //# sourceMappingURL=index.js.map