@speechos/core 0.2.9 → 0.2.11

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,5 +1,3 @@
1
- import { Room, RoomEvent, Track, createLocalAudioTrack } from "livekit-client";
2
-
3
1
  //#region src/config.ts
4
2
  /**
5
3
  * Default host - can be overridden by SPEECHOS_HOST env var at build time
@@ -187,6 +185,8 @@ const initialState = {
187
185
  isMicEnabled: false,
188
186
  activeAction: null,
189
187
  focusedElement: null,
188
+ selectionText: null,
189
+ selectionElement: null,
190
190
  recordingState: "idle",
191
191
  errorMessage: null
192
192
  };
@@ -288,6 +288,26 @@ var StateManager = class {
288
288
  this.setState({ focusedElement: element });
289
289
  }
290
290
  /**
291
+ * Set the current text selection
292
+ * @param text - Selected text (null to clear)
293
+ * @param element - Element associated with selection
294
+ */
295
+ setSelection(text, element) {
296
+ this.setState({
297
+ selectionText: text,
298
+ selectionElement: element
299
+ });
300
+ }
301
+ /**
302
+ * Clear the current text selection
303
+ */
304
+ clearSelection() {
305
+ this.setState({
306
+ selectionText: null,
307
+ selectionElement: null
308
+ });
309
+ }
310
+ /**
291
311
  * Set the active action
292
312
  * @param action - The action to set as active
293
313
  */
@@ -303,7 +323,7 @@ var StateManager = class {
303
323
  }
304
324
  /**
305
325
  * Set the connection state
306
- * @param isConnected - Whether connected to LiveKit
326
+ * @param isConnected - Whether connected to the backend
307
327
  */
308
328
  setConnected(isConnected) {
309
329
  this.setState({ isConnected });
@@ -389,746 +409,6 @@ function createStateManager(initial) {
389
409
  });
390
410
  }
391
411
 
392
- //#endregion
393
- //#region src/livekit.ts
394
- const MESSAGE_TYPE_REQUEST_TRANSCRIPT$1 = "request_transcript";
395
- const MESSAGE_TYPE_TRANSCRIPT$1 = "transcript";
396
- const MESSAGE_TYPE_EDIT_TEXT$1 = "edit_text";
397
- const MESSAGE_TYPE_EDITED_TEXT$1 = "edited_text";
398
- const MESSAGE_TYPE_EXECUTE_COMMAND$1 = "execute_command";
399
- const MESSAGE_TYPE_COMMAND_RESULT$1 = "command_result";
400
- const MESSAGE_TYPE_ERROR$1 = "error";
401
- const TOPIC_SPEECHOS = "speechos";
402
- const TOKEN_CACHE_TTL_MS = 4 * 60 * 1e3;
403
- /**
404
- * A deferred promise with timeout support.
405
- * Encapsulates resolve/reject/timeout in a single object for cleaner async handling.
406
- */
407
- var Deferred = class {
408
- promise;
409
- _resolve;
410
- _reject;
411
- _timeoutId = null;
412
- _settled = false;
413
- constructor() {
414
- this.promise = new Promise((resolve, reject) => {
415
- this._resolve = resolve;
416
- this._reject = reject;
417
- });
418
- }
419
- /**
420
- * Set a timeout that will reject the promise with the given error
421
- */
422
- setTimeout(ms, errorMessage, errorCode, errorSource) {
423
- this._timeoutId = setTimeout(() => {
424
- if (!this._settled) {
425
- console.error(`[SpeechOS] Error: ${errorMessage} (${errorCode})`);
426
- events.emit("error", {
427
- code: errorCode,
428
- message: errorMessage,
429
- source: errorSource
430
- });
431
- this.reject(new Error(errorMessage));
432
- }
433
- }, ms);
434
- }
435
- resolve(value) {
436
- if (!this._settled) {
437
- this._settled = true;
438
- this.clearTimeout();
439
- this._resolve(value);
440
- }
441
- }
442
- reject(error) {
443
- if (!this._settled) {
444
- this._settled = true;
445
- this.clearTimeout();
446
- this._reject(error);
447
- }
448
- }
449
- clearTimeout() {
450
- if (this._timeoutId !== null) {
451
- clearTimeout(this._timeoutId);
452
- this._timeoutId = null;
453
- }
454
- }
455
- get isSettled() {
456
- return this._settled;
457
- }
458
- };
459
- /**
460
- * LiveKit connection manager
461
- */
462
- var LiveKitManager = class {
463
- room = null;
464
- tokenData = null;
465
- micTrack = null;
466
- cachedTokenData = null;
467
- tokenCacheTimestamp = null;
468
- tokenPrefetchPromise = null;
469
- tokenRefreshTimer = null;
470
- autoRefreshEnabled = false;
471
- pendingTranscript = null;
472
- pendingEditText = null;
473
- pendingCommand = null;
474
- pendingTrackSubscribed = null;
475
- editOriginalText = null;
476
- sessionSettings = {};
477
- /**
478
- * Check if the cached token is still valid (within TTL)
479
- */
480
- isCachedTokenValid() {
481
- if (!this.cachedTokenData || !this.tokenCacheTimestamp) return false;
482
- const age = Date.now() - this.tokenCacheTimestamp;
483
- return age < TOKEN_CACHE_TTL_MS;
484
- }
485
- /**
486
- * Pre-fetch a LiveKit token for later use
487
- * Call this early (e.g., when widget expands) to reduce latency when starting a voice session.
488
- * If a prefetch is already in progress, returns the existing promise.
489
- * If a valid cached token exists, returns it immediately.
490
- */
491
- async prefetchToken() {
492
- const config = getConfig();
493
- if (this.isCachedTokenValid() && this.cachedTokenData) {
494
- if (config.debug) console.log("[SpeechOS] Using cached token (prefetch hit)");
495
- return this.cachedTokenData;
496
- }
497
- if (this.tokenPrefetchPromise) {
498
- if (config.debug) console.log("[SpeechOS] Prefetch already in progress, awaiting...");
499
- return this.tokenPrefetchPromise;
500
- }
501
- if (config.debug) console.log("[SpeechOS] Starting token prefetch...");
502
- this.tokenPrefetchPromise = this.fetchTokenFromServer().then((data) => {
503
- this.cachedTokenData = data;
504
- this.tokenCacheTimestamp = Date.now();
505
- this.tokenPrefetchPromise = null;
506
- return data;
507
- }).catch((error) => {
508
- this.tokenPrefetchPromise = null;
509
- throw error;
510
- });
511
- return this.tokenPrefetchPromise;
512
- }
513
- /**
514
- * Fetch a LiveKit token from the backend
515
- * Uses cached token if valid, otherwise fetches a fresh one.
516
- * Includes language settings and user vocabulary which are stored in the VoiceSession.
517
- */
518
- async fetchToken() {
519
- const config = getConfig();
520
- if (this.isCachedTokenValid() && this.cachedTokenData) {
521
- if (config.debug) console.log("[SpeechOS] Using cached token");
522
- this.tokenData = this.cachedTokenData;
523
- return this.cachedTokenData;
524
- }
525
- if (this.tokenPrefetchPromise) {
526
- if (config.debug) console.log("[SpeechOS] Waiting for prefetch to complete...");
527
- const data$1 = await this.tokenPrefetchPromise;
528
- this.tokenData = data$1;
529
- return data$1;
530
- }
531
- const data = await this.fetchTokenFromServer();
532
- this.cachedTokenData = data;
533
- this.tokenCacheTimestamp = Date.now();
534
- this.tokenData = data;
535
- return data;
536
- }
537
- /**
538
- * Internal method to fetch a fresh token from the server
539
- */
540
- async fetchTokenFromServer() {
541
- const config = getConfig();
542
- const url = `${config.host}/livekit/api/token/`;
543
- const settings = this.sessionSettings;
544
- const inputLanguage = settings.inputLanguageCode ?? "en-US";
545
- const outputLanguage = settings.outputLanguageCode ?? "en-US";
546
- const smartFormat = settings.smartFormat ?? true;
547
- const vocabulary = settings.vocabulary ?? [];
548
- const snippets = settings.snippets ?? [];
549
- if (config.debug) {
550
- console.log("[SpeechOS] Fetching LiveKit token from:", url);
551
- console.log("[SpeechOS] Session settings:", {
552
- inputLanguage,
553
- outputLanguage,
554
- smartFormat,
555
- snippetsCount: snippets.length,
556
- vocabularyCount: vocabulary.length
557
- });
558
- }
559
- const response = await fetch(url, {
560
- method: "POST",
561
- headers: {
562
- "Content-Type": "application/json",
563
- ...config.apiKey ? { Authorization: `Api-Key ${config.apiKey}` } : {}
564
- },
565
- body: JSON.stringify({
566
- user_id: config.userId || null,
567
- input_language: inputLanguage,
568
- output_language: outputLanguage,
569
- smart_format: smartFormat,
570
- custom_vocabulary: vocabulary,
571
- custom_snippets: snippets
572
- })
573
- });
574
- if (!response.ok) throw new Error(`Failed to fetch LiveKit token: ${response.status} ${response.statusText}`);
575
- const data = await response.json();
576
- if (config.debug) console.log("[SpeechOS] LiveKit token received:", {
577
- room: data.room,
578
- identity: data.identity,
579
- ws_url: data.ws_url
580
- });
581
- return data;
582
- }
583
- /**
584
- * Connect to a LiveKit room (fresh connection each time)
585
- */
586
- async connect() {
587
- const config = getConfig();
588
- await this.fetchToken();
589
- if (!this.tokenData) throw new Error("No token available for LiveKit connection");
590
- this.room = new Room({
591
- adaptiveStream: true,
592
- dynacast: true
593
- });
594
- this.setupRoomEvents();
595
- if (config.debug) console.log("[SpeechOS] Connecting to LiveKit room:", this.tokenData.room);
596
- await this.room.connect(this.tokenData.ws_url, this.tokenData.token);
597
- state.setConnected(true);
598
- if (config.debug) console.log("[SpeechOS] Connected to LiveKit room:", this.room.name);
599
- return this.room;
600
- }
601
- /**
602
- * Wait until the agent is ready to receive audio
603
- * Resolves when LocalTrackSubscribed event is received
604
- */
605
- async waitUntilReady() {
606
- if (!this.room || this.room.state !== "connected") throw new Error("Not connected to room");
607
- if (this.pendingTrackSubscribed) return this.pendingTrackSubscribed.promise;
608
- this.pendingTrackSubscribed = new Deferred();
609
- this.pendingTrackSubscribed.setTimeout(15e3, "Connection timed out - agent not available", "connection_timeout", "connection");
610
- return this.pendingTrackSubscribed.promise;
611
- }
612
- /**
613
- * Set up LiveKit room event listeners
614
- */
615
- setupRoomEvents() {
616
- if (!this.room) return;
617
- const config = getConfig();
618
- this.room.on(RoomEvent.Connected, () => {
619
- if (config.debug) console.log("[SpeechOS] Room connected");
620
- state.setConnected(true);
621
- });
622
- this.room.on(RoomEvent.Disconnected, (reason) => {
623
- if (config.debug) console.log("[SpeechOS] Room disconnected:", reason);
624
- state.setConnected(false);
625
- state.setMicEnabled(false);
626
- });
627
- this.room.on(RoomEvent.ParticipantConnected, (participant) => {
628
- if (config.debug) console.log("[SpeechOS] Participant connected:", participant.identity);
629
- });
630
- this.room.on(RoomEvent.LocalTrackSubscribed, (publication) => {
631
- if (config.debug) console.log("[SpeechOS] LocalTrackSubscribed event fired:", publication.trackSid);
632
- if (this.pendingTrackSubscribed) {
633
- this.pendingTrackSubscribed.resolve();
634
- this.pendingTrackSubscribed = null;
635
- }
636
- });
637
- this.room.on(RoomEvent.LocalTrackPublished, (publication) => {
638
- if (config.debug) console.log("[SpeechOS] LocalTrackPublished:", publication.trackSid, publication.source);
639
- });
640
- this.room.on(RoomEvent.DataReceived, (data, participant) => {
641
- this.handleDataMessage(data, participant);
642
- });
643
- }
644
- /**
645
- * Handle incoming data messages from the agent
646
- */
647
- handleDataMessage(data, _participant) {
648
- const config = getConfig();
649
- try {
650
- const message = JSON.parse(new TextDecoder().decode(data));
651
- if (config.debug) console.log("[SpeechOS] Data received:", message);
652
- if (message.type === MESSAGE_TYPE_TRANSCRIPT$1) {
653
- const transcript = message.transcript || "";
654
- if (config.debug) console.log("[SpeechOS] Transcript received:", transcript);
655
- events.emit("transcription:complete", { text: transcript });
656
- if (this.pendingTranscript) {
657
- this.pendingTranscript.resolve(transcript);
658
- this.pendingTranscript = null;
659
- }
660
- } else if (message.type === MESSAGE_TYPE_EDITED_TEXT$1) {
661
- const editedText = message.text || "";
662
- if (config.debug) console.log("[SpeechOS] Edited text received:", editedText);
663
- events.emit("edit:complete", {
664
- text: editedText,
665
- originalText: this.editOriginalText || ""
666
- });
667
- if (this.pendingEditText) {
668
- this.pendingEditText.resolve(editedText);
669
- this.pendingEditText = null;
670
- }
671
- this.editOriginalText = null;
672
- } else if (message.type === MESSAGE_TYPE_COMMAND_RESULT$1) {
673
- const commandResult = message.command || null;
674
- if (config.debug) console.log("[SpeechOS] Command result received:", commandResult);
675
- events.emit("command:complete", { command: commandResult });
676
- if (this.pendingCommand) {
677
- this.pendingCommand.resolve(commandResult);
678
- this.pendingCommand = null;
679
- }
680
- } else if (message.type === MESSAGE_TYPE_ERROR$1) {
681
- const serverError = message;
682
- const errorCode = serverError.code || "server_error";
683
- const errorMessage = serverError.message || "A server error occurred";
684
- console.error(`[SpeechOS] Error: ${errorMessage} (${errorCode})`);
685
- if (config.debug && serverError.details) console.error("[SpeechOS] Error details:", serverError.details);
686
- events.emit("error", {
687
- code: errorCode,
688
- message: errorMessage,
689
- source: "server"
690
- });
691
- const error = new Error(errorMessage);
692
- if (this.pendingTranscript) {
693
- this.pendingTranscript.reject(error);
694
- this.pendingTranscript = null;
695
- }
696
- if (this.pendingEditText) {
697
- this.pendingEditText.reject(error);
698
- this.pendingEditText = null;
699
- }
700
- if (this.pendingCommand) {
701
- this.pendingCommand.reject(error);
702
- this.pendingCommand = null;
703
- }
704
- }
705
- } catch (error) {
706
- console.error("[SpeechOS] Failed to parse data message:", error);
707
- }
708
- }
709
- /**
710
- * Publish microphone audio track
711
- * Uses the device ID from session settings if set
712
- */
713
- async enableMicrophone() {
714
- if (!this.room || this.room.state !== "connected") throw new Error("Not connected to room");
715
- const config = getConfig();
716
- if (!this.micTrack) {
717
- if (config.debug) console.log("[SpeechOS] Creating microphone track...");
718
- const deviceId = this.sessionSettings.audioDeviceId;
719
- const trackOptions = {
720
- echoCancellation: true,
721
- noiseSuppression: true
722
- };
723
- if (deviceId) {
724
- trackOptions.deviceId = { exact: deviceId };
725
- if (config.debug) console.log("[SpeechOS] Using audio device:", deviceId);
726
- }
727
- try {
728
- this.micTrack = await createLocalAudioTrack(trackOptions);
729
- } catch (error) {
730
- if (deviceId && error instanceof Error) {
731
- console.warn("[SpeechOS] Selected audio device unavailable, falling back to default:", error.message);
732
- this.micTrack = await createLocalAudioTrack({
733
- echoCancellation: true,
734
- noiseSuppression: true
735
- });
736
- } else throw error;
737
- }
738
- this.logMicrophoneInfo();
739
- }
740
- const existingPub = this.room.localParticipant.getTrackPublication(Track.Source.Microphone);
741
- if (!existingPub) {
742
- await this.room.localParticipant.publishTrack(this.micTrack, { source: Track.Source.Microphone });
743
- state.setMicEnabled(true);
744
- if (config.debug) console.log("[SpeechOS] Microphone track published");
745
- }
746
- }
747
- /**
748
- * Log information about the current microphone track
749
- */
750
- logMicrophoneInfo() {
751
- if (!this.micTrack) return;
752
- const config = getConfig();
753
- const mediaTrack = this.micTrack.mediaStreamTrack;
754
- const settings = mediaTrack.getSettings();
755
- console.log("[SpeechOS] Microphone active:", {
756
- deviceId: settings.deviceId || "unknown",
757
- label: mediaTrack.label || "Unknown device",
758
- sampleRate: settings.sampleRate,
759
- channelCount: settings.channelCount,
760
- echoCancellation: settings.echoCancellation,
761
- noiseSuppression: settings.noiseSuppression
762
- });
763
- if (config.debug) console.log("[SpeechOS] Full audio track settings:", settings);
764
- }
765
- /**
766
- * Disable microphone audio track
767
- */
768
- async disableMicrophone() {
769
- const config = getConfig();
770
- if (this.micTrack) {
771
- if (config.debug) console.log("[SpeechOS] Disabling microphone track...");
772
- if (this.room?.state === "connected") try {
773
- await this.room.localParticipant.unpublishTrack(this.micTrack);
774
- if (config.debug) console.log("[SpeechOS] Microphone track unpublished");
775
- } catch (error) {
776
- console.warn("[SpeechOS] Error unpublishing track:", error);
777
- }
778
- this.micTrack.stop();
779
- this.micTrack.detach();
780
- this.micTrack = null;
781
- state.setMicEnabled(false);
782
- if (config.debug) console.log("[SpeechOS] Microphone track stopped and detached");
783
- }
784
- }
785
- /**
786
- * Send a data message to the room
787
- */
788
- async sendDataMessage(message) {
789
- if (!this.room || this.room.state !== "connected") throw new Error("Not connected to room");
790
- const data = new TextEncoder().encode(JSON.stringify(message));
791
- await this.room.localParticipant.publishData(data, {
792
- reliable: true,
793
- topic: TOPIC_SPEECHOS
794
- });
795
- }
796
- /**
797
- * Start a voice session with pre-connect audio buffering
798
- * Fetches a fresh token, then enables mic with preConnectBuffer to capture audio while connecting.
799
- * Agent subscription happens in the background - we don't block on it.
800
- *
801
- * @param options - Session options including action type and parameters
802
- */
803
- async startVoiceSession(options) {
804
- const config = getConfig();
805
- if (config.debug) console.log("[SpeechOS] Starting voice session...");
806
- this.sessionSettings = options?.settings || {};
807
- await this.fetchToken();
808
- if (!this.tokenData) throw new Error("No token available for LiveKit connection");
809
- this.pendingTrackSubscribed = new Deferred();
810
- this.pendingTrackSubscribed.setTimeout(15e3, "Connection timed out - agent not available", "connection_timeout", "connection");
811
- this.room = new Room({
812
- adaptiveStream: true,
813
- dynacast: true
814
- });
815
- this.setupRoomEvents();
816
- if (config.debug) console.log("[SpeechOS] Connecting to LiveKit room:", this.tokenData.room, "at", this.tokenData.ws_url);
817
- await this.room.connect(this.tokenData.ws_url, this.tokenData.token);
818
- if (config.debug) console.log("[SpeechOS] Connected, enabling microphone with preConnectBuffer...");
819
- await this.enableMicrophoneWithPreConnectBuffer();
820
- if (options?.onMicReady) options.onMicReady();
821
- state.setConnected(true);
822
- if (config.debug) console.log("[SpeechOS] Voice session ready - microphone active");
823
- this.waitForAgentSubscription();
824
- }
825
- /**
826
- * Wait for the agent to subscribe to our audio track in the background
827
- * Handles timeout errors without blocking the main flow
828
- */
829
- waitForAgentSubscription() {
830
- const config = getConfig();
831
- if (!this.pendingTrackSubscribed) return;
832
- this.pendingTrackSubscribed.promise.then(() => {
833
- if (config.debug) console.log("[SpeechOS] Agent subscribed to audio track - full duplex established");
834
- this.pendingTrackSubscribed = null;
835
- }).catch((error) => {
836
- console.warn("[SpeechOS] Agent subscription timeout:", error.message);
837
- this.pendingTrackSubscribed = null;
838
- });
839
- }
840
- /**
841
- * Enable microphone with pre-connect buffering
842
- * This starts capturing audio locally before the room is connected,
843
- * buffering it until the connection is established.
844
- */
845
- async enableMicrophoneWithPreConnectBuffer() {
846
- if (!this.room) throw new Error("Room not initialized");
847
- const config = getConfig();
848
- const deviceId = this.sessionSettings.audioDeviceId;
849
- const constraints = {
850
- echoCancellation: true,
851
- noiseSuppression: true
852
- };
853
- if (deviceId) {
854
- constraints.deviceId = { exact: deviceId };
855
- if (config.debug) console.log("[SpeechOS] Using audio device:", deviceId);
856
- }
857
- try {
858
- await this.room.localParticipant.setMicrophoneEnabled(true, constraints, { preConnectBuffer: true });
859
- state.setMicEnabled(true);
860
- const micPub = this.room.localParticipant.getTrackPublication(Track.Source.Microphone);
861
- if (micPub?.track) {
862
- this.micTrack = micPub.track;
863
- this.logMicrophoneInfo();
864
- }
865
- if (config.debug) console.log("[SpeechOS] Microphone enabled with pre-connect buffer - audio is being captured");
866
- } catch (error) {
867
- if (deviceId && error instanceof Error) {
868
- console.warn("[SpeechOS] Selected audio device unavailable, falling back to default:", error.message);
869
- await this.room.localParticipant.setMicrophoneEnabled(true, {
870
- echoCancellation: true,
871
- noiseSuppression: true
872
- }, { preConnectBuffer: true });
873
- state.setMicEnabled(true);
874
- } else throw error;
875
- }
876
- }
877
- /**
878
- * Stop the voice session and request the transcript
879
- * Returns a promise that resolves with the transcript text
880
- * @throws Error if timeout occurs waiting for transcript
881
- */
882
- async stopVoiceSession() {
883
- const config = getConfig();
884
- const settings = this.sessionSettings;
885
- const inputLanguage = settings.inputLanguageCode ?? "en-US";
886
- const outputLanguage = settings.outputLanguageCode ?? "en-US";
887
- console.log("[SpeechOS] Dictate command:", {
888
- inputLanguage,
889
- outputLanguage
890
- });
891
- if (config.debug) console.log("[SpeechOS] Stopping voice session, requesting transcript...");
892
- await this.disableMicrophone();
893
- if (config.debug) console.log("[SpeechOS] Requesting transcript from agent...");
894
- this.pendingTranscript = new Deferred();
895
- this.pendingTranscript.setTimeout(1e4, "Transcription timed out. Please try again.", "transcription_timeout", "timeout");
896
- await this.sendDataMessage({ type: MESSAGE_TYPE_REQUEST_TRANSCRIPT$1 });
897
- const result = await this.pendingTranscript.promise;
898
- this.pendingTranscript = null;
899
- return result;
900
- }
901
- /**
902
- * Alias for stopVoiceSession - granular API naming
903
- */
904
- async stopAndGetTranscript() {
905
- return this.stopVoiceSession();
906
- }
907
- /**
908
- * Request text editing using the transcript as instructions
909
- * Sends the original text to the backend, which applies the spoken instructions
910
- * Returns a promise that resolves with the edited text
911
- * @throws Error if timeout occurs waiting for edited text
912
- */
913
- async requestEditText(originalText) {
914
- const config = getConfig();
915
- const settings = this.sessionSettings;
916
- const inputLanguage = settings.inputLanguageCode ?? "en-US";
917
- const outputLanguage = settings.outputLanguageCode ?? "en-US";
918
- console.log("[SpeechOS] Edit command:", {
919
- inputLanguage,
920
- outputLanguage,
921
- originalTextLength: originalText.length
922
- });
923
- if (config.debug) console.log("[SpeechOS] Requesting text edit...");
924
- this.editOriginalText = originalText;
925
- await this.disableMicrophone();
926
- if (config.debug) console.log("[SpeechOS] Sending edit_text request to agent...");
927
- this.pendingEditText = new Deferred();
928
- this.pendingEditText.setTimeout(15e3, "Edit request timed out. Please try again.", "edit_timeout", "timeout");
929
- await this.sendDataMessage({
930
- type: MESSAGE_TYPE_EDIT_TEXT$1,
931
- text: originalText
932
- });
933
- const result = await this.pendingEditText.promise;
934
- this.pendingEditText = null;
935
- return result;
936
- }
937
- /**
938
- * Alias for requestEditText - granular API naming
939
- */
940
- async stopAndEdit(originalText) {
941
- return this.requestEditText(originalText);
942
- }
943
- /**
944
- * Request command matching using the transcript as input
945
- * Sends command definitions to the backend, which matches the user's speech against them
946
- * Returns a promise that resolves with the matched command or null if no match
947
- * @throws Error if timeout occurs waiting for command result
948
- */
949
- async requestCommand(commands) {
950
- const config = getConfig();
951
- const settings = this.sessionSettings;
952
- const inputLanguage = settings.inputLanguageCode ?? "en-US";
953
- console.log("[SpeechOS] Command request:", {
954
- inputLanguage,
955
- commandCount: commands.length
956
- });
957
- if (config.debug) console.log("[SpeechOS] Requesting command match...");
958
- await this.disableMicrophone();
959
- if (config.debug) console.log("[SpeechOS] Sending execute_command request to agent...");
960
- this.pendingCommand = new Deferred();
961
- this.pendingCommand.setTimeout(15e3, "Command request timed out. Please try again.", "command_timeout", "timeout");
962
- await this.sendDataMessage({
963
- type: MESSAGE_TYPE_EXECUTE_COMMAND$1,
964
- commands
965
- });
966
- const result = await this.pendingCommand.promise;
967
- this.pendingCommand = null;
968
- return result;
969
- }
970
- /**
971
- * Alias for requestCommand - granular API naming
972
- */
973
- async stopAndCommand(commands) {
974
- return this.requestCommand(commands);
975
- }
976
- /**
977
- * Disconnect from the current room
978
- * Clears the token so a fresh one is fetched for the next session
979
- */
980
- async disconnect() {
981
- const config = getConfig();
982
- if (config.debug) console.log("[SpeechOS] Disconnecting from room...");
983
- await this.disableMicrophone();
984
- if (this.room) {
985
- this.room.removeAllListeners();
986
- await this.room.disconnect();
987
- this.room = null;
988
- state.setConnected(false);
989
- if (config.debug) console.log("[SpeechOS] Room disconnected and cleaned up");
990
- }
991
- if (this.pendingTranscript) {
992
- this.pendingTranscript.reject(new Error("Disconnected"));
993
- this.pendingTranscript = null;
994
- }
995
- if (this.pendingEditText) {
996
- this.pendingEditText.reject(new Error("Disconnected"));
997
- this.pendingEditText = null;
998
- }
999
- if (this.pendingCommand) {
1000
- this.pendingCommand.reject(new Error("Disconnected"));
1001
- this.pendingCommand = null;
1002
- }
1003
- if (this.pendingTrackSubscribed) {
1004
- this.pendingTrackSubscribed.reject(new Error("Disconnected"));
1005
- this.pendingTrackSubscribed = null;
1006
- }
1007
- this.tokenData = null;
1008
- this.editOriginalText = null;
1009
- this.sessionSettings = {};
1010
- if (config.debug) console.log("[SpeechOS] Session state cleared");
1011
- }
1012
- /**
1013
- * Invalidate the cached token
1014
- * Call this when settings change that would affect the token (language, vocabulary)
1015
- */
1016
- invalidateTokenCache() {
1017
- const config = getConfig();
1018
- if (config.debug) console.log("[SpeechOS] Token cache invalidated");
1019
- this.cachedTokenData = null;
1020
- this.tokenCacheTimestamp = null;
1021
- }
1022
- /**
1023
- * Start auto-refreshing the token while the widget is expanded.
1024
- * Call this after a voice session completes to immediately fetch a fresh token
1025
- * (since each command requires its own token) and keep it fresh for subsequent commands.
1026
- */
1027
- startAutoRefresh() {
1028
- const config = getConfig();
1029
- this.autoRefreshEnabled = true;
1030
- if (config.debug) console.log("[SpeechOS] Token auto-refresh enabled");
1031
- this.invalidateTokenCache();
1032
- this.prefetchToken().then(() => {
1033
- this.scheduleTokenRefresh();
1034
- }).catch((error) => {
1035
- if (config.debug) console.warn("[SpeechOS] Failed to prefetch token after command:", error);
1036
- if (this.autoRefreshEnabled) this.tokenRefreshTimer = setTimeout(() => {
1037
- this.performAutoRefresh();
1038
- }, 5 * 1e3);
1039
- });
1040
- }
1041
- /**
1042
- * Stop auto-refreshing the token.
1043
- * Call this when the widget collapses or user navigates away.
1044
- */
1045
- stopAutoRefresh() {
1046
- const config = getConfig();
1047
- this.autoRefreshEnabled = false;
1048
- if (this.tokenRefreshTimer) {
1049
- clearTimeout(this.tokenRefreshTimer);
1050
- this.tokenRefreshTimer = null;
1051
- }
1052
- if (config.debug) console.log("[SpeechOS] Token auto-refresh disabled");
1053
- }
1054
- /**
1055
- * Schedule a token refresh before the current cache expires.
1056
- * Handles computer sleep by checking elapsed time on each refresh attempt.
1057
- */
1058
- scheduleTokenRefresh() {
1059
- if (!this.autoRefreshEnabled) return;
1060
- if (this.tokenRefreshTimer) {
1061
- clearTimeout(this.tokenRefreshTimer);
1062
- this.tokenRefreshTimer = null;
1063
- }
1064
- const config = getConfig();
1065
- const refreshBuffer = 30 * 1e3;
1066
- let timeUntilRefresh;
1067
- if (this.tokenCacheTimestamp) {
1068
- const age = Date.now() - this.tokenCacheTimestamp;
1069
- const timeRemaining = TOKEN_CACHE_TTL_MS - age;
1070
- timeUntilRefresh = Math.max(0, timeRemaining - refreshBuffer);
1071
- } else timeUntilRefresh = 0;
1072
- if (config.debug) console.log(`[SpeechOS] Scheduling token refresh in ${Math.round(timeUntilRefresh / 1e3)}s`);
1073
- this.tokenRefreshTimer = setTimeout(() => {
1074
- this.performAutoRefresh();
1075
- }, timeUntilRefresh);
1076
- }
1077
- /**
1078
- * Perform the auto-refresh, handling computer sleep scenarios.
1079
- */
1080
- async performAutoRefresh() {
1081
- if (!this.autoRefreshEnabled) return;
1082
- const config = getConfig();
1083
- if (this.isCachedTokenValid()) {
1084
- if (config.debug) console.log("[SpeechOS] Token still valid on refresh check, rescheduling");
1085
- this.scheduleTokenRefresh();
1086
- return;
1087
- }
1088
- if (config.debug) console.log("[SpeechOS] Auto-refreshing token...");
1089
- try {
1090
- const data = await this.fetchTokenFromServer();
1091
- this.cachedTokenData = data;
1092
- this.tokenCacheTimestamp = Date.now();
1093
- if (config.debug) console.log("[SpeechOS] Token auto-refreshed successfully");
1094
- this.scheduleTokenRefresh();
1095
- } catch (error) {
1096
- console.warn("[SpeechOS] Token auto-refresh failed:", error);
1097
- if (this.autoRefreshEnabled) this.tokenRefreshTimer = setTimeout(() => {
1098
- this.performAutoRefresh();
1099
- }, 30 * 1e3);
1100
- }
1101
- }
1102
- /**
1103
- * Get the current room instance
1104
- */
1105
- getRoom() {
1106
- return this.room;
1107
- }
1108
- /**
1109
- * Get the current token data
1110
- */
1111
- getTokenData() {
1112
- return this.tokenData;
1113
- }
1114
- /**
1115
- * Check if connected to a room
1116
- */
1117
- isConnected() {
1118
- return this.room?.state === "connected";
1119
- }
1120
- /**
1121
- * Check if microphone is enabled
1122
- */
1123
- isMicrophoneEnabled() {
1124
- return this.micTrack !== null;
1125
- }
1126
- };
1127
- const livekit = new LiveKitManager();
1128
- events.on("settings:changed", () => {
1129
- livekit.invalidateTokenCache();
1130
- });
1131
-
1132
412
  //#endregion
1133
413
  //#region src/audio-capture.ts
1134
414
  /**
@@ -1426,7 +706,7 @@ const RESPONSE_TIMEOUT_MS = 15e3;
1426
706
  /**
1427
707
  * A deferred promise with timeout support.
1428
708
  */
1429
- var Deferred$1 = class {
709
+ var Deferred = class {
1430
710
  promise;
1431
711
  _resolve;
1432
712
  _reject;
@@ -1537,7 +817,7 @@ var WebSocketManager = class {
1537
817
  state.setMicEnabled(true);
1538
818
  const wsUrl = this.getWebSocketUrl();
1539
819
  if (config.debug) console.log("[SpeechOS] Connecting to WebSocket:", wsUrl);
1540
- this.pendingAuth = new Deferred$1();
820
+ this.pendingAuth = new Deferred();
1541
821
  this.pendingAuth.setTimeout(RESPONSE_TIMEOUT_MS, "Connection timed out", "connection_timeout", "connection");
1542
822
  const factory = config.webSocketFactory ?? ((url) => new WebSocket(url));
1543
823
  this.ws = factory(wsUrl);
@@ -1685,11 +965,11 @@ var WebSocketManager = class {
1685
965
  this.editOriginalText = null;
1686
966
  }
1687
967
  handleCommandResult(message) {
1688
- const commandResult = message.command || null;
968
+ const commands = message.commands || [];
1689
969
  this.lastInputText = message.transcript;
1690
- events.emit("command:complete", { command: commandResult });
970
+ events.emit("command:complete", { commands });
1691
971
  if (this.pendingCommand) {
1692
- this.pendingCommand.resolve(commandResult);
972
+ this.pendingCommand.resolve(commands);
1693
973
  this.pendingCommand = null;
1694
974
  }
1695
975
  }
@@ -1727,7 +1007,7 @@ var WebSocketManager = class {
1727
1007
  const config = getConfig();
1728
1008
  if (config.debug) console.log("[SpeechOS] Stopping voice session, requesting transcript...");
1729
1009
  await this.stopAudioCapture();
1730
- this.pendingTranscript = new Deferred$1();
1010
+ this.pendingTranscript = new Deferred();
1731
1011
  this.pendingTranscript.setTimeout(RESPONSE_TIMEOUT_MS, "Transcription timed out. Please try again.", "transcription_timeout", "timeout");
1732
1012
  this.sendMessage({ type: MESSAGE_TYPE_REQUEST_TRANSCRIPT });
1733
1013
  const result = await this.pendingTranscript.promise;
@@ -1742,7 +1022,7 @@ var WebSocketManager = class {
1742
1022
  const config = getConfig();
1743
1023
  if (config.debug) console.log("[SpeechOS] Requesting text edit...");
1744
1024
  await this.stopAudioCapture();
1745
- this.pendingEditText = new Deferred$1();
1025
+ this.pendingEditText = new Deferred();
1746
1026
  this.pendingEditText.setTimeout(RESPONSE_TIMEOUT_MS, "Edit request timed out. Please try again.", "edit_timeout", "timeout");
1747
1027
  this.sendMessage({ type: MESSAGE_TYPE_EDIT_TEXT });
1748
1028
  const result = await this.pendingEditText.promise;
@@ -1752,12 +1032,13 @@ var WebSocketManager = class {
1752
1032
  /**
1753
1033
  * Request command matching using the transcript as input.
1754
1034
  * Note: The command definitions were already sent in the auth message via startVoiceSession.
1035
+ * Returns an array of matched commands (empty array if no matches).
1755
1036
  */
1756
1037
  async requestCommand(_commands) {
1757
1038
  const config = getConfig();
1758
1039
  if (config.debug) console.log("[SpeechOS] Requesting command match...");
1759
1040
  await this.stopAudioCapture();
1760
- this.pendingCommand = new Deferred$1();
1041
+ this.pendingCommand = new Deferred();
1761
1042
  this.pendingCommand.setTimeout(RESPONSE_TIMEOUT_MS, "Command request timed out. Please try again.", "command_timeout", "timeout");
1762
1043
  this.sendMessage({ type: MESSAGE_TYPE_EXECUTE_COMMAND });
1763
1044
  const result = await this.pendingCommand.promise;
@@ -1795,7 +1076,7 @@ var WebSocketManager = class {
1795
1076
  * Wait for the WebSocket send buffer to drain.
1796
1077
  *
1797
1078
  * This ensures all audio data has been transmitted before we request
1798
- * the transcript. Uses the same pattern as LiveKit's ReadableStream approach.
1079
+ * the transcript.
1799
1080
  */
1800
1081
  async waitForBufferDrain() {
1801
1082
  if (!this.ws || this.ws.readyState !== WS_OPEN) return;
@@ -1871,7 +1152,7 @@ const websocket = new WebSocketManager();
1871
1152
  //#endregion
1872
1153
  //#region src/speechos.ts
1873
1154
  /**
1874
- * Get the active voice backend (always websocket now)
1155
+ * Get the active voice backend
1875
1156
  */
1876
1157
  function getBackend$1() {
1877
1158
  return websocket;
@@ -1879,9 +1160,7 @@ function getBackend$1() {
1879
1160
  /**
1880
1161
  * SpeechOS Core SDK
1881
1162
  *
1882
- * Provides two API layers:
1883
- * 1. Low-level API: Granular control over LiveKit connection lifecycle
1884
- * 2. High-level API: One-shot methods for common voice tasks
1163
+ * Provides a high-level API for common voice tasks.
1885
1164
  */
1886
1165
  var SpeechOSCore = class {
1887
1166
  initialized = false;
@@ -1905,67 +1184,7 @@ var SpeechOSCore = class {
1905
1184
  return this.initialized;
1906
1185
  }
1907
1186
  /**
1908
- * Connect to LiveKit (fetches token, establishes connection)
1909
- * Call this before other low-level methods
1910
- */
1911
- async connect() {
1912
- this.ensureInitialized();
1913
- await livekit.connect();
1914
- }
1915
- /**
1916
- * Wait until the agent is ready to receive audio
1917
- * Resolves when the agent subscribes to our audio track
1918
- */
1919
- async waitUntilReady() {
1920
- return livekit.waitUntilReady();
1921
- }
1922
- /**
1923
- * Enable microphone (user is now being recorded)
1924
- */
1925
- async enableMicrophone() {
1926
- await livekit.enableMicrophone();
1927
- state.setRecordingState("recording");
1928
- }
1929
- /**
1930
- * Stop recording and get the transcript
1931
- * @returns The transcribed text
1932
- */
1933
- async stopAndGetTranscript() {
1934
- state.setRecordingState("processing");
1935
- try {
1936
- const transcript = await livekit.stopAndGetTranscript();
1937
- state.completeRecording();
1938
- return transcript;
1939
- } catch (error) {
1940
- state.setError(error instanceof Error ? error.message : "Transcription failed");
1941
- throw error;
1942
- }
1943
- }
1944
- /**
1945
- * Stop recording and get edited text
1946
- * @param originalText - The original text to edit based on voice instructions
1947
- * @returns The edited text
1948
- */
1949
- async stopAndEdit(originalText) {
1950
- state.setRecordingState("processing");
1951
- try {
1952
- const editedText = await livekit.stopAndEdit(originalText);
1953
- state.completeRecording();
1954
- return editedText;
1955
- } catch (error) {
1956
- state.setError(error instanceof Error ? error.message : "Edit request failed");
1957
- throw error;
1958
- }
1959
- }
1960
- /**
1961
- * Disconnect from LiveKit
1962
- */
1963
- async disconnect() {
1964
- await livekit.disconnect();
1965
- state.completeRecording();
1966
- }
1967
- /**
1968
- * One-shot dictation: connect, wait for agent, record, and get transcript
1187
+ * One-shot dictation: connect, record, and get transcript
1969
1188
  * Automatically handles the full voice session lifecycle
1970
1189
  *
1971
1190
  * @returns The transcribed text
@@ -2024,7 +1243,7 @@ var SpeechOSCore = class {
2024
1243
  }
2025
1244
  }
2026
1245
  /**
2027
- * One-shot edit: connect, wait for agent, record voice instructions, apply to text
1246
+ * One-shot edit: connect, record voice instructions, apply to text
2028
1247
  * Automatically handles the full voice session lifecycle
2029
1248
  *
2030
1249
  * @param originalText - The text to edit
@@ -2089,11 +1308,11 @@ var SpeechOSCore = class {
2089
1308
  }
2090
1309
  }
2091
1310
  /**
2092
- * One-shot command: connect, wait for agent, record voice, match against commands
1311
+ * One-shot command: connect, record voice, match against commands
2093
1312
  * Automatically handles the full voice session lifecycle
2094
1313
  *
2095
1314
  * @param commands - Array of command definitions to match against
2096
- * @returns The matched command result or null if no match
1315
+ * @returns Array of matched commands (empty array if no matches)
2097
1316
  */
2098
1317
  async command(commands) {
2099
1318
  this.ensureInitialized();
@@ -2123,22 +1342,24 @@ var SpeechOSCore = class {
2123
1342
  _commandResolve;
2124
1343
  _commandReject;
2125
1344
  /**
2126
- * Stop command recording and get the matched command
1345
+ * Stop command recording and get the matched commands
2127
1346
  * Call this after command() when user stops speaking
1347
+ *
1348
+ * @returns Array of matched commands (empty array if no matches)
2128
1349
  */
2129
1350
  async stopCommand() {
2130
1351
  state.setRecordingState("processing");
2131
1352
  try {
2132
1353
  const backend = getBackend$1();
2133
1354
  const commands = this._commandCommands || [];
2134
- const result = await backend.requestCommand(commands);
1355
+ const results = await backend.requestCommand(commands);
2135
1356
  state.completeRecording();
2136
1357
  if (this._commandResolve) {
2137
- this._commandResolve(result);
1358
+ this._commandResolve(results);
2138
1359
  this._commandResolve = void 0;
2139
1360
  this._commandReject = void 0;
2140
1361
  }
2141
- return result;
1362
+ return results;
2142
1363
  } catch (error) {
2143
1364
  const err = error instanceof Error ? error : new Error("Command request failed");
2144
1365
  state.setError(err.message);
@@ -2240,15 +1461,10 @@ const websocketBackend = {
2240
1461
  requestCommand: (commands) => websocket.requestCommand(commands),
2241
1462
  disconnect: () => websocket.disconnect(),
2242
1463
  isConnected: () => websocket.isConnected(),
2243
- getLastInputText: () => websocket.getLastInputText(),
2244
- prefetchToken: () => Promise.resolve({}),
2245
- startAutoRefresh: () => {},
2246
- stopAutoRefresh: () => {},
2247
- invalidateTokenCache: () => {}
1464
+ getLastInputText: () => websocket.getLastInputText()
2248
1465
  };
2249
1466
  /**
2250
1467
  * Get the active voice backend.
2251
- * Always returns WebSocket backend (LiveKit is legacy).
2252
1468
  *
2253
1469
  * @returns The websocket backend
2254
1470
  */
@@ -2256,10 +1472,197 @@ function getBackend() {
2256
1472
  return websocketBackend;
2257
1473
  }
2258
1474
 
1475
+ //#endregion
1476
+ //#region src/tts.ts
1477
+ /**
1478
+ * Default TTS voice ID (matches server default).
1479
+ * The server validates voice IDs - pass any valid voice ID or omit to use default.
1480
+ */
1481
+ const DEFAULT_TTS_VOICE_ID = "21m00Tcm4TlvDq8ikWAM";
1482
+ /**
1483
+ * Map HTTP status codes to TTS error codes
1484
+ */
1485
+ function mapHttpStatusToErrorCode(status) {
1486
+ switch (status) {
1487
+ case 400: return "invalid_request";
1488
+ case 402: return "usage_limit_exceeded";
1489
+ case 403: return "authentication_failed";
1490
+ default: return "unknown_error";
1491
+ }
1492
+ }
1493
+ /**
1494
+ * TTS Client for synthesizing speech from text
1495
+ */
1496
+ var TTSClient = class {
1497
+ /**
1498
+ * Synthesize text to speech and return audio bytes
1499
+ *
1500
+ * @param text - Text to synthesize (max 1000 chars)
1501
+ * @param options - Optional synthesis options
1502
+ * @returns Audio data and content type
1503
+ *
1504
+ * @example
1505
+ * ```typescript
1506
+ * const result = await tts.synthesize('Hello world');
1507
+ * console.log(result.audio); // ArrayBuffer
1508
+ * console.log(result.contentType); // 'audio/mpeg'
1509
+ * ```
1510
+ */
1511
+ async synthesize(text, options) {
1512
+ const config = getConfig();
1513
+ if (!config.apiKey) {
1514
+ const error = {
1515
+ code: "authentication_failed",
1516
+ message: "API key not configured. Call SpeechOS.init() first.",
1517
+ phase: "synthesize"
1518
+ };
1519
+ events.emit("tts:error", error);
1520
+ throw new Error(error.message);
1521
+ }
1522
+ events.emit("tts:synthesize:start", { text });
1523
+ try {
1524
+ const response = await fetch(`${config.host}/api/tts/`, {
1525
+ method: "POST",
1526
+ headers: {
1527
+ "Authorization": `Api-Key ${config.apiKey}`,
1528
+ "Content-Type": "application/json"
1529
+ },
1530
+ signal: options?.signal,
1531
+ body: JSON.stringify({
1532
+ text,
1533
+ voice_id: options?.voiceId,
1534
+ language: options?.language ?? "en",
1535
+ user_id: config.userId || void 0
1536
+ })
1537
+ });
1538
+ if (!response.ok) {
1539
+ const errorCode = mapHttpStatusToErrorCode(response.status);
1540
+ let errorMessage;
1541
+ try {
1542
+ const errorData = await response.json();
1543
+ errorMessage = errorData.detail || errorData.message || `HTTP ${response.status}`;
1544
+ } catch {
1545
+ errorMessage = `HTTP ${response.status}: ${response.statusText}`;
1546
+ }
1547
+ const error = {
1548
+ code: errorCode,
1549
+ message: errorMessage,
1550
+ phase: "synthesize"
1551
+ };
1552
+ events.emit("tts:error", error);
1553
+ throw new Error(errorMessage);
1554
+ }
1555
+ const contentType = response.headers.get("Content-Type") || "audio/mpeg";
1556
+ const arrayBuffer = await response.arrayBuffer();
1557
+ events.emit("tts:synthesize:complete", { text });
1558
+ return {
1559
+ audio: arrayBuffer,
1560
+ contentType
1561
+ };
1562
+ } catch (error) {
1563
+ if (error instanceof Error && error.name === "AbortError") throw error;
1564
+ if (error instanceof Error && error.message.includes("HTTP")) throw error;
1565
+ const networkError = {
1566
+ code: "network_error",
1567
+ message: error instanceof Error ? error.message : "Network request failed",
1568
+ phase: "synthesize"
1569
+ };
1570
+ events.emit("tts:error", networkError);
1571
+ throw new Error(networkError.message);
1572
+ }
1573
+ }
1574
+ /**
1575
+ * Stream TTS audio chunks as they arrive from the server
1576
+ *
1577
+ * Useful for progressive playback or processing large texts.
1578
+ *
1579
+ * @param text - Text to synthesize (max 1000 chars)
1580
+ * @param options - Optional synthesis options
1581
+ * @yields Audio chunks as Uint8Array
1582
+ *
1583
+ * @example
1584
+ * ```typescript
1585
+ * const chunks: Uint8Array[] = [];
1586
+ * for await (const chunk of tts.stream('Hello world')) {
1587
+ * chunks.push(chunk);
1588
+ * }
1589
+ * ```
1590
+ */
1591
+ async *stream(text, options) {
1592
+ const config = getConfig();
1593
+ if (!config.apiKey) {
1594
+ const error = {
1595
+ code: "authentication_failed",
1596
+ message: "API key not configured. Call SpeechOS.init() first.",
1597
+ phase: "synthesize"
1598
+ };
1599
+ events.emit("tts:error", error);
1600
+ throw new Error(error.message);
1601
+ }
1602
+ events.emit("tts:synthesize:start", { text });
1603
+ try {
1604
+ const response = await fetch(`${config.host}/api/tts/`, {
1605
+ method: "POST",
1606
+ headers: {
1607
+ "Authorization": `Api-Key ${config.apiKey}`,
1608
+ "Content-Type": "application/json"
1609
+ },
1610
+ signal: options?.signal,
1611
+ body: JSON.stringify({
1612
+ text,
1613
+ voice_id: options?.voiceId,
1614
+ language: options?.language ?? "en",
1615
+ user_id: config.userId || void 0
1616
+ })
1617
+ });
1618
+ if (!response.ok) {
1619
+ const errorCode = mapHttpStatusToErrorCode(response.status);
1620
+ let errorMessage;
1621
+ try {
1622
+ const errorData = await response.json();
1623
+ errorMessage = errorData.detail || errorData.message || `HTTP ${response.status}`;
1624
+ } catch {
1625
+ errorMessage = `HTTP ${response.status}: ${response.statusText}`;
1626
+ }
1627
+ const error = {
1628
+ code: errorCode,
1629
+ message: errorMessage,
1630
+ phase: "synthesize"
1631
+ };
1632
+ events.emit("tts:error", error);
1633
+ throw new Error(errorMessage);
1634
+ }
1635
+ const reader = response.body?.getReader();
1636
+ if (!reader) throw new Error("Response body is not readable");
1637
+ try {
1638
+ while (true) {
1639
+ const { done, value } = await reader.read();
1640
+ if (done) break;
1641
+ yield value;
1642
+ }
1643
+ } finally {
1644
+ reader.releaseLock();
1645
+ }
1646
+ events.emit("tts:synthesize:complete", { text });
1647
+ } catch (error) {
1648
+ if (error instanceof Error && error.name === "AbortError") return;
1649
+ if (error instanceof Error && error.message.includes("HTTP")) throw error;
1650
+ const networkError = {
1651
+ code: "network_error",
1652
+ message: error instanceof Error ? error.message : "Network request failed",
1653
+ phase: "synthesize"
1654
+ };
1655
+ events.emit("tts:error", networkError);
1656
+ throw new Error(networkError.message);
1657
+ }
1658
+ }
1659
+ };
1660
+ const tts = new TTSClient();
1661
+
2259
1662
  //#endregion
2260
1663
  //#region src/index.ts
2261
1664
  const VERSION = "0.1.0";
2262
1665
 
2263
1666
  //#endregion
2264
- export { DEFAULT_HOST, Deferred, SpeechOSEventEmitter, VERSION, clearSettingsToken, createStateManager, events, getBackend, getConfig, getSettingsToken, livekit, resetConfig, setConfig, speechOS, state, updateUserId, validateConfig, websocket };
1667
+ export { DEFAULT_HOST, DEFAULT_TTS_VOICE_ID, Deferred, SpeechOSEventEmitter, TTSClient, VERSION, clearSettingsToken, createStateManager, events, getBackend, getConfig, getSettingsToken, resetConfig, setConfig, speechOS, state, tts, updateUserId, validateConfig, websocket };
2265
1668
  //# sourceMappingURL=index.js.map