@speechos/core 0.2.0 → 0.2.2

This diff shows the changes between publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
package/dist/index.cjs CHANGED
@@ -35,8 +35,6 @@ const defaultConfig = {
35
35
  apiKey: "",
36
36
  userId: "",
37
37
  host: DEFAULT_HOST,
38
- position: "bottom-center",
39
- zIndex: 999999,
40
38
  debug: false
41
39
  };
42
40
  /**
@@ -44,31 +42,19 @@ const defaultConfig = {
44
42
  * @param userConfig - User-provided configuration
45
43
  * @returns Validated and merged configuration
46
44
  */
47
- function validateConfig(userConfig = {}) {
45
+ function validateConfig(userConfig) {
48
46
  if (!userConfig.apiKey) throw new Error("SpeechOS requires an apiKey. Get one from your team dashboard at /a/<team-slug>/.");
49
- const config = {
50
- ...defaultConfig,
51
- ...userConfig
47
+ return {
48
+ apiKey: userConfig.apiKey,
49
+ userId: userConfig.userId ?? defaultConfig.userId,
50
+ host: userConfig.host ?? defaultConfig.host,
51
+ debug: userConfig.debug ?? defaultConfig.debug
52
52
  };
53
- const validPositions = [
54
- "bottom-center",
55
- "bottom-right",
56
- "bottom-left"
57
- ];
58
- if (!validPositions.includes(config.position)) {
59
- console.warn(`Invalid position "${config.position}". Using default "bottom-center".`);
60
- config.position = "bottom-center";
61
- }
62
- if (typeof config.zIndex !== "number" || config.zIndex < 0) {
63
- console.warn(`Invalid zIndex "${config.zIndex}". Using default ${defaultConfig.zIndex}.`);
64
- config.zIndex = defaultConfig.zIndex;
65
- }
66
- return config;
67
53
  }
68
54
  /**
69
55
  * Current active configuration (singleton)
70
56
  */
71
- let currentConfig = defaultConfig;
57
+ let currentConfig = { ...defaultConfig };
72
58
  /**
73
59
  * Get the current configuration
74
60
  */
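
Note: 0.2.2 drops the UI-related position and zIndex options and replaces the spread-based merge with an explicit field-by-field merge, so unknown keys no longer leak into the config. Below is a minimal standalone sketch of that pattern; buildConfig and the default host value are illustrative placeholders, not the package's exported API or its real DEFAULT_HOST.

// Illustrative sketch of the explicit nullish-coalescing merge (hypothetical names).
const defaults = { userId: "", host: "https://example.invalid", debug: false };

function buildConfig(userConfig) {
  if (!userConfig || !userConfig.apiKey) throw new Error("apiKey is required");
  return {
    apiKey: userConfig.apiKey,
    userId: userConfig.userId ?? defaults.userId,
    host: userConfig.host ?? defaults.host,
    debug: userConfig.debug ?? defaults.debug
  };
}

// Unknown keys (e.g. position) are simply ignored instead of being spread into the result.
console.log(buildConfig({ apiKey: "key", position: "bottom-left" }));
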
@@ -191,33 +177,38 @@ const initialState = {
191
177
  var StateManager = class {
192
178
  state;
193
179
  subscribers = /* @__PURE__ */ new Set();
180
+ /** Cached immutable snapshot for useSyncExternalStore compatibility */
181
+ snapshot;
194
182
  constructor(initialState$1) {
195
183
  this.state = { ...initialState$1 };
184
+ this.snapshot = Object.freeze({ ...this.state });
196
185
  }
197
186
  /**
198
- * Get the current state (returns a copy to prevent mutations)
187
+ * Get the current state snapshot (returns a stable reference for React)
188
+ * This returns an immutable frozen object that only changes when setState is called.
199
189
  */
200
190
  getState() {
201
- return { ...this.state };
191
+ return this.snapshot;
202
192
  }
203
193
  /**
204
194
  * Update state with partial values
205
195
  * @param partial - Partial state to merge with current state
206
196
  */
207
197
  setState(partial) {
208
- const prevState = { ...this.state };
198
+ const prevState = this.snapshot;
209
199
  this.state = {
210
200
  ...this.state,
211
201
  ...partial
212
202
  };
203
+ this.snapshot = Object.freeze({ ...this.state });
213
204
  this.subscribers.forEach((callback) => {
214
205
  try {
215
- callback(this.state, prevState);
206
+ callback(this.snapshot, prevState);
216
207
  } catch (error) {
217
208
  console.error("Error in state change callback:", error);
218
209
  }
219
210
  });
220
- events.emit("state:change", { state: this.state });
211
+ events.emit("state:change", { state: this.snapshot });
221
212
  }
222
213
  /**
223
214
  * Subscribe to state changes
@@ -234,7 +225,17 @@ var StateManager = class {
234
225
  * Reset state to initial values
235
226
  */
236
227
  reset() {
237
- this.setState(initialState);
228
+ const prevState = this.snapshot;
229
+ this.state = { ...initialState };
230
+ this.snapshot = Object.freeze({ ...this.state });
231
+ this.subscribers.forEach((callback) => {
232
+ try {
233
+ callback(this.snapshot, prevState);
234
+ } catch (error) {
235
+ console.error("Error in state change callback:", error);
236
+ }
237
+ });
238
+ events.emit("state:change", { state: this.snapshot });
238
239
  }
239
240
  /**
240
241
  * Show the widget
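
Note: the StateManager change above caches a frozen snapshot and reuses it until the next setState/reset, so getState() returns a referentially stable object — the contract React's useSyncExternalStore expects. A minimal sketch of that pattern, independent of the package's actual class:

// Minimal sketch of the cached-snapshot store pattern (not the package's StateManager).
function createStore(initial) {
  let state = { ...initial };
  let snapshot = Object.freeze({ ...state });
  const subscribers = new Set();
  return {
    getSnapshot: () => snapshot, // same reference until the next update
    subscribe(cb) {
      subscribers.add(cb);
      return () => subscribers.delete(cb);
    },
    setState(partial) {
      const prev = snapshot;
      state = { ...state, ...partial };
      snapshot = Object.freeze({ ...state }); // new reference on every update
      subscribers.forEach((cb) => cb(snapshot, prev));
    }
  };
}

// With React: useSyncExternalStore(store.subscribe, store.getSnapshot)
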
@@ -369,12 +370,15 @@ function createStateManager(initial) {
369
370
 
370
371
  //#endregion
371
372
  //#region src/livekit.ts
372
- const MESSAGE_TYPE_REQUEST_TRANSCRIPT = "request_transcript";
373
- const MESSAGE_TYPE_TRANSCRIPT = "transcript";
374
- const MESSAGE_TYPE_EDIT_TEXT = "edit_text";
375
- const MESSAGE_TYPE_EDITED_TEXT = "edited_text";
376
- const MESSAGE_TYPE_ERROR = "error";
373
+ const MESSAGE_TYPE_REQUEST_TRANSCRIPT$1 = "request_transcript";
374
+ const MESSAGE_TYPE_TRANSCRIPT$1 = "transcript";
375
+ const MESSAGE_TYPE_EDIT_TEXT$1 = "edit_text";
376
+ const MESSAGE_TYPE_EDITED_TEXT$1 = "edited_text";
377
+ const MESSAGE_TYPE_EXECUTE_COMMAND$1 = "execute_command";
378
+ const MESSAGE_TYPE_COMMAND_RESULT$1 = "command_result";
379
+ const MESSAGE_TYPE_ERROR$1 = "error";
377
380
  const TOPIC_SPEECHOS = "speechos";
381
+ const TOKEN_CACHE_TTL_MS = 4 * 60 * 1e3;
378
382
  /**
379
383
  * A deferred promise with timeout support.
380
384
  * Encapsulates resolve/reject/timeout in a single object for cleaner async handling.
@@ -438,53 +442,116 @@ var LiveKitManager = class {
438
442
  room = null;
439
443
  tokenData = null;
440
444
  micTrack = null;
445
+ cachedTokenData = null;
446
+ tokenCacheTimestamp = null;
447
+ tokenPrefetchPromise = null;
448
+ tokenRefreshTimer = null;
449
+ autoRefreshEnabled = false;
441
450
  pendingTranscript = null;
442
451
  pendingEditText = null;
452
+ pendingCommand = null;
443
453
  pendingTrackSubscribed = null;
444
- preWarmPromise = null;
445
454
  editOriginalText = null;
455
+ sessionSettings = {};
446
456
  /**
447
- * Pre-warm resources for faster connection
448
- * Call this when user shows intent (e.g., expands widget)
449
- * Only fetches token - mic permission is requested when user clicks Dictate
457
+ * Check if the cached token is still valid (within TTL)
450
458
  */
451
- async preWarm() {
452
- if (this.tokenData || this.preWarmPromise || this.room?.state === "connected") {
453
- const config$1 = getConfig();
454
- if (config$1.debug) console.log("[SpeechOS] Pre-warm skipped - token already available");
455
- return;
456
- }
459
+ isCachedTokenValid() {
460
+ if (!this.cachedTokenData || !this.tokenCacheTimestamp) return false;
461
+ const age = Date.now() - this.tokenCacheTimestamp;
462
+ return age < TOKEN_CACHE_TTL_MS;
463
+ }
464
+ /**
465
+ * Pre-fetch a LiveKit token for later use
466
+ * Call this early (e.g., when widget expands) to reduce latency when starting a voice session.
467
+ * If a prefetch is already in progress, returns the existing promise.
468
+ * If a valid cached token exists, returns it immediately.
469
+ */
470
+ async prefetchToken() {
457
471
  const config = getConfig();
458
- if (config.debug) console.log("[SpeechOS] Pre-warming: fetching token...");
459
- this.preWarmPromise = (async () => {
460
- try {
461
- await this.fetchToken();
462
- if (config.debug) console.log("[SpeechOS] Pre-warm complete - token ready");
463
- } catch (error) {
464
- if (config.debug) console.warn("[SpeechOS] Pre-warm failed:", error);
465
- this.preWarmPromise = null;
466
- }
467
- })();
468
- await this.preWarmPromise;
472
+ if (this.isCachedTokenValid() && this.cachedTokenData) {
473
+ if (config.debug) console.log("[SpeechOS] Using cached token (prefetch hit)");
474
+ return this.cachedTokenData;
475
+ }
476
+ if (this.tokenPrefetchPromise) {
477
+ if (config.debug) console.log("[SpeechOS] Prefetch already in progress, awaiting...");
478
+ return this.tokenPrefetchPromise;
479
+ }
480
+ if (config.debug) console.log("[SpeechOS] Starting token prefetch...");
481
+ this.tokenPrefetchPromise = this.fetchTokenFromServer().then((data) => {
482
+ this.cachedTokenData = data;
483
+ this.tokenCacheTimestamp = Date.now();
484
+ this.tokenPrefetchPromise = null;
485
+ return data;
486
+ }).catch((error) => {
487
+ this.tokenPrefetchPromise = null;
488
+ throw error;
489
+ });
490
+ return this.tokenPrefetchPromise;
469
491
  }
470
492
  /**
471
493
  * Fetch a LiveKit token from the backend
494
+ * Uses cached token if valid, otherwise fetches a fresh one.
495
+ * Includes language settings and user vocabulary which are stored in the VoiceSession.
472
496
  */
473
497
  async fetchToken() {
498
+ const config = getConfig();
499
+ if (this.isCachedTokenValid() && this.cachedTokenData) {
500
+ if (config.debug) console.log("[SpeechOS] Using cached token");
501
+ this.tokenData = this.cachedTokenData;
502
+ return this.cachedTokenData;
503
+ }
504
+ if (this.tokenPrefetchPromise) {
505
+ if (config.debug) console.log("[SpeechOS] Waiting for prefetch to complete...");
506
+ const data$1 = await this.tokenPrefetchPromise;
507
+ this.tokenData = data$1;
508
+ return data$1;
509
+ }
510
+ const data = await this.fetchTokenFromServer();
511
+ this.cachedTokenData = data;
512
+ this.tokenCacheTimestamp = Date.now();
513
+ this.tokenData = data;
514
+ return data;
515
+ }
516
+ /**
517
+ * Internal method to fetch a fresh token from the server
518
+ */
519
+ async fetchTokenFromServer() {
474
520
  const config = getConfig();
475
521
  const url = `${config.host}/livekit/api/token/`;
476
- if (config.debug) console.log("[SpeechOS] Fetching LiveKit token from:", url);
522
+ const settings = this.sessionSettings;
523
+ const inputLanguage = settings.inputLanguageCode ?? "en-US";
524
+ const outputLanguage = settings.outputLanguageCode ?? "en-US";
525
+ const smartFormat = settings.smartFormat ?? true;
526
+ const vocabulary = settings.vocabulary ?? [];
527
+ const snippets = settings.snippets ?? [];
528
+ if (config.debug) {
529
+ console.log("[SpeechOS] Fetching LiveKit token from:", url);
530
+ console.log("[SpeechOS] Session settings:", {
531
+ inputLanguage,
532
+ outputLanguage,
533
+ smartFormat,
534
+ snippetsCount: snippets.length,
535
+ vocabularyCount: vocabulary.length
536
+ });
537
+ }
477
538
  const response = await fetch(url, {
478
539
  method: "POST",
479
540
  headers: {
480
541
  "Content-Type": "application/json",
481
542
  ...config.apiKey ? { Authorization: `Api-Key ${config.apiKey}` } : {}
482
543
  },
483
- body: JSON.stringify({ user_id: config.userId || null })
544
+ body: JSON.stringify({
545
+ user_id: config.userId || null,
546
+ input_language: inputLanguage,
547
+ output_language: outputLanguage,
548
+ smart_format: smartFormat,
549
+ custom_vocabulary: vocabulary,
550
+ custom_snippets: snippets
551
+ })
484
552
  });
485
553
  if (!response.ok) throw new Error(`Failed to fetch LiveKit token: ${response.status} ${response.statusText}`);
486
554
  const data = await response.json();
487
- this.tokenData = data;
488
555
  if (config.debug) console.log("[SpeechOS] LiveKit token received:", {
489
556
  room: data.room,
490
557
  identity: data.identity,
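
Note: the new token handling above combines a 4-minute TTL cache with in-flight request de-duplication: a valid cached token is returned immediately, a pending prefetch is awaited instead of issuing a second request, and only otherwise is a fresh token fetched. A condensed sketch of the same pattern, where fetchFreshToken stands in for the real /livekit/api/token/ call:

// Sketch of TTL caching plus in-flight promise reuse (fetchFreshToken is hypothetical).
const TTL_MS = 4 * 60 * 1000;
let cached = null;
let cachedAt = 0;
let inFlight = null;

async function getToken(fetchFreshToken) {
  if (cached && Date.now() - cachedAt < TTL_MS) return cached; // cache hit
  if (inFlight) return inFlight;                               // reuse the pending request
  inFlight = fetchFreshToken()
    .then((data) => {
      cached = data;
      cachedAt = Date.now();
      return data;
    })
    .finally(() => {
      inFlight = null; // allow a retry after success or failure
    });
  return inFlight;
}
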
@@ -497,8 +564,7 @@ var LiveKitManager = class {
497
564
  */
498
565
  async connect() {
499
566
  const config = getConfig();
500
- if (!this.tokenData) await this.fetchToken();
501
- else if (config.debug) console.log("[SpeechOS] Using pre-fetched token");
567
+ await this.fetchToken();
502
568
  if (!this.tokenData) throw new Error("No token available for LiveKit connection");
503
569
  this.room = new livekit_client.Room({
504
570
  adaptiveStream: true,
@@ -562,7 +628,7 @@ var LiveKitManager = class {
562
628
  try {
563
629
  const message = JSON.parse(new TextDecoder().decode(data));
564
630
  if (config.debug) console.log("[SpeechOS] Data received:", message);
565
- if (message.type === MESSAGE_TYPE_TRANSCRIPT) {
631
+ if (message.type === MESSAGE_TYPE_TRANSCRIPT$1) {
566
632
  const transcript = message.transcript || "";
567
633
  if (config.debug) console.log("[SpeechOS] Transcript received:", transcript);
568
634
  events.emit("transcription:complete", { text: transcript });
@@ -570,7 +636,7 @@ var LiveKitManager = class {
570
636
  this.pendingTranscript.resolve(transcript);
571
637
  this.pendingTranscript = null;
572
638
  }
573
- } else if (message.type === MESSAGE_TYPE_EDITED_TEXT) {
639
+ } else if (message.type === MESSAGE_TYPE_EDITED_TEXT$1) {
574
640
  const editedText = message.text || "";
575
641
  if (config.debug) console.log("[SpeechOS] Edited text received:", editedText);
576
642
  events.emit("edit:complete", {
@@ -582,7 +648,15 @@ var LiveKitManager = class {
582
648
  this.pendingEditText = null;
583
649
  }
584
650
  this.editOriginalText = null;
585
- } else if (message.type === MESSAGE_TYPE_ERROR) {
651
+ } else if (message.type === MESSAGE_TYPE_COMMAND_RESULT$1) {
652
+ const commandResult = message.command || null;
653
+ if (config.debug) console.log("[SpeechOS] Command result received:", commandResult);
654
+ events.emit("command:complete", { command: commandResult });
655
+ if (this.pendingCommand) {
656
+ this.pendingCommand.resolve(commandResult);
657
+ this.pendingCommand = null;
658
+ }
659
+ } else if (message.type === MESSAGE_TYPE_ERROR$1) {
586
660
  const serverError = message;
587
661
  const errorCode = serverError.code || "server_error";
588
662
  const errorMessage = serverError.message || "A server error occurred";
@@ -602,6 +676,10 @@ var LiveKitManager = class {
602
676
  this.pendingEditText.reject(error);
603
677
  this.pendingEditText = null;
604
678
  }
679
+ if (this.pendingCommand) {
680
+ this.pendingCommand.reject(error);
681
+ this.pendingCommand = null;
682
+ }
605
683
  }
606
684
  } catch (error) {
607
685
  console.error("[SpeechOS] Failed to parse data message:", error);
@@ -609,16 +687,34 @@ var LiveKitManager = class {
609
687
  }
610
688
  /**
611
689
  * Publish microphone audio track
690
+ * Uses the device ID from session settings if set
612
691
  */
613
692
  async enableMicrophone() {
614
693
  if (!this.room || this.room.state !== "connected") throw new Error("Not connected to room");
615
694
  const config = getConfig();
616
695
  if (!this.micTrack) {
617
696
  if (config.debug) console.log("[SpeechOS] Creating microphone track...");
618
- this.micTrack = await (0, livekit_client.createLocalAudioTrack)({
697
+ const deviceId = this.sessionSettings.audioDeviceId;
698
+ const trackOptions = {
619
699
  echoCancellation: true,
620
700
  noiseSuppression: true
621
- });
701
+ };
702
+ if (deviceId) {
703
+ trackOptions.deviceId = { exact: deviceId };
704
+ if (config.debug) console.log("[SpeechOS] Using audio device:", deviceId);
705
+ }
706
+ try {
707
+ this.micTrack = await (0, livekit_client.createLocalAudioTrack)(trackOptions);
708
+ } catch (error) {
709
+ if (deviceId && error instanceof Error) {
710
+ console.warn("[SpeechOS] Selected audio device unavailable, falling back to default:", error.message);
711
+ this.micTrack = await (0, livekit_client.createLocalAudioTrack)({
712
+ echoCancellation: true,
713
+ noiseSuppression: true
714
+ });
715
+ } else throw error;
716
+ }
717
+ this.logMicrophoneInfo();
622
718
  }
623
719
  const existingPub = this.room.localParticipant.getTrackPublication(livekit_client.Track.Source.Microphone);
624
720
  if (!existingPub) {
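
Note: enableMicrophone now honours a selected input device but falls back to the system default if that device can no longer be opened. The same pattern expressed with the plain getUserMedia API (the diff itself goes through livekit-client's createLocalAudioTrack):

// Sketch of "exact device with fallback to default" using the standard getUserMedia API.
async function getMicStream(deviceId) {
  const base = { echoCancellation: true, noiseSuppression: true };
  const audio = deviceId ? { ...base, deviceId: { exact: deviceId } } : base;
  try {
    return await navigator.mediaDevices.getUserMedia({ audio });
  } catch (error) {
    if (!deviceId) throw error; // nothing to fall back to
    console.warn("Selected audio device unavailable, using default:", error);
    return navigator.mediaDevices.getUserMedia({ audio: base });
  }
}
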
@@ -628,6 +724,24 @@ var LiveKitManager = class {
628
724
  }
629
725
  }
630
726
  /**
727
+ * Log information about the current microphone track
728
+ */
729
+ logMicrophoneInfo() {
730
+ if (!this.micTrack) return;
731
+ const config = getConfig();
732
+ const mediaTrack = this.micTrack.mediaStreamTrack;
733
+ const settings = mediaTrack.getSettings();
734
+ console.log("[SpeechOS] Microphone active:", {
735
+ deviceId: settings.deviceId || "unknown",
736
+ label: mediaTrack.label || "Unknown device",
737
+ sampleRate: settings.sampleRate,
738
+ channelCount: settings.channelCount,
739
+ echoCancellation: settings.echoCancellation,
740
+ noiseSuppression: settings.noiseSuppression
741
+ });
742
+ if (config.debug) console.log("[SpeechOS] Full audio track settings:", settings);
743
+ }
744
+ /**
631
745
  * Disable microphone audio track
632
746
  */
633
747
  async disableMicrophone() {
@@ -659,30 +773,85 @@ var LiveKitManager = class {
659
773
  });
660
774
  }
661
775
  /**
662
- * Start a voice session
663
- * Connects to room, enables microphone, and waits for agent to subscribe to our track
776
+ * Start a voice session with pre-connect audio buffering
777
+ * Fetches a fresh token, then enables mic with preConnectBuffer to capture audio while connecting.
778
+ * Agent subscription happens in the background - we don't block on it.
779
+ *
780
+ * @param options - Session options including action type and parameters
664
781
  */
665
- async startVoiceSession() {
782
+ async startVoiceSession(options) {
666
783
  const config = getConfig();
667
784
  if (config.debug) console.log("[SpeechOS] Starting voice session...");
668
- if (this.preWarmPromise) {
669
- if (config.debug) console.log("[SpeechOS] Waiting for pre-warm to complete...");
670
- await this.preWarmPromise;
671
- }
672
- if (this.tokenData) {
673
- if (config.debug) console.log("[SpeechOS] Using cached token from init");
674
- } else {
675
- if (config.debug) console.log("[SpeechOS] Fetching fresh token for session...");
676
- await this.fetchToken();
677
- }
785
+ this.sessionSettings = options?.settings || {};
786
+ await this.fetchToken();
787
+ if (!this.tokenData) throw new Error("No token available for LiveKit connection");
678
788
  this.pendingTrackSubscribed = new Deferred();
679
789
  this.pendingTrackSubscribed.setTimeout(15e3, "Connection timed out - agent not available", "connection_timeout", "connection");
680
- await this.connect();
681
- await this.enableMicrophone();
682
- if (config.debug) console.log("[SpeechOS] Microphone published, waiting for LocalTrackSubscribed event...");
683
- await this.pendingTrackSubscribed.promise;
684
- this.pendingTrackSubscribed = null;
685
- if (config.debug) console.log("[SpeechOS] Voice session ready - agent subscribed to audio");
790
+ this.room = new livekit_client.Room({
791
+ adaptiveStream: true,
792
+ dynacast: true
793
+ });
794
+ this.setupRoomEvents();
795
+ if (config.debug) console.log("[SpeechOS] Connecting to LiveKit room:", this.tokenData.room, "at", this.tokenData.ws_url);
796
+ await this.room.connect(this.tokenData.ws_url, this.tokenData.token);
797
+ if (config.debug) console.log("[SpeechOS] Connected, enabling microphone with preConnectBuffer...");
798
+ await this.enableMicrophoneWithPreConnectBuffer();
799
+ if (options?.onMicReady) options.onMicReady();
800
+ state.setConnected(true);
801
+ if (config.debug) console.log("[SpeechOS] Voice session ready - microphone active");
802
+ this.waitForAgentSubscription();
803
+ }
804
+ /**
805
+ * Wait for the agent to subscribe to our audio track in the background
806
+ * Handles timeout errors without blocking the main flow
807
+ */
808
+ waitForAgentSubscription() {
809
+ const config = getConfig();
810
+ if (!this.pendingTrackSubscribed) return;
811
+ this.pendingTrackSubscribed.promise.then(() => {
812
+ if (config.debug) console.log("[SpeechOS] Agent subscribed to audio track - full duplex established");
813
+ this.pendingTrackSubscribed = null;
814
+ }).catch((error) => {
815
+ console.warn("[SpeechOS] Agent subscription timeout:", error.message);
816
+ this.pendingTrackSubscribed = null;
817
+ });
818
+ }
819
+ /**
820
+ * Enable microphone with pre-connect buffering
821
+ * This starts capturing audio locally before the room is connected,
822
+ * buffering it until the connection is established.
823
+ */
824
+ async enableMicrophoneWithPreConnectBuffer() {
825
+ if (!this.room) throw new Error("Room not initialized");
826
+ const config = getConfig();
827
+ const deviceId = this.sessionSettings.audioDeviceId;
828
+ const constraints = {
829
+ echoCancellation: true,
830
+ noiseSuppression: true
831
+ };
832
+ if (deviceId) {
833
+ constraints.deviceId = { exact: deviceId };
834
+ if (config.debug) console.log("[SpeechOS] Using audio device:", deviceId);
835
+ }
836
+ try {
837
+ await this.room.localParticipant.setMicrophoneEnabled(true, constraints, { preConnectBuffer: true });
838
+ state.setMicEnabled(true);
839
+ const micPub = this.room.localParticipant.getTrackPublication(livekit_client.Track.Source.Microphone);
840
+ if (micPub?.track) {
841
+ this.micTrack = micPub.track;
842
+ this.logMicrophoneInfo();
843
+ }
844
+ if (config.debug) console.log("[SpeechOS] Microphone enabled with pre-connect buffer - audio is being captured");
845
+ } catch (error) {
846
+ if (deviceId && error instanceof Error) {
847
+ console.warn("[SpeechOS] Selected audio device unavailable, falling back to default:", error.message);
848
+ await this.room.localParticipant.setMicrophoneEnabled(true, {
849
+ echoCancellation: true,
850
+ noiseSuppression: true
851
+ }, { preConnectBuffer: true });
852
+ state.setMicEnabled(true);
853
+ } else throw error;
854
+ }
686
855
  }
687
856
  /**
688
857
  * Stop the voice session and request the transcript
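
Note: startVoiceSession no longer blocks on the agent subscribing to the local track: the caller gets control back as soon as the microphone is publishing, and the subscription (with its 15 s timeout) is only observed in the background for logging. A schematic sketch of that flow; connect, enableMic and waitForAgent are hypothetical stand-ins for the real steps:

// Sketch of the non-blocking session start: await the steps the user can feel,
// observe agent readiness in the background without awaiting it.
async function startSession({ connect, enableMic, waitForAgent, onMicReady }) {
  await connect();
  await enableMic(); // the real implementation enables the mic with a pre-connect buffer
  onMicReady?.();
  waitForAgent() // intentionally not awaited
    .then(() => console.log("agent subscribed to audio"))
    .catch((err) => console.warn("agent subscription timeout:", err.message));
}
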
@@ -691,12 +860,19 @@ var LiveKitManager = class {
691
860
  */
692
861
  async stopVoiceSession() {
693
862
  const config = getConfig();
863
+ const settings = this.sessionSettings;
864
+ const inputLanguage = settings.inputLanguageCode ?? "en-US";
865
+ const outputLanguage = settings.outputLanguageCode ?? "en-US";
866
+ console.log("[SpeechOS] Dictate command:", {
867
+ inputLanguage,
868
+ outputLanguage
869
+ });
694
870
  if (config.debug) console.log("[SpeechOS] Stopping voice session, requesting transcript...");
695
871
  await this.disableMicrophone();
696
872
  if (config.debug) console.log("[SpeechOS] Requesting transcript from agent...");
697
873
  this.pendingTranscript = new Deferred();
698
874
  this.pendingTranscript.setTimeout(1e4, "Transcription timed out. Please try again.", "transcription_timeout", "timeout");
699
- await this.sendDataMessage({ type: MESSAGE_TYPE_REQUEST_TRANSCRIPT });
875
+ await this.sendDataMessage({ type: MESSAGE_TYPE_REQUEST_TRANSCRIPT$1 });
700
876
  const result = await this.pendingTranscript.promise;
701
877
  this.pendingTranscript = null;
702
878
  return result;
@@ -715,6 +891,14 @@ var LiveKitManager = class {
715
891
  */
716
892
  async requestEditText(originalText) {
717
893
  const config = getConfig();
894
+ const settings = this.sessionSettings;
895
+ const inputLanguage = settings.inputLanguageCode ?? "en-US";
896
+ const outputLanguage = settings.outputLanguageCode ?? "en-US";
897
+ console.log("[SpeechOS] Edit command:", {
898
+ inputLanguage,
899
+ outputLanguage,
900
+ originalTextLength: originalText.length
901
+ });
718
902
  if (config.debug) console.log("[SpeechOS] Requesting text edit...");
719
903
  this.editOriginalText = originalText;
720
904
  await this.disableMicrophone();
@@ -722,7 +906,7 @@ var LiveKitManager = class {
722
906
  this.pendingEditText = new Deferred();
723
907
  this.pendingEditText.setTimeout(15e3, "Edit request timed out. Please try again.", "edit_timeout", "timeout");
724
908
  await this.sendDataMessage({
725
- type: MESSAGE_TYPE_EDIT_TEXT,
909
+ type: MESSAGE_TYPE_EDIT_TEXT$1,
726
910
  text: originalText
727
911
  });
728
912
  const result = await this.pendingEditText.promise;
@@ -736,6 +920,39 @@ var LiveKitManager = class {
736
920
  return this.requestEditText(originalText);
737
921
  }
738
922
  /**
923
+ * Request command matching using the transcript as input
924
+ * Sends command definitions to the backend, which matches the user's speech against them
925
+ * Returns a promise that resolves with the matched command or null if no match
926
+ * @throws Error if timeout occurs waiting for command result
927
+ */
928
+ async requestCommand(commands) {
929
+ const config = getConfig();
930
+ const settings = this.sessionSettings;
931
+ const inputLanguage = settings.inputLanguageCode ?? "en-US";
932
+ console.log("[SpeechOS] Command request:", {
933
+ inputLanguage,
934
+ commandCount: commands.length
935
+ });
936
+ if (config.debug) console.log("[SpeechOS] Requesting command match...");
937
+ await this.disableMicrophone();
938
+ if (config.debug) console.log("[SpeechOS] Sending execute_command request to agent...");
939
+ this.pendingCommand = new Deferred();
940
+ this.pendingCommand.setTimeout(15e3, "Command request timed out. Please try again.", "command_timeout", "timeout");
941
+ await this.sendDataMessage({
942
+ type: MESSAGE_TYPE_EXECUTE_COMMAND$1,
943
+ commands
944
+ });
945
+ const result = await this.pendingCommand.promise;
946
+ this.pendingCommand = null;
947
+ return result;
948
+ }
949
+ /**
950
+ * Alias for requestCommand - granular API naming
951
+ */
952
+ async stopAndCommand(commands) {
953
+ return this.requestCommand(commands);
954
+ }
955
+ /**
739
956
  * Disconnect from the current room
740
957
  * Clears the token so a fresh one is fetched for the next session
741
958
  */
@@ -758,16 +975,110 @@ var LiveKitManager = class {
758
975
  this.pendingEditText.reject(new Error("Disconnected"));
759
976
  this.pendingEditText = null;
760
977
  }
978
+ if (this.pendingCommand) {
979
+ this.pendingCommand.reject(new Error("Disconnected"));
980
+ this.pendingCommand = null;
981
+ }
761
982
  if (this.pendingTrackSubscribed) {
762
983
  this.pendingTrackSubscribed.reject(new Error("Disconnected"));
763
984
  this.pendingTrackSubscribed = null;
764
985
  }
765
986
  this.tokenData = null;
766
- this.preWarmPromise = null;
767
987
  this.editOriginalText = null;
988
+ this.sessionSettings = {};
768
989
  if (config.debug) console.log("[SpeechOS] Session state cleared");
769
990
  }
770
991
  /**
992
+ * Invalidate the cached token
993
+ * Call this when settings change that would affect the token (language, vocabulary)
994
+ */
995
+ invalidateTokenCache() {
996
+ const config = getConfig();
997
+ if (config.debug) console.log("[SpeechOS] Token cache invalidated");
998
+ this.cachedTokenData = null;
999
+ this.tokenCacheTimestamp = null;
1000
+ }
1001
+ /**
1002
+ * Start auto-refreshing the token while the widget is expanded.
1003
+ * Call this after a voice session completes to immediately fetch a fresh token
1004
+ * (since each command requires its own token) and keep it fresh for subsequent commands.
1005
+ */
1006
+ startAutoRefresh() {
1007
+ const config = getConfig();
1008
+ this.autoRefreshEnabled = true;
1009
+ if (config.debug) console.log("[SpeechOS] Token auto-refresh enabled");
1010
+ this.invalidateTokenCache();
1011
+ this.prefetchToken().then(() => {
1012
+ this.scheduleTokenRefresh();
1013
+ }).catch((error) => {
1014
+ if (config.debug) console.warn("[SpeechOS] Failed to prefetch token after command:", error);
1015
+ if (this.autoRefreshEnabled) this.tokenRefreshTimer = setTimeout(() => {
1016
+ this.performAutoRefresh();
1017
+ }, 5 * 1e3);
1018
+ });
1019
+ }
1020
+ /**
1021
+ * Stop auto-refreshing the token.
1022
+ * Call this when the widget collapses or user navigates away.
1023
+ */
1024
+ stopAutoRefresh() {
1025
+ const config = getConfig();
1026
+ this.autoRefreshEnabled = false;
1027
+ if (this.tokenRefreshTimer) {
1028
+ clearTimeout(this.tokenRefreshTimer);
1029
+ this.tokenRefreshTimer = null;
1030
+ }
1031
+ if (config.debug) console.log("[SpeechOS] Token auto-refresh disabled");
1032
+ }
1033
+ /**
1034
+ * Schedule a token refresh before the current cache expires.
1035
+ * Handles computer sleep by checking elapsed time on each refresh attempt.
1036
+ */
1037
+ scheduleTokenRefresh() {
1038
+ if (!this.autoRefreshEnabled) return;
1039
+ if (this.tokenRefreshTimer) {
1040
+ clearTimeout(this.tokenRefreshTimer);
1041
+ this.tokenRefreshTimer = null;
1042
+ }
1043
+ const config = getConfig();
1044
+ const refreshBuffer = 30 * 1e3;
1045
+ let timeUntilRefresh;
1046
+ if (this.tokenCacheTimestamp) {
1047
+ const age = Date.now() - this.tokenCacheTimestamp;
1048
+ const timeRemaining = TOKEN_CACHE_TTL_MS - age;
1049
+ timeUntilRefresh = Math.max(0, timeRemaining - refreshBuffer);
1050
+ } else timeUntilRefresh = 0;
1051
+ if (config.debug) console.log(`[SpeechOS] Scheduling token refresh in ${Math.round(timeUntilRefresh / 1e3)}s`);
1052
+ this.tokenRefreshTimer = setTimeout(() => {
1053
+ this.performAutoRefresh();
1054
+ }, timeUntilRefresh);
1055
+ }
1056
+ /**
1057
+ * Perform the auto-refresh, handling computer sleep scenarios.
1058
+ */
1059
+ async performAutoRefresh() {
1060
+ if (!this.autoRefreshEnabled) return;
1061
+ const config = getConfig();
1062
+ if (this.isCachedTokenValid()) {
1063
+ if (config.debug) console.log("[SpeechOS] Token still valid on refresh check, rescheduling");
1064
+ this.scheduleTokenRefresh();
1065
+ return;
1066
+ }
1067
+ if (config.debug) console.log("[SpeechOS] Auto-refreshing token...");
1068
+ try {
1069
+ const data = await this.fetchTokenFromServer();
1070
+ this.cachedTokenData = data;
1071
+ this.tokenCacheTimestamp = Date.now();
1072
+ if (config.debug) console.log("[SpeechOS] Token auto-refreshed successfully");
1073
+ this.scheduleTokenRefresh();
1074
+ } catch (error) {
1075
+ console.warn("[SpeechOS] Token auto-refresh failed:", error);
1076
+ if (this.autoRefreshEnabled) this.tokenRefreshTimer = setTimeout(() => {
1077
+ this.performAutoRefresh();
1078
+ }, 30 * 1e3);
1079
+ }
1080
+ }
1081
+ /**
771
1082
  * Get the current room instance
772
1083
  */
773
1084
  getRoom() {
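
Note: auto-refresh schedules the next fetch about 30 seconds before the 4-minute cache TTL expires, and re-checks the cache age when the timer fires so a machine waking from sleep refreshes immediately rather than trusting a stale timestamp. The scheduling arithmetic in isolation:

// Sketch of the refresh scheduling used above: refresh REFRESH_BUFFER_MS before the TTL ends.
const TOKEN_TTL_MS = 4 * 60 * 1000;
const REFRESH_BUFFER_MS = 30 * 1000;

function msUntilRefresh(cachedAtMs, now = Date.now()) {
  const remaining = TOKEN_TTL_MS - (now - cachedAtMs);
  return Math.max(0, remaining - REFRESH_BUFFER_MS); // never schedule in the past
}

// A token cached 60s ago: 240s - 60s - 30s = 150s until the refresh timer fires.
console.log(msUntilRefresh(Date.now() - 60 * 1000)); // ≈ 150000
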
@@ -791,88 +1102,747 @@ var LiveKitManager = class {
791
1102
  isMicrophoneEnabled() {
792
1103
  return this.micTrack !== null;
793
1104
  }
794
- /**
795
- * Clear the cached token
796
- * Used when user identity changes to ensure next session gets a fresh token
797
- */
798
- clearToken() {
799
- const config = getConfig();
800
- if (config.debug) console.log("[SpeechOS] Clearing cached token");
801
- this.tokenData = null;
802
- this.preWarmPromise = null;
803
- }
804
1105
  };
805
1106
  const livekit = new LiveKitManager();
1107
+ events.on("settings:changed", () => {
1108
+ livekit.invalidateTokenCache();
1109
+ });
806
1110
 
807
1111
  //#endregion
808
- //#region src/transcript-store.ts
809
- const STORAGE_KEY = "speechos_transcripts";
810
- const MAX_ENTRIES = 50;
1112
+ //#region src/audio-capture.ts
811
1113
  /**
812
- * Generate a unique ID for transcript entries
1114
+ * Detect if running in Safari.
813
1115
  */
814
- function generateId() {
815
- return `${Date.now()}-${Math.random().toString(36).slice(2, 9)}`;
1116
+ function isSafari() {
1117
+ const ua = navigator.userAgent.toLowerCase();
1118
+ const vendor = navigator.vendor?.toLowerCase() || "";
1119
+ const hasSafariUA = ua.includes("safari") && !ua.includes("chrome") && !ua.includes("chromium");
1120
+ const isAppleVendor = vendor.includes("apple");
1121
+ return hasSafariUA && isAppleVendor;
816
1122
  }
817
1123
  /**
818
- * Get all transcripts from localStorage
1124
+ * Detect the best supported audio format for the current browser.
1125
+ *
1126
+ * IMPORTANT: Safari must use MP4/AAC. Its WebM/Opus implementation is buggy
1127
+ * and produces truncated/incomplete audio.
819
1128
  */
820
- function getTranscripts() {
821
- try {
822
- const stored = localStorage.getItem(STORAGE_KEY);
823
- if (!stored) return [];
824
- const entries = JSON.parse(stored);
825
- return entries.sort((a, b) => b.timestamp - a.timestamp);
826
- } catch {
827
- return [];
1129
+ function getSupportedAudioFormat() {
1130
+ if (isSafari()) {
1131
+ if (MediaRecorder.isTypeSupported("audio/mp4")) return {
1132
+ mimeType: "audio/mp4",
1133
+ format: "mp4",
1134
+ needsEncodingParams: false
1135
+ };
1136
+ return {
1137
+ mimeType: "",
1138
+ format: "mp4",
1139
+ needsEncodingParams: true
1140
+ };
828
1141
  }
1142
+ if (MediaRecorder.isTypeSupported("audio/webm;codecs=opus")) return {
1143
+ mimeType: "audio/webm;codecs=opus",
1144
+ format: "webm",
1145
+ needsEncodingParams: false
1146
+ };
1147
+ if (MediaRecorder.isTypeSupported("audio/webm")) return {
1148
+ mimeType: "audio/webm",
1149
+ format: "webm",
1150
+ needsEncodingParams: false
1151
+ };
1152
+ if (MediaRecorder.isTypeSupported("audio/mp4")) return {
1153
+ mimeType: "audio/mp4",
1154
+ format: "mp4",
1155
+ needsEncodingParams: false
1156
+ };
1157
+ return {
1158
+ mimeType: "",
1159
+ format: "webm",
1160
+ needsEncodingParams: true
1161
+ };
829
1162
  }
830
1163
  /**
831
- * Save a new transcript entry
1164
+ * Audio capture manager with buffering support.
1165
+ *
1166
+ * Usage:
1167
+ * 1. Create instance with onChunk callback
1168
+ * 2. Call start() - immediately begins capturing
1169
+ * 3. Call setReady() when connection is established - flushes buffer
1170
+ * 4. Call stop() when done
832
1171
  */
833
- function saveTranscript(text, action, originalText) {
834
- const entry = {
835
- id: generateId(),
836
- text,
837
- timestamp: Date.now(),
838
- action,
839
- ...originalText && { originalText }
840
- };
841
- const entries = getTranscripts();
842
- entries.unshift(entry);
843
- const pruned = entries.slice(0, MAX_ENTRIES);
844
- try {
845
- localStorage.setItem(STORAGE_KEY, JSON.stringify(pruned));
846
- } catch {}
847
- return entry;
848
- }
1172
+ var AudioCapture = class AudioCapture {
1173
+ mediaStream = null;
1174
+ recorder = null;
1175
+ buffer = [];
1176
+ isReady = false;
1177
+ isRecording = false;
1178
+ onChunk;
1179
+ audioFormat;
1180
+ deviceId;
1181
+ /**
1182
+ * Time slice for MediaRecorder in milliseconds.
1183
+ *
1184
+ * Safari requires a larger timeslice (1000ms) to properly flush its internal
1185
+ * audio buffers. Smaller values cause Safari to drop or truncate audio data.
1186
+ * See: https://community.openai.com/t/whisper-problem-with-audio-mp4-blobs-from-safari/
1187
+ *
1188
+ * Other browsers (Chrome, Firefox, Edge) work well with smaller timeslices
1189
+ * which provide lower latency for real-time transcription.
1190
+ */
1191
+ static TIME_SLICE_MS = 100;
1192
+ static SAFARI_TIME_SLICE_MS = 1e3;
1193
+ /**
1194
+ * @param onChunk - Callback for receiving audio chunks
1195
+ * @param deviceId - Optional audio device ID (empty string or undefined for system default)
1196
+ */
1197
+ constructor(onChunk, deviceId) {
1198
+ this.onChunk = onChunk;
1199
+ this.audioFormat = getSupportedAudioFormat();
1200
+ this.deviceId = deviceId;
1201
+ }
1202
+ /**
1203
+ * Get the appropriate timeslice for the current browser.
1204
+ * Safari needs a larger timeslice to avoid dropping audio data.
1205
+ */
1206
+ getTimeSlice() {
1207
+ return isSafari() ? AudioCapture.SAFARI_TIME_SLICE_MS : AudioCapture.TIME_SLICE_MS;
1208
+ }
1209
+ /**
1210
+ * Get the timeslice being used (in milliseconds).
1211
+ * Useful for callers that need to wait for audio processing.
1212
+ */
1213
+ getTimeSliceMs() {
1214
+ return this.getTimeSlice();
1215
+ }
1216
+ /**
1217
+ * Get the audio format being used.
1218
+ */
1219
+ getFormat() {
1220
+ return this.audioFormat;
1221
+ }
1222
+ /**
1223
+ * Start capturing audio immediately.
1224
+ *
1225
+ * Audio chunks will be buffered until setReady() is called.
1226
+ */
1227
+ async start() {
1228
+ const config = getConfig();
1229
+ if (this.isRecording) {
1230
+ if (config.debug) console.log("[SpeechOS] AudioCapture already recording");
1231
+ return;
1232
+ }
1233
+ this.buffer = [];
1234
+ this.isReady = false;
1235
+ const constraints = { audio: {
1236
+ echoCancellation: true,
1237
+ noiseSuppression: true,
1238
+ ...this.deviceId ? { deviceId: { exact: this.deviceId } } : {}
1239
+ } };
1240
+ if (config.debug) {
1241
+ console.log("[SpeechOS] AudioCapture starting with format:", this.audioFormat.mimeType);
1242
+ console.log("[SpeechOS] Detected Safari:", isSafari());
1243
+ if (this.deviceId) console.log("[SpeechOS] Using audio device:", this.deviceId);
1244
+ }
1245
+ try {
1246
+ this.mediaStream = await navigator.mediaDevices.getUserMedia(constraints);
1247
+ const recorderOptions = {};
1248
+ if (this.audioFormat.mimeType) recorderOptions.mimeType = this.audioFormat.mimeType;
1249
+ this.recorder = new MediaRecorder(this.mediaStream, recorderOptions);
1250
+ this.recorder.ondataavailable = (event) => {
1251
+ if (event.data && event.data.size > 0) this.handleChunk(event.data);
1252
+ };
1253
+ this.recorder.onerror = (event) => {
1254
+ console.error("[SpeechOS] MediaRecorder error:", event);
1255
+ };
1256
+ const timeSlice = this.getTimeSlice();
1257
+ this.recorder.start(timeSlice);
1258
+ this.isRecording = true;
1259
+ if (config.debug) console.log(`[SpeechOS] AudioCapture started with ${timeSlice}ms timeslice, buffering until ready`);
1260
+ } catch (error) {
1261
+ if (this.deviceId && error instanceof Error) {
1262
+ console.warn("[SpeechOS] Selected device unavailable, trying default:", error.message);
1263
+ this.mediaStream = await navigator.mediaDevices.getUserMedia({ audio: {
1264
+ echoCancellation: true,
1265
+ noiseSuppression: true
1266
+ } });
1267
+ const recorderOptions = {};
1268
+ if (this.audioFormat.mimeType) recorderOptions.mimeType = this.audioFormat.mimeType;
1269
+ this.recorder = new MediaRecorder(this.mediaStream, recorderOptions);
1270
+ this.recorder.ondataavailable = (event) => {
1271
+ if (event.data && event.data.size > 0) this.handleChunk(event.data);
1272
+ };
1273
+ this.recorder.start(this.getTimeSlice());
1274
+ this.isRecording = true;
1275
+ } else throw error;
1276
+ }
1277
+ }
1278
+ /**
1279
+ * Handle an audio chunk with atomic buffer swap pattern.
1280
+ *
1281
+ * If not ready: buffer the chunk.
1282
+ * If ready: send directly via callback.
1283
+ */
1284
+ handleChunk(chunk) {
1285
+ if (this.isReady) this.onChunk(chunk);
1286
+ else this.buffer.push(chunk);
1287
+ }
1288
+ /**
1289
+ * Mark the capture as ready (connection established).
1290
+ *
1291
+ * This flushes any buffered chunks and switches to direct mode.
1292
+ * Uses atomic swap to prevent chunk reordering.
1293
+ */
1294
+ setReady() {
1295
+ const config = getConfig();
1296
+ if (this.isReady) return;
1297
+ const toFlush = this.buffer;
1298
+ this.buffer = [];
1299
+ for (const chunk of toFlush) this.onChunk(chunk);
1300
+ this.isReady = true;
1301
+ if (config.debug) console.log(`[SpeechOS] AudioCapture ready, flushed ${toFlush.length} buffered chunks`);
1302
+ }
1303
+ /**
1304
+ * Stop capturing audio and wait for final chunk.
1305
+ *
1306
+ * Uses requestData() before stop() to force the MediaRecorder to flush
1307
+ * any buffered audio immediately. This is critical for Safari which
1308
+ * may hold audio data in internal buffers.
1309
+ *
1310
+ * Safari requires an additional delay after stopping to ensure all audio
1311
+ * from its internal encoding pipeline has been fully processed and emitted.
1312
+ */
1313
+ async stop() {
1314
+ const config = getConfig();
1315
+ const safari = isSafari();
1316
+ if (this.recorder && this.recorder.state !== "inactive") {
1317
+ if (this.recorder.state === "recording") try {
1318
+ const dataPromise = new Promise((resolve) => {
1319
+ const handler = (event) => {
1320
+ this.recorder?.removeEventListener("dataavailable", handler);
1321
+ if (config.debug) console.log(`[SpeechOS] requestData flush received: ${event.data.size} bytes`);
1322
+ resolve();
1323
+ };
1324
+ this.recorder?.addEventListener("dataavailable", handler);
1325
+ });
1326
+ this.recorder.requestData();
1327
+ if (config.debug) console.log("[SpeechOS] Requested data flush before stop");
1328
+ await dataPromise;
1329
+ } catch (e) {
1330
+ if (config.debug) console.log("[SpeechOS] requestData() not supported or failed:", e);
1331
+ }
1332
+ const stopPromise = new Promise((resolve) => {
1333
+ if (!this.recorder) {
1334
+ resolve();
1335
+ return;
1336
+ }
1337
+ this.recorder.onstop = () => {
1338
+ if (config.debug) console.log("[SpeechOS] MediaRecorder onstop fired");
1339
+ resolve();
1340
+ };
1341
+ });
1342
+ this.recorder.stop();
1343
+ await stopPromise;
1344
+ if (safari) {
1345
+ if (config.debug) console.log("[SpeechOS] Safari: waiting 2s for encoding pipeline to flush");
1346
+ await new Promise((resolve) => setTimeout(resolve, 2e3));
1347
+ }
1348
+ }
1349
+ if (this.mediaStream) {
1350
+ for (const track of this.mediaStream.getTracks()) track.stop();
1351
+ this.mediaStream = null;
1352
+ }
1353
+ this.recorder = null;
1354
+ this.isRecording = false;
1355
+ this.isReady = false;
1356
+ this.buffer = [];
1357
+ if (config.debug) console.log("[SpeechOS] AudioCapture stopped");
1358
+ }
1359
+ /**
1360
+ * Check if currently recording.
1361
+ */
1362
+ get recording() {
1363
+ return this.isRecording;
1364
+ }
1365
+ /**
1366
+ * Check if ready (connection established, direct mode active).
1367
+ */
1368
+ get ready() {
1369
+ return this.isReady;
1370
+ }
1371
+ /**
1372
+ * Get the number of buffered chunks waiting to be sent.
1373
+ */
1374
+ get bufferedChunks() {
1375
+ return this.buffer.length;
1376
+ }
1377
+ };
849
1378
  /**
850
- * Clear all transcript history
1379
+ * Factory function to create an AudioCapture instance.
1380
+ * @param onChunk - Callback for receiving audio chunks
1381
+ * @param deviceId - Optional audio device ID (empty string or undefined for system default)
851
1382
  */
852
- function clearTranscripts() {
853
- try {
854
- localStorage.removeItem(STORAGE_KEY);
855
- } catch {}
1383
+ function createAudioCapture(onChunk, deviceId) {
1384
+ return new AudioCapture(onChunk, deviceId);
856
1385
  }
1386
+
1387
+ //#endregion
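
Note: the new AudioCapture class buffers MediaRecorder chunks until the connection is ready, then flushes them in order and switches to pass-through. The gating logic on its own (createChunkGate and send are illustrative names, not part of the package):

// Sketch of the buffer-until-ready gate used by AudioCapture (hypothetical names).
function createChunkGate(send) {
  let ready = false;
  let buffer = [];
  return {
    push(chunk) {
      if (ready) send(chunk);
      else buffer.push(chunk);
    },
    setReady() {
      const toFlush = buffer; // swap, then flush, so ordering is preserved
      buffer = [];
      for (const chunk of toFlush) send(chunk);
      ready = true;
    }
  };
}

// Usage: gate.push(...) from MediaRecorder's dataavailable handler; gate.setReady() once authenticated.
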
1388
+ //#region src/websocket.ts
1389
+ const MESSAGE_TYPE_AUTH = "auth";
1390
+ const MESSAGE_TYPE_READY = "ready";
1391
+ const MESSAGE_TYPE_TRANSCRIPTION = "transcription";
1392
+ const MESSAGE_TYPE_REQUEST_TRANSCRIPT = "request_transcript";
1393
+ const MESSAGE_TYPE_TRANSCRIPT = "transcript";
1394
+ const MESSAGE_TYPE_EDIT_TEXT = "edit_text";
1395
+ const MESSAGE_TYPE_EDITED_TEXT = "edited_text";
1396
+ const MESSAGE_TYPE_EXECUTE_COMMAND = "execute_command";
1397
+ const MESSAGE_TYPE_COMMAND_RESULT = "command_result";
1398
+ const MESSAGE_TYPE_ERROR = "error";
857
1399
  /**
858
- * Delete a single transcript by ID
1400
+ * Response timeout in milliseconds.
859
1401
  */
860
- function deleteTranscript(id) {
861
- const entries = getTranscripts().filter((e) => e.id !== id);
862
- try {
863
- localStorage.setItem(STORAGE_KEY, JSON.stringify(entries));
864
- } catch {}
865
- }
866
- const transcriptStore = {
867
- getTranscripts,
868
- saveTranscript,
869
- clearTranscripts,
870
- deleteTranscript
1402
+ const RESPONSE_TIMEOUT_MS = 15e3;
1403
+ /**
1404
+ * A deferred promise with timeout support.
1405
+ */
1406
+ var Deferred$1 = class {
1407
+ promise;
1408
+ _resolve;
1409
+ _reject;
1410
+ _timeoutId = null;
1411
+ _settled = false;
1412
+ constructor() {
1413
+ this.promise = new Promise((resolve, reject) => {
1414
+ this._resolve = resolve;
1415
+ this._reject = reject;
1416
+ });
1417
+ }
1418
+ setTimeout(ms, errorMessage, errorCode, errorSource) {
1419
+ this._timeoutId = setTimeout(() => {
1420
+ if (!this._settled) {
1421
+ console.error(`[SpeechOS] Error: ${errorMessage} (${errorCode})`);
1422
+ events.emit("error", {
1423
+ code: errorCode,
1424
+ message: errorMessage,
1425
+ source: errorSource
1426
+ });
1427
+ this.reject(new Error(errorMessage));
1428
+ }
1429
+ }, ms);
1430
+ }
1431
+ resolve(value) {
1432
+ if (!this._settled) {
1433
+ this._settled = true;
1434
+ this.clearTimeout();
1435
+ this._resolve(value);
1436
+ }
1437
+ }
1438
+ reject(error) {
1439
+ if (!this._settled) {
1440
+ this._settled = true;
1441
+ this.clearTimeout();
1442
+ this._reject(error);
1443
+ }
1444
+ }
1445
+ clearTimeout() {
1446
+ if (this._timeoutId !== null) {
1447
+ clearTimeout(this._timeoutId);
1448
+ this._timeoutId = null;
1449
+ }
1450
+ }
1451
+ get isSettled() {
1452
+ return this._settled;
1453
+ }
1454
+ };
1455
+ /**
1456
+ * Maximum time to wait for WebSocket buffer to drain.
1457
+ */
1458
+ const BUFFER_DRAIN_TIMEOUT_MS = 5e3;
1459
+ /**
1460
+ * Polling interval for checking WebSocket buffer.
1461
+ */
1462
+ const BUFFER_CHECK_INTERVAL_MS = 50;
1463
+ /**
1464
+ * WebSocket connection manager for voice sessions.
1465
+ */
1466
+ var WebSocketManager = class {
1467
+ ws = null;
1468
+ audioCapture = null;
1469
+ sessionId = null;
1470
+ pendingAuth = null;
1471
+ pendingTranscript = null;
1472
+ pendingEditText = null;
1473
+ pendingCommand = null;
1474
+ pendingAudioSends = /* @__PURE__ */ new Set();
1475
+ editOriginalText = null;
1476
+ lastInputText = void 0;
1477
+ sessionAction = "dictate";
1478
+ sessionInputText = "";
1479
+ sessionCommands = [];
1480
+ sessionSettings = {};
1481
+ /**
1482
+ * Get the WebSocket URL for voice sessions.
1483
+ */
1484
+ getWebSocketUrl() {
1485
+ const config = getConfig();
1486
+ const host = config.host || "https://app.speechos.ai";
1487
+ const wsUrl = host.replace(/^http/, "ws");
1488
+ return `${wsUrl}/ws/voice/`;
1489
+ }
1490
+ /**
1491
+ * Start a voice session with the WebSocket backend.
1492
+ *
1493
+ * This method:
1494
+ * 1. Starts audio capture immediately (buffering)
1495
+ * 2. Opens WebSocket connection
1496
+ * 3. Authenticates with API key and action parameters
1497
+ * 4. Flushes buffered audio and continues streaming
1498
+ *
1499
+ * @param options - Session options including action type and parameters
1500
+ */
1501
+ async startVoiceSession(options) {
1502
+ const config = getConfig();
1503
+ this.sessionAction = options?.action || "dictate";
1504
+ this.sessionInputText = options?.inputText || "";
1505
+ this.sessionCommands = options?.commands || [];
1506
+ this.sessionSettings = options?.settings || {};
1507
+ if (this.sessionAction === "edit") this.editOriginalText = this.sessionInputText;
1508
+ if (config.debug) console.log("[SpeechOS] Starting WebSocket voice session...");
1509
+ this.audioCapture = createAudioCapture((chunk) => {
1510
+ this.sendAudioChunk(chunk);
1511
+ }, this.sessionSettings.audioDeviceId);
1512
+ await this.audioCapture.start();
1513
+ if (options?.onMicReady) options.onMicReady();
1514
+ state.setMicEnabled(true);
1515
+ const wsUrl = this.getWebSocketUrl();
1516
+ if (config.debug) console.log("[SpeechOS] Connecting to WebSocket:", wsUrl);
1517
+ this.ws = new WebSocket(wsUrl);
1518
+ this.ws.onopen = () => {
1519
+ if (config.debug) console.log("[SpeechOS] WebSocket connected, authenticating...");
1520
+ this.authenticate();
1521
+ };
1522
+ this.ws.onmessage = (event) => {
1523
+ this.handleMessage(event.data);
1524
+ };
1525
+ this.ws.onerror = (event) => {
1526
+ console.error("[SpeechOS] WebSocket error:", event);
1527
+ events.emit("error", {
1528
+ code: "websocket_error",
1529
+ message: "WebSocket connection error",
1530
+ source: "connection"
1531
+ });
1532
+ };
1533
+ this.ws.onclose = (event) => {
1534
+ if (config.debug) console.log("[SpeechOS] WebSocket closed:", event.code, event.reason);
1535
+ state.setConnected(false);
1536
+ };
1537
+ this.pendingAuth = new Deferred$1();
1538
+ this.pendingAuth.setTimeout(RESPONSE_TIMEOUT_MS, "Connection timed out", "connection_timeout", "connection");
1539
+ await this.pendingAuth.promise;
1540
+ this.pendingAuth = null;
1541
+ if (this.audioCapture) this.audioCapture.setReady();
1542
+ state.setConnected(true);
1543
+ if (config.debug) console.log("[SpeechOS] WebSocket voice session ready");
1544
+ }
1545
+ /**
1546
+ * Send authentication message with action parameters.
1547
+ * All session parameters are now sent upfront in the auth message.
1548
+ */
1549
+ authenticate() {
1550
+ const config = getConfig();
1551
+ const audioFormat = getSupportedAudioFormat();
1552
+ const settings = this.sessionSettings;
1553
+ const authMessage = {
1554
+ type: MESSAGE_TYPE_AUTH,
1555
+ api_key: config.apiKey,
1556
+ user_id: config.userId || null,
1557
+ input_language: settings.inputLanguageCode ?? "en-US",
1558
+ output_language: settings.outputLanguageCode ?? "en-US",
1559
+ smart_format: settings.smartFormat ?? true,
1560
+ custom_vocabulary: settings.vocabulary ?? [],
1561
+ custom_snippets: settings.snippets ?? [],
1562
+ audio_format: audioFormat.format,
1563
+ action: this.sessionAction,
1564
+ input_text: this.sessionInputText,
1565
+ commands: this.sessionCommands
1566
+ };
1567
+ if (config.debug) console.log("[SpeechOS] Sending auth message with action:", this.sessionAction);
1568
+ this.ws?.send(JSON.stringify(authMessage));
1569
+ }
1570
+ /**
1571
+ * Send an audio chunk over the WebSocket.
1572
+ * Tracks the promise so we can wait for all sends to complete.
1573
+ */
1574
+ sendAudioChunk(chunk) {
1575
+ const sendPromise = this.doSendAudioChunk(chunk);
1576
+ this.pendingAudioSends.add(sendPromise);
1577
+ sendPromise.finally(() => {
1578
+ this.pendingAudioSends.delete(sendPromise);
1579
+ });
1580
+ }
1581
+ /**
1582
+ * Actually send the audio chunk (async operation).
1583
+ */
1584
+ async doSendAudioChunk(chunk) {
1585
+ if (this.ws && this.ws.readyState === WebSocket.OPEN) {
1586
+ const arrayBuffer = await chunk.arrayBuffer();
1587
+ this.ws.send(arrayBuffer);
1588
+ }
1589
+ }
1590
+ /**
1591
+ * Handle incoming WebSocket messages.
1592
+ */
1593
+ handleMessage(data) {
1594
+ const config = getConfig();
1595
+ try {
1596
+ const message = JSON.parse(data);
1597
+ if (config.debug) console.log("[SpeechOS] WebSocket message:", message);
1598
+ switch (message.type) {
1599
+ case MESSAGE_TYPE_READY:
1600
+ this.handleReady(message);
1601
+ break;
1602
+ case MESSAGE_TYPE_TRANSCRIPTION:
1603
+ this.handleIntermediateTranscription(message);
1604
+ break;
1605
+ case MESSAGE_TYPE_TRANSCRIPT:
1606
+ this.handleFinalTranscript(message);
1607
+ break;
1608
+ case MESSAGE_TYPE_EDITED_TEXT:
1609
+ this.handleEditedText(message);
1610
+ break;
1611
+ case MESSAGE_TYPE_COMMAND_RESULT:
1612
+ this.handleCommandResult(message);
1613
+ break;
1614
+ case MESSAGE_TYPE_ERROR:
1615
+ this.handleError(message);
1616
+ break;
1617
+ default: if (config.debug) console.log("[SpeechOS] Unknown message type:", message.type);
1618
+ }
1619
+ } catch (error) {
1620
+ console.error("[SpeechOS] Failed to parse message:", error);
1621
+ }
1622
+ }
1623
+ handleReady(message) {
1624
+ const config = getConfig();
1625
+ this.sessionId = message.session_id;
1626
+ if (config.debug) console.log("[SpeechOS] Session ready:", this.sessionId);
1627
+ if (this.pendingAuth) this.pendingAuth.resolve();
1628
+ }
1629
+ handleIntermediateTranscription(message) {
1630
+ const config = getConfig();
1631
+ if (config.debug) console.log("[SpeechOS] Intermediate transcription:", message.transcript, "final:", message.is_final);
1632
+ }
1633
+ handleFinalTranscript(message) {
1634
+ const transcript = message.transcript || "";
1635
+ events.emit("transcription:complete", { text: transcript });
1636
+ if (this.pendingTranscript) {
1637
+ this.pendingTranscript.resolve(transcript);
1638
+ this.pendingTranscript = null;
1639
+ }
1640
+ }
1641
+ handleEditedText(message) {
1642
+ const editedText = message.text || "";
1643
+ events.emit("edit:complete", {
1644
+ text: editedText,
1645
+ originalText: this.editOriginalText || ""
1646
+ });
1647
+ if (this.pendingEditText) {
1648
+ this.pendingEditText.resolve(editedText);
1649
+ this.pendingEditText = null;
1650
+ }
1651
+ this.editOriginalText = null;
1652
+ }
1653
+ handleCommandResult(message) {
1654
+ const commandResult = message.command || null;
1655
+ this.lastInputText = message.transcript;
1656
+ events.emit("command:complete", { command: commandResult });
1657
+ if (this.pendingCommand) {
1658
+ this.pendingCommand.resolve(commandResult);
1659
+ this.pendingCommand = null;
1660
+ }
1661
+ }
1662
+ handleError(message) {
1663
+ const errorCode = message.code || "server_error";
1664
+ const errorMessage = message.message || "A server error occurred";
1665
+ console.error(`[SpeechOS] Error: ${errorMessage} (${errorCode})`);
1666
+ events.emit("error", {
1667
+ code: errorCode,
1668
+ message: errorMessage,
1669
+ source: "server"
1670
+ });
1671
+ const error = new Error(errorMessage);
1672
+ if (this.pendingAuth) {
1673
+ this.pendingAuth.reject(error);
1674
+ this.pendingAuth = null;
1675
+ }
1676
+ if (this.pendingTranscript) {
1677
+ this.pendingTranscript.reject(error);
1678
+ this.pendingTranscript = null;
1679
+ }
1680
+ if (this.pendingEditText) {
1681
+ this.pendingEditText.reject(error);
1682
+ this.pendingEditText = null;
1683
+ }
1684
+ if (this.pendingCommand) {
1685
+ this.pendingCommand.reject(error);
1686
+ this.pendingCommand = null;
1687
+ }
1688
+ }
1689
+ /**
1690
+ * Stop the voice session and request the transcript.
1691
+ */
1692
+ async stopVoiceSession() {
1693
+ const config = getConfig();
1694
+ if (config.debug) console.log("[SpeechOS] Stopping voice session, requesting transcript...");
1695
+ await this.stopAudioCapture();
1696
+ this.pendingTranscript = new Deferred$1();
1697
+ this.pendingTranscript.setTimeout(RESPONSE_TIMEOUT_MS, "Transcription timed out. Please try again.", "transcription_timeout", "timeout");
1698
+ this.sendMessage({ type: MESSAGE_TYPE_REQUEST_TRANSCRIPT });
1699
+ const result = await this.pendingTranscript.promise;
1700
+ this.pendingTranscript = null;
1701
+ return result;
1702
+ }
1703
+ /**
1704
+ * Request text editing using the transcript as instructions.
1705
+ * Note: The input text was already sent in the auth message via startVoiceSession.
1706
+ */
1707
+ async requestEditText(_originalText) {
1708
+ const config = getConfig();
1709
+ if (config.debug) console.log("[SpeechOS] Requesting text edit...");
1710
+ await this.stopAudioCapture();
1711
+ this.pendingEditText = new Deferred$1();
1712
+ this.pendingEditText.setTimeout(RESPONSE_TIMEOUT_MS, "Edit request timed out. Please try again.", "edit_timeout", "timeout");
1713
+ this.sendMessage({ type: MESSAGE_TYPE_EDIT_TEXT });
1714
+ const result = await this.pendingEditText.promise;
1715
+ this.pendingEditText = null;
1716
+ return result;
1717
+ }
1718
+ /**
1719
+ * Request command matching using the transcript as input.
1720
+ * Note: The command definitions were already sent in the auth message via startVoiceSession.
1721
+ */
1722
+ async requestCommand(_commands) {
1723
+ const config = getConfig();
1724
+ if (config.debug) console.log("[SpeechOS] Requesting command match...");
1725
+ await this.stopAudioCapture();
1726
+ this.pendingCommand = new Deferred$1();
1727
+ this.pendingCommand.setTimeout(RESPONSE_TIMEOUT_MS, "Command request timed out. Please try again.", "command_timeout", "timeout");
1728
+ this.sendMessage({ type: MESSAGE_TYPE_EXECUTE_COMMAND });
1729
+ const result = await this.pendingCommand.promise;
1730
+ this.pendingCommand = null;
1731
+ return result;
1732
+ }
1733
+ /**
1734
+ * Stop audio capture and wait for all data to be sent.
1735
+ *
1736
+ * Waits for:
1737
+ * 1. All pending sendAudioChunk calls to complete (arrayBuffer conversion)
1738
+ * 2. WebSocket buffer to drain (all data transmitted)
1739
+ *
1740
+ * WebSocket message ordering ensures server receives all audio before transcript request.
1741
+ */
1742
+ async stopAudioCapture() {
1743
+ const config = getConfig();
1744
+ const startTime = Date.now();
1745
+ if (config.debug) console.log("[SpeechOS] stopAudioCapture: starting...");
1746
+ if (this.audioCapture) {
1747
+ await this.audioCapture.stop();
1748
+ this.audioCapture = null;
1749
+ if (config.debug) console.log(`[SpeechOS] stopAudioCapture: recorder stopped after ${Date.now() - startTime}ms`);
1750
+ }
1751
+ state.setMicEnabled(false);
1752
+ if (this.pendingAudioSends.size > 0) {
1753
+ if (config.debug) console.log(`[SpeechOS] stopAudioCapture: waiting for ${this.pendingAudioSends.size} pending audio sends...`);
1754
+ await Promise.all(this.pendingAudioSends);
1755
+ if (config.debug) console.log(`[SpeechOS] stopAudioCapture: all sends complete after ${Date.now() - startTime}ms`);
1756
+ } else if (config.debug) console.log("[SpeechOS] stopAudioCapture: no pending sends");
1757
+ await this.waitForBufferDrain();
1758
+ if (config.debug) console.log(`[SpeechOS] stopAudioCapture: complete after ${Date.now() - startTime}ms`);
1759
+ }
+ /**
+ * Wait for the WebSocket send buffer to drain.
+ *
+ * This ensures all audio data has been transmitted before we request
+ * the transcript. Uses the same pattern as LiveKit's ReadableStream approach.
+ */
+ async waitForBufferDrain() {
+ if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
+ const config = getConfig();
+ const startTime = Date.now();
+ while (this.ws.bufferedAmount > 0) {
+ if (Date.now() - startTime > BUFFER_DRAIN_TIMEOUT_MS) {
+ console.warn(`[SpeechOS] Buffer drain timeout, ${this.ws.bufferedAmount} bytes still pending`);
+ break;
+ }
+ await new Promise((resolve) => setTimeout(resolve, BUFFER_CHECK_INTERVAL_MS));
+ }
+ if (config.debug) console.log(`[SpeechOS] Buffer drained in ${Date.now() - startTime}ms`);
+ }
+ /**
+ * Send a JSON message over the WebSocket.
+ */
+ sendMessage(message) {
+ if (this.ws && this.ws.readyState === WebSocket.OPEN) this.ws.send(JSON.stringify(message));
+ }
+ /**
+ * Disconnect from the WebSocket.
+ */
+ async disconnect() {
+ const config = getConfig();
+ if (config.debug) console.log("[SpeechOS] Disconnecting WebSocket...");
+ await this.stopAudioCapture();
+ if (this.ws) {
+ this.ws.close();
+ this.ws = null;
+ }
+ const error = new Error("Disconnected");
+ if (this.pendingAuth) {
+ this.pendingAuth.reject(error);
+ this.pendingAuth = null;
+ }
+ if (this.pendingTranscript) {
+ this.pendingTranscript.reject(error);
+ this.pendingTranscript = null;
+ }
+ if (this.pendingEditText) {
+ this.pendingEditText.reject(error);
+ this.pendingEditText = null;
+ }
+ if (this.pendingCommand) {
+ this.pendingCommand.reject(error);
+ this.pendingCommand = null;
+ }
+ this.sessionId = null;
+ this.editOriginalText = null;
+ this.lastInputText = void 0;
+ this.sessionSettings = {};
+ state.setConnected(false);
+ state.setMicEnabled(false);
+ if (config.debug) console.log("[SpeechOS] WebSocket disconnected");
+ }
+ /**
+ * Check if connected to WebSocket.
+ */
+ isConnected() {
+ return this.ws !== null && this.ws.readyState === WebSocket.OPEN;
+ }
+ /**
+ * Get the last input text from a command result.
+ * This is the raw transcript of what the user said.
+ */
+ getLastInputText() {
+ return this.lastInputText;
+ }
  };
+ const websocket = new WebSocketManager();
 
  //#endregion
  //#region src/speechos.ts
  /**
+ * Get the active voice backend (always websocket now)
+ */
+ function getBackend$1() {
+ return websocket;
+ }
+ /**
  * SpeechOS Core SDK
  *
  * Provides two API layers:
@@ -891,7 +1861,6 @@ var SpeechOSCore = class {
  const currentConfig$1 = getConfig();
  if (currentConfig$1.debug) console.log("[SpeechOS] Initialized with config:", {
  host: currentConfig$1.host,
- position: currentConfig$1.position,
  debug: currentConfig$1.debug
  });
  }
@@ -931,7 +1900,6 @@ var SpeechOSCore = class {
  state.setRecordingState("processing");
  try {
  const transcript = await livekit.stopAndGetTranscript();
- transcriptStore.saveTranscript(transcript, "dictate");
  state.completeRecording();
  return transcript;
  } catch (error) {
@@ -948,7 +1916,6 @@ var SpeechOSCore = class {
  state.setRecordingState("processing");
  try {
  const editedText = await livekit.stopAndEdit(originalText);
- transcriptStore.saveTranscript(editedText, "edit", originalText);
  state.completeRecording();
  return editedText;
  } catch (error) {
@@ -974,8 +1941,13 @@ var SpeechOSCore = class {
  state.setActiveAction("dictate");
  state.startRecording();
  try {
- await livekit.startVoiceSession();
- state.setRecordingState("recording");
+ const backend = getBackend$1();
+ await backend.startVoiceSession({
+ action: "dictate",
+ onMicReady: () => {
+ state.setRecordingState("recording");
+ }
+ });
  return new Promise((resolve, reject) => {
  this._dictateResolve = resolve;
  this._dictateReject = reject;
@@ -995,8 +1967,8 @@ var SpeechOSCore = class {
  async stopDictation() {
  state.setRecordingState("processing");
  try {
- const transcript = await livekit.stopVoiceSession();
- transcriptStore.saveTranscript(transcript, "dictate");
+ const backend = getBackend$1();
+ const transcript = await backend.stopVoiceSession();
  state.completeRecording();
  if (this._dictateResolve) {
  this._dictateResolve(transcript);
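At the application level, the dictation flow around these hunks now reports "recording" only once onMicReady fires, i.e. once the microphone is actually capturing. A rough sketch follows; the start method name is assumed, since only stopDictation() is visible in this hunk.

// Hypothetical app-level dictation flow under the assumptions stated above.
async function runDictation() {
  const pending = speechOS.startDictation();          // start method name assumed
  // the SDK flips to "recording" only after onMicReady fires inside startVoiceSession
  await speechOS.stopDictation();                     // resolves `pending` with the transcript
  return pending;
}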
@@ -1030,8 +2002,14 @@ var SpeechOSCore = class {
  state.startRecording();
  this._editOriginalText = originalText;
  try {
- await livekit.startVoiceSession();
- state.setRecordingState("recording");
+ const backend = getBackend$1();
+ await backend.startVoiceSession({
+ action: "edit",
+ inputText: originalText,
+ onMicReady: () => {
+ state.setRecordingState("recording");
+ }
+ });
  return new Promise((resolve, reject) => {
  this._editResolve = resolve;
  this._editReject = reject;
@@ -1052,9 +2030,9 @@ var SpeechOSCore = class {
  async stopEdit() {
  state.setRecordingState("processing");
  try {
+ const backend = getBackend$1();
  const originalText = this._editOriginalText || "";
- const editedText = await livekit.requestEditText(originalText);
- transcriptStore.saveTranscript(editedText, "edit", originalText);
+ const editedText = await backend.requestEditText(originalText);
  state.completeRecording();
  if (this._editResolve) {
  this._editResolve(editedText);
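stopEdit() now resolves through the backend's requestEditText, which flushes buffered audio and sends a MESSAGE_TYPE_EDIT_TEXT message instead of going through LiveKit. A rough app-level sketch, with the start method name assumed (only stopEdit() is visible in this hunk):

// Hypothetical app-level edit flow under the assumptions stated above.
async function runEdit(originalText) {
  const pending = speechOS.startEdit(originalText);   // start method name assumed
  // ...the user speaks the correction...
  await speechOS.stopEdit();                          // resolves `pending` with the edited text
  return pending;
}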
@@ -1077,6 +2055,71 @@ var SpeechOSCore = class {
  }
  }
  /**
+ * One-shot command: connect, wait for agent, record voice, match against commands
+ * Automatically handles the full voice session lifecycle
+ *
+ * @param commands - Array of command definitions to match against
+ * @returns The matched command result or null if no match
+ */
+ async command(commands) {
+ this.ensureInitialized();
+ state.setActiveAction("command");
+ state.startRecording();
+ this._commandCommands = commands;
+ try {
+ const backend = getBackend$1();
+ await backend.startVoiceSession({
+ action: "command",
+ commands,
+ onMicReady: () => {
+ state.setRecordingState("recording");
+ }
+ });
+ return new Promise((resolve, reject) => {
+ this._commandResolve = resolve;
+ this._commandReject = reject;
+ });
+ } catch (error) {
+ state.setError(error instanceof Error ? error.message : "Failed to start command");
+ await this.cleanup();
+ throw error;
+ }
+ }
+ _commandCommands;
+ _commandResolve;
+ _commandReject;
+ /**
+ * Stop command recording and get the matched command
+ * Call this after command() when user stops speaking
+ */
+ async stopCommand() {
+ state.setRecordingState("processing");
+ try {
+ const backend = getBackend$1();
+ const commands = this._commandCommands || [];
+ const result = await backend.requestCommand(commands);
+ state.completeRecording();
+ if (this._commandResolve) {
+ this._commandResolve(result);
+ this._commandResolve = void 0;
+ this._commandReject = void 0;
+ }
+ return result;
+ } catch (error) {
+ const err = error instanceof Error ? error : new Error("Command request failed");
+ state.setError(err.message);
+ if (this._commandReject) {
+ this._commandReject(err);
+ this._commandResolve = void 0;
+ this._commandReject = void 0;
+ }
+ throw err;
+ } finally {
+ this._commandCommands = void 0;
+ await this.cleanup();
+ }
+ }
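A possible consumer flow for the new one-shot command API, matching the lifecycle documented above. The command objects here are placeholders; their exact shape is defined by the SDK's command types rather than shown in this diff.

// Hypothetical usage of command() / stopCommand(); command shape assumed.
const commands = [
  { name: "save", description: "Save the current document" },
  { name: "discard", description: "Discard unsaved changes" }
];
async function runCommand() {
  const pending = speechOS.command(commands);   // connects and starts recording
  // ...the app decides the utterance is finished...
  const match = await speechOS.stopCommand();   // also resolves `pending`
  if (match) console.log("Matched command:", match);
  else console.log("No matching command");
  return match;
}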
+ /**
  * Cancel the current operation
  */
  async cancel() {
@@ -1091,7 +2134,13 @@ var SpeechOSCore = class {
  this._editResolve = void 0;
  this._editReject = void 0;
  }
+ if (this._commandReject) {
+ this._commandReject(err);
+ this._commandResolve = void 0;
+ this._commandReject = void 0;
+ }
  this._editOriginalText = void 0;
+ this._commandCommands = void 0;
  await this.cleanup();
  state.cancelRecording();
  }
@@ -1118,7 +2167,8 @@ var SpeechOSCore = class {
  }
  async cleanup() {
  try {
- await livekit.disconnect();
+ const backend = getBackend$1();
+ await backend.disconnect();
  } catch (error) {
  const config = getConfig();
  if (config.debug) console.warn("[SpeechOS] Cleanup disconnect error:", error);
@@ -1134,6 +2184,9 @@ var SpeechOSCore = class {
  this._editResolve = void 0;
  this._editReject = void 0;
  this._editOriginalText = void 0;
+ this._commandResolve = void 0;
+ this._commandReject = void 0;
+ this._commandCommands = void 0;
  resetConfig();
  state.reset();
  events.clear();
@@ -1141,6 +2194,34 @@ var SpeechOSCore = class {
  };
  const speechOS = new SpeechOSCore();
 
+ //#endregion
+ //#region src/backend.ts
+ /**
+ * WebSocket backend adapter - wraps the websocket module to match the VoiceBackend interface
+ */
+ const websocketBackend = {
+ startVoiceSession: (options) => websocket.startVoiceSession(options),
+ stopVoiceSession: () => websocket.stopVoiceSession(),
+ requestEditText: (text) => websocket.requestEditText(text),
+ requestCommand: (commands) => websocket.requestCommand(commands),
+ disconnect: () => websocket.disconnect(),
+ isConnected: () => websocket.isConnected(),
+ getLastInputText: () => websocket.getLastInputText(),
+ prefetchToken: () => Promise.resolve({}),
+ startAutoRefresh: () => {},
+ stopAutoRefresh: () => {},
+ invalidateTokenCache: () => {}
+ };
+ /**
+ * Get the active voice backend.
+ * Always returns WebSocket backend (LiveKit is legacy).
+ *
+ * @returns The websocket backend
+ */
+ function getBackend() {
+ return websocketBackend;
+ }
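A sketch of how a caller might hold the adapter. getBackend() and the adapter methods are the ones defined above; the surrounding flow and comments are illustrative only.

// Illustrative use of the backend adapter; nothing here beyond the calls shown above.
const backend = getBackend();
backend.prefetchToken().then(() => {
  // resolves immediately with {} in 0.2.2; kept so the VoiceBackend interface stays stable
});
if (!backend.isConnected()) {
  // startVoiceSession({ action, onMicReady, ... }) is the entry point shown earlier
}
console.log(backend.getLastInputText()); // raw transcript behind the last command match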
+
 
  //#endregion
  //#region src/index.ts
@@ -1151,15 +2232,15 @@ exports.Deferred = Deferred;
  exports.SpeechOSEventEmitter = SpeechOSEventEmitter;
  exports.VERSION = VERSION;
  exports.createStateManager = createStateManager;
- exports.defaultConfig = defaultConfig;
  exports.events = events;
+ exports.getBackend = getBackend;
  exports.getConfig = getConfig;
  exports.livekit = livekit;
  exports.resetConfig = resetConfig;
  exports.setConfig = setConfig;
  exports.speechOS = speechOS;
  exports.state = state;
- exports.transcriptStore = transcriptStore;
  exports.updateUserId = updateUserId;
  exports.validateConfig = validateConfig;
+ exports.websocket = websocket;
  //# sourceMappingURL=index.cjs.map
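For CommonJS consumers, the net effect of this last hunk is that defaultConfig and transcriptStore are no longer exported, while getBackend and websocket are new. A minimal before/after sketch:

// 0.2.0 (no longer available in 0.2.2)
// const { defaultConfig, transcriptStore } = require("@speechos/core");

// 0.2.2
const { speechOS, getBackend, websocket } = require("@speechos/core");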