@speechos/core 0.2.9 → 0.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1,27 +1,3 @@
- //#region rolldown:runtime
- var __create = Object.create;
- var __defProp = Object.defineProperty;
- var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
- var __getOwnPropNames = Object.getOwnPropertyNames;
- var __getProtoOf = Object.getPrototypeOf;
- var __hasOwnProp = Object.prototype.hasOwnProperty;
- var __copyProps = (to, from, except, desc) => {
- if (from && typeof from === "object" || typeof from === "function") for (var keys = __getOwnPropNames(from), i = 0, n = keys.length, key; i < n; i++) {
- key = keys[i];
- if (!__hasOwnProp.call(to, key) && key !== except) __defProp(to, key, {
- get: ((k) => from[k]).bind(null, key),
- enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable
- });
- }
- return to;
- };
- var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", {
- value: mod,
- enumerable: true
- }) : target, mod));
-
- //#endregion
- const livekit_client = __toESM(require("livekit-client"));

 //#region src/config.ts
 /**
@@ -210,6 +186,8 @@ const initialState = {
 isMicEnabled: false,
 activeAction: null,
 focusedElement: null,
+ selectionText: null,
+ selectionElement: null,
 recordingState: "idle",
 errorMessage: null
 };
@@ -311,6 +289,26 @@ var StateManager = class {
 this.setState({ focusedElement: element });
 }
 /**
+ * Set the current text selection
+ * @param text - Selected text (null to clear)
+ * @param element - Element associated with selection
+ */
+ setSelection(text, element) {
+ this.setState({
+ selectionText: text,
+ selectionElement: element
+ });
+ }
+ /**
+ * Clear the current text selection
+ */
+ clearSelection() {
+ this.setState({
+ selectionText: null,
+ selectionElement: null
+ });
+ }
+ /**
 * Set the active action
 * @param action - The action to set as active
 */
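The new `selectionText` / `selectionElement` fields let a host app mirror the user's current text selection into SDK state. A minimal sketch of wiring this up in a browser via the package's `state` export (the `selectionchange` listener and the `document.activeElement` choice are illustrative app-side decisions, not part of the package):

```typescript
import { state } from "@speechos/core";

// Keep SDK state in sync with the browser selection (illustrative wiring).
document.addEventListener("selectionchange", () => {
  const sel = document.getSelection();
  if (sel && !sel.isCollapsed) {
    state.setSelection(sel.toString(), document.activeElement);
  } else {
    state.clearSelection();
  }
});
```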
@@ -326,7 +324,7 @@ var StateManager = class {
 }
 /**
 * Set the connection state
- * @param isConnected - Whether connected to LiveKit
+ * @param isConnected - Whether connected to the backend
 */
 setConnected(isConnected) {
 this.setState({ isConnected });
@@ -412,746 +410,6 @@ function createStateManager(initial) {
 });
 }

- //#endregion
- //#region src/livekit.ts
- const MESSAGE_TYPE_REQUEST_TRANSCRIPT$1 = "request_transcript";
- const MESSAGE_TYPE_TRANSCRIPT$1 = "transcript";
- const MESSAGE_TYPE_EDIT_TEXT$1 = "edit_text";
- const MESSAGE_TYPE_EDITED_TEXT$1 = "edited_text";
- const MESSAGE_TYPE_EXECUTE_COMMAND$1 = "execute_command";
- const MESSAGE_TYPE_COMMAND_RESULT$1 = "command_result";
- const MESSAGE_TYPE_ERROR$1 = "error";
- const TOPIC_SPEECHOS = "speechos";
- const TOKEN_CACHE_TTL_MS = 4 * 60 * 1e3;
- /**
- * A deferred promise with timeout support.
- * Encapsulates resolve/reject/timeout in a single object for cleaner async handling.
- */
- var Deferred = class {
- promise;
- _resolve;
- _reject;
- _timeoutId = null;
- _settled = false;
- constructor() {
- this.promise = new Promise((resolve, reject) => {
- this._resolve = resolve;
- this._reject = reject;
- });
- }
- /**
- * Set a timeout that will reject the promise with the given error
- */
- setTimeout(ms, errorMessage, errorCode, errorSource) {
- this._timeoutId = setTimeout(() => {
- if (!this._settled) {
- console.error(`[SpeechOS] Error: ${errorMessage} (${errorCode})`);
- events.emit("error", {
- code: errorCode,
- message: errorMessage,
- source: errorSource
- });
- this.reject(new Error(errorMessage));
- }
- }, ms);
- }
- resolve(value) {
- if (!this._settled) {
- this._settled = true;
- this.clearTimeout();
- this._resolve(value);
- }
- }
- reject(error) {
- if (!this._settled) {
- this._settled = true;
- this.clearTimeout();
- this._reject(error);
- }
- }
- clearTimeout() {
- if (this._timeoutId !== null) {
- clearTimeout(this._timeoutId);
- this._timeoutId = null;
- }
- }
- get isSettled() {
- return this._settled;
- }
- };
- /**
- * LiveKit connection manager
- */
- var LiveKitManager = class {
- room = null;
- tokenData = null;
- micTrack = null;
- cachedTokenData = null;
- tokenCacheTimestamp = null;
- tokenPrefetchPromise = null;
- tokenRefreshTimer = null;
- autoRefreshEnabled = false;
- pendingTranscript = null;
- pendingEditText = null;
- pendingCommand = null;
- pendingTrackSubscribed = null;
- editOriginalText = null;
- sessionSettings = {};
- /**
- * Check if the cached token is still valid (within TTL)
- */
- isCachedTokenValid() {
- if (!this.cachedTokenData || !this.tokenCacheTimestamp) return false;
- const age = Date.now() - this.tokenCacheTimestamp;
- return age < TOKEN_CACHE_TTL_MS;
- }
- /**
- * Pre-fetch a LiveKit token for later use
- * Call this early (e.g., when widget expands) to reduce latency when starting a voice session.
- * If a prefetch is already in progress, returns the existing promise.
- * If a valid cached token exists, returns it immediately.
- */
- async prefetchToken() {
- const config = getConfig();
- if (this.isCachedTokenValid() && this.cachedTokenData) {
- if (config.debug) console.log("[SpeechOS] Using cached token (prefetch hit)");
- return this.cachedTokenData;
- }
- if (this.tokenPrefetchPromise) {
- if (config.debug) console.log("[SpeechOS] Prefetch already in progress, awaiting...");
- return this.tokenPrefetchPromise;
- }
- if (config.debug) console.log("[SpeechOS] Starting token prefetch...");
- this.tokenPrefetchPromise = this.fetchTokenFromServer().then((data) => {
- this.cachedTokenData = data;
- this.tokenCacheTimestamp = Date.now();
- this.tokenPrefetchPromise = null;
- return data;
- }).catch((error) => {
- this.tokenPrefetchPromise = null;
- throw error;
- });
- return this.tokenPrefetchPromise;
- }
- /**
- * Fetch a LiveKit token from the backend
- * Uses cached token if valid, otherwise fetches a fresh one.
- * Includes language settings and user vocabulary which are stored in the VoiceSession.
- */
- async fetchToken() {
- const config = getConfig();
- if (this.isCachedTokenValid() && this.cachedTokenData) {
- if (config.debug) console.log("[SpeechOS] Using cached token");
- this.tokenData = this.cachedTokenData;
- return this.cachedTokenData;
- }
- if (this.tokenPrefetchPromise) {
- if (config.debug) console.log("[SpeechOS] Waiting for prefetch to complete...");
- const data$1 = await this.tokenPrefetchPromise;
- this.tokenData = data$1;
- return data$1;
- }
- const data = await this.fetchTokenFromServer();
- this.cachedTokenData = data;
- this.tokenCacheTimestamp = Date.now();
- this.tokenData = data;
- return data;
- }
- /**
- * Internal method to fetch a fresh token from the server
- */
- async fetchTokenFromServer() {
- const config = getConfig();
- const url = `${config.host}/livekit/api/token/`;
- const settings = this.sessionSettings;
- const inputLanguage = settings.inputLanguageCode ?? "en-US";
- const outputLanguage = settings.outputLanguageCode ?? "en-US";
- const smartFormat = settings.smartFormat ?? true;
- const vocabulary = settings.vocabulary ?? [];
- const snippets = settings.snippets ?? [];
- if (config.debug) {
- console.log("[SpeechOS] Fetching LiveKit token from:", url);
- console.log("[SpeechOS] Session settings:", {
- inputLanguage,
- outputLanguage,
- smartFormat,
- snippetsCount: snippets.length,
- vocabularyCount: vocabulary.length
- });
- }
- const response = await fetch(url, {
- method: "POST",
- headers: {
- "Content-Type": "application/json",
- ...config.apiKey ? { Authorization: `Api-Key ${config.apiKey}` } : {}
- },
- body: JSON.stringify({
- user_id: config.userId || null,
- input_language: inputLanguage,
- output_language: outputLanguage,
- smart_format: smartFormat,
- custom_vocabulary: vocabulary,
- custom_snippets: snippets
- })
- });
- if (!response.ok) throw new Error(`Failed to fetch LiveKit token: ${response.status} ${response.statusText}`);
- const data = await response.json();
- if (config.debug) console.log("[SpeechOS] LiveKit token received:", {
- room: data.room,
- identity: data.identity,
- ws_url: data.ws_url
- });
- return data;
- }
- /**
- * Connect to a LiveKit room (fresh connection each time)
- */
- async connect() {
- const config = getConfig();
- await this.fetchToken();
- if (!this.tokenData) throw new Error("No token available for LiveKit connection");
- this.room = new livekit_client.Room({
- adaptiveStream: true,
- dynacast: true
- });
- this.setupRoomEvents();
- if (config.debug) console.log("[SpeechOS] Connecting to LiveKit room:", this.tokenData.room);
- await this.room.connect(this.tokenData.ws_url, this.tokenData.token);
- state.setConnected(true);
- if (config.debug) console.log("[SpeechOS] Connected to LiveKit room:", this.room.name);
- return this.room;
- }
- /**
- * Wait until the agent is ready to receive audio
- * Resolves when LocalTrackSubscribed event is received
- */
- async waitUntilReady() {
- if (!this.room || this.room.state !== "connected") throw new Error("Not connected to room");
- if (this.pendingTrackSubscribed) return this.pendingTrackSubscribed.promise;
- this.pendingTrackSubscribed = new Deferred();
- this.pendingTrackSubscribed.setTimeout(15e3, "Connection timed out - agent not available", "connection_timeout", "connection");
- return this.pendingTrackSubscribed.promise;
- }
- /**
- * Set up LiveKit room event listeners
- */
- setupRoomEvents() {
- if (!this.room) return;
- const config = getConfig();
- this.room.on(livekit_client.RoomEvent.Connected, () => {
- if (config.debug) console.log("[SpeechOS] Room connected");
- state.setConnected(true);
- });
- this.room.on(livekit_client.RoomEvent.Disconnected, (reason) => {
- if (config.debug) console.log("[SpeechOS] Room disconnected:", reason);
- state.setConnected(false);
- state.setMicEnabled(false);
- });
- this.room.on(livekit_client.RoomEvent.ParticipantConnected, (participant) => {
- if (config.debug) console.log("[SpeechOS] Participant connected:", participant.identity);
- });
- this.room.on(livekit_client.RoomEvent.LocalTrackSubscribed, (publication) => {
- if (config.debug) console.log("[SpeechOS] LocalTrackSubscribed event fired:", publication.trackSid);
- if (this.pendingTrackSubscribed) {
- this.pendingTrackSubscribed.resolve();
- this.pendingTrackSubscribed = null;
- }
- });
- this.room.on(livekit_client.RoomEvent.LocalTrackPublished, (publication) => {
- if (config.debug) console.log("[SpeechOS] LocalTrackPublished:", publication.trackSid, publication.source);
- });
- this.room.on(livekit_client.RoomEvent.DataReceived, (data, participant) => {
- this.handleDataMessage(data, participant);
- });
- }
- /**
- * Handle incoming data messages from the agent
- */
- handleDataMessage(data, _participant) {
- const config = getConfig();
- try {
- const message = JSON.parse(new TextDecoder().decode(data));
- if (config.debug) console.log("[SpeechOS] Data received:", message);
- if (message.type === MESSAGE_TYPE_TRANSCRIPT$1) {
- const transcript = message.transcript || "";
- if (config.debug) console.log("[SpeechOS] Transcript received:", transcript);
- events.emit("transcription:complete", { text: transcript });
- if (this.pendingTranscript) {
- this.pendingTranscript.resolve(transcript);
- this.pendingTranscript = null;
- }
- } else if (message.type === MESSAGE_TYPE_EDITED_TEXT$1) {
- const editedText = message.text || "";
- if (config.debug) console.log("[SpeechOS] Edited text received:", editedText);
- events.emit("edit:complete", {
- text: editedText,
- originalText: this.editOriginalText || ""
- });
- if (this.pendingEditText) {
- this.pendingEditText.resolve(editedText);
- this.pendingEditText = null;
- }
- this.editOriginalText = null;
- } else if (message.type === MESSAGE_TYPE_COMMAND_RESULT$1) {
- const commandResult = message.command || null;
- if (config.debug) console.log("[SpeechOS] Command result received:", commandResult);
- events.emit("command:complete", { command: commandResult });
- if (this.pendingCommand) {
- this.pendingCommand.resolve(commandResult);
- this.pendingCommand = null;
- }
- } else if (message.type === MESSAGE_TYPE_ERROR$1) {
- const serverError = message;
- const errorCode = serverError.code || "server_error";
- const errorMessage = serverError.message || "A server error occurred";
- console.error(`[SpeechOS] Error: ${errorMessage} (${errorCode})`);
- if (config.debug && serverError.details) console.error("[SpeechOS] Error details:", serverError.details);
- events.emit("error", {
- code: errorCode,
- message: errorMessage,
- source: "server"
- });
- const error = new Error(errorMessage);
- if (this.pendingTranscript) {
- this.pendingTranscript.reject(error);
- this.pendingTranscript = null;
- }
- if (this.pendingEditText) {
- this.pendingEditText.reject(error);
- this.pendingEditText = null;
- }
- if (this.pendingCommand) {
- this.pendingCommand.reject(error);
- this.pendingCommand = null;
- }
- }
- } catch (error) {
- console.error("[SpeechOS] Failed to parse data message:", error);
- }
- }
- /**
- * Publish microphone audio track
- * Uses the device ID from session settings if set
- */
- async enableMicrophone() {
- if (!this.room || this.room.state !== "connected") throw new Error("Not connected to room");
- const config = getConfig();
- if (!this.micTrack) {
- if (config.debug) console.log("[SpeechOS] Creating microphone track...");
- const deviceId = this.sessionSettings.audioDeviceId;
- const trackOptions = {
- echoCancellation: true,
- noiseSuppression: true
- };
- if (deviceId) {
- trackOptions.deviceId = { exact: deviceId };
- if (config.debug) console.log("[SpeechOS] Using audio device:", deviceId);
- }
- try {
- this.micTrack = await (0, livekit_client.createLocalAudioTrack)(trackOptions);
- } catch (error) {
- if (deviceId && error instanceof Error) {
- console.warn("[SpeechOS] Selected audio device unavailable, falling back to default:", error.message);
- this.micTrack = await (0, livekit_client.createLocalAudioTrack)({
- echoCancellation: true,
- noiseSuppression: true
- });
- } else throw error;
- }
- this.logMicrophoneInfo();
- }
- const existingPub = this.room.localParticipant.getTrackPublication(livekit_client.Track.Source.Microphone);
- if (!existingPub) {
- await this.room.localParticipant.publishTrack(this.micTrack, { source: livekit_client.Track.Source.Microphone });
- state.setMicEnabled(true);
- if (config.debug) console.log("[SpeechOS] Microphone track published");
- }
- }
- /**
- * Log information about the current microphone track
- */
- logMicrophoneInfo() {
- if (!this.micTrack) return;
- const config = getConfig();
- const mediaTrack = this.micTrack.mediaStreamTrack;
- const settings = mediaTrack.getSettings();
- console.log("[SpeechOS] Microphone active:", {
- deviceId: settings.deviceId || "unknown",
- label: mediaTrack.label || "Unknown device",
- sampleRate: settings.sampleRate,
- channelCount: settings.channelCount,
- echoCancellation: settings.echoCancellation,
- noiseSuppression: settings.noiseSuppression
- });
- if (config.debug) console.log("[SpeechOS] Full audio track settings:", settings);
- }
- /**
- * Disable microphone audio track
- */
- async disableMicrophone() {
- const config = getConfig();
- if (this.micTrack) {
- if (config.debug) console.log("[SpeechOS] Disabling microphone track...");
- if (this.room?.state === "connected") try {
- await this.room.localParticipant.unpublishTrack(this.micTrack);
- if (config.debug) console.log("[SpeechOS] Microphone track unpublished");
- } catch (error) {
- console.warn("[SpeechOS] Error unpublishing track:", error);
- }
- this.micTrack.stop();
- this.micTrack.detach();
- this.micTrack = null;
- state.setMicEnabled(false);
- if (config.debug) console.log("[SpeechOS] Microphone track stopped and detached");
- }
- }
- /**
- * Send a data message to the room
- */
- async sendDataMessage(message) {
- if (!this.room || this.room.state !== "connected") throw new Error("Not connected to room");
- const data = new TextEncoder().encode(JSON.stringify(message));
- await this.room.localParticipant.publishData(data, {
- reliable: true,
- topic: TOPIC_SPEECHOS
- });
- }
- /**
- * Start a voice session with pre-connect audio buffering
- * Fetches a fresh token, then enables mic with preConnectBuffer to capture audio while connecting.
- * Agent subscription happens in the background - we don't block on it.
- *
- * @param options - Session options including action type and parameters
- */
- async startVoiceSession(options) {
- const config = getConfig();
- if (config.debug) console.log("[SpeechOS] Starting voice session...");
- this.sessionSettings = options?.settings || {};
- await this.fetchToken();
- if (!this.tokenData) throw new Error("No token available for LiveKit connection");
- this.pendingTrackSubscribed = new Deferred();
- this.pendingTrackSubscribed.setTimeout(15e3, "Connection timed out - agent not available", "connection_timeout", "connection");
- this.room = new livekit_client.Room({
- adaptiveStream: true,
- dynacast: true
- });
- this.setupRoomEvents();
- if (config.debug) console.log("[SpeechOS] Connecting to LiveKit room:", this.tokenData.room, "at", this.tokenData.ws_url);
- await this.room.connect(this.tokenData.ws_url, this.tokenData.token);
- if (config.debug) console.log("[SpeechOS] Connected, enabling microphone with preConnectBuffer...");
- await this.enableMicrophoneWithPreConnectBuffer();
- if (options?.onMicReady) options.onMicReady();
- state.setConnected(true);
- if (config.debug) console.log("[SpeechOS] Voice session ready - microphone active");
- this.waitForAgentSubscription();
- }
- /**
- * Wait for the agent to subscribe to our audio track in the background
- * Handles timeout errors without blocking the main flow
- */
- waitForAgentSubscription() {
- const config = getConfig();
- if (!this.pendingTrackSubscribed) return;
- this.pendingTrackSubscribed.promise.then(() => {
- if (config.debug) console.log("[SpeechOS] Agent subscribed to audio track - full duplex established");
- this.pendingTrackSubscribed = null;
- }).catch((error) => {
- console.warn("[SpeechOS] Agent subscription timeout:", error.message);
- this.pendingTrackSubscribed = null;
- });
- }
- /**
- * Enable microphone with pre-connect buffering
- * This starts capturing audio locally before the room is connected,
- * buffering it until the connection is established.
- */
- async enableMicrophoneWithPreConnectBuffer() {
- if (!this.room) throw new Error("Room not initialized");
- const config = getConfig();
- const deviceId = this.sessionSettings.audioDeviceId;
- const constraints = {
- echoCancellation: true,
- noiseSuppression: true
- };
- if (deviceId) {
- constraints.deviceId = { exact: deviceId };
- if (config.debug) console.log("[SpeechOS] Using audio device:", deviceId);
- }
- try {
- await this.room.localParticipant.setMicrophoneEnabled(true, constraints, { preConnectBuffer: true });
- state.setMicEnabled(true);
- const micPub = this.room.localParticipant.getTrackPublication(livekit_client.Track.Source.Microphone);
- if (micPub?.track) {
- this.micTrack = micPub.track;
- this.logMicrophoneInfo();
- }
- if (config.debug) console.log("[SpeechOS] Microphone enabled with pre-connect buffer - audio is being captured");
- } catch (error) {
- if (deviceId && error instanceof Error) {
- console.warn("[SpeechOS] Selected audio device unavailable, falling back to default:", error.message);
- await this.room.localParticipant.setMicrophoneEnabled(true, {
- echoCancellation: true,
- noiseSuppression: true
- }, { preConnectBuffer: true });
- state.setMicEnabled(true);
- } else throw error;
- }
- }
- /**
- * Stop the voice session and request the transcript
- * Returns a promise that resolves with the transcript text
- * @throws Error if timeout occurs waiting for transcript
- */
- async stopVoiceSession() {
- const config = getConfig();
- const settings = this.sessionSettings;
- const inputLanguage = settings.inputLanguageCode ?? "en-US";
- const outputLanguage = settings.outputLanguageCode ?? "en-US";
- console.log("[SpeechOS] Dictate command:", {
- inputLanguage,
- outputLanguage
- });
- if (config.debug) console.log("[SpeechOS] Stopping voice session, requesting transcript...");
- await this.disableMicrophone();
- if (config.debug) console.log("[SpeechOS] Requesting transcript from agent...");
- this.pendingTranscript = new Deferred();
- this.pendingTranscript.setTimeout(1e4, "Transcription timed out. Please try again.", "transcription_timeout", "timeout");
- await this.sendDataMessage({ type: MESSAGE_TYPE_REQUEST_TRANSCRIPT$1 });
- const result = await this.pendingTranscript.promise;
- this.pendingTranscript = null;
- return result;
- }
- /**
- * Alias for stopVoiceSession - granular API naming
- */
- async stopAndGetTranscript() {
- return this.stopVoiceSession();
- }
- /**
- * Request text editing using the transcript as instructions
- * Sends the original text to the backend, which applies the spoken instructions
- * Returns a promise that resolves with the edited text
- * @throws Error if timeout occurs waiting for edited text
- */
- async requestEditText(originalText) {
- const config = getConfig();
- const settings = this.sessionSettings;
- const inputLanguage = settings.inputLanguageCode ?? "en-US";
- const outputLanguage = settings.outputLanguageCode ?? "en-US";
- console.log("[SpeechOS] Edit command:", {
- inputLanguage,
- outputLanguage,
- originalTextLength: originalText.length
- });
- if (config.debug) console.log("[SpeechOS] Requesting text edit...");
- this.editOriginalText = originalText;
- await this.disableMicrophone();
- if (config.debug) console.log("[SpeechOS] Sending edit_text request to agent...");
- this.pendingEditText = new Deferred();
- this.pendingEditText.setTimeout(15e3, "Edit request timed out. Please try again.", "edit_timeout", "timeout");
- await this.sendDataMessage({
- type: MESSAGE_TYPE_EDIT_TEXT$1,
- text: originalText
- });
- const result = await this.pendingEditText.promise;
- this.pendingEditText = null;
- return result;
- }
- /**
- * Alias for requestEditText - granular API naming
- */
- async stopAndEdit(originalText) {
- return this.requestEditText(originalText);
- }
- /**
- * Request command matching using the transcript as input
- * Sends command definitions to the backend, which matches the user's speech against them
- * Returns a promise that resolves with the matched command or null if no match
- * @throws Error if timeout occurs waiting for command result
- */
- async requestCommand(commands) {
- const config = getConfig();
- const settings = this.sessionSettings;
- const inputLanguage = settings.inputLanguageCode ?? "en-US";
- console.log("[SpeechOS] Command request:", {
- inputLanguage,
- commandCount: commands.length
- });
- if (config.debug) console.log("[SpeechOS] Requesting command match...");
- await this.disableMicrophone();
- if (config.debug) console.log("[SpeechOS] Sending execute_command request to agent...");
- this.pendingCommand = new Deferred();
- this.pendingCommand.setTimeout(15e3, "Command request timed out. Please try again.", "command_timeout", "timeout");
- await this.sendDataMessage({
- type: MESSAGE_TYPE_EXECUTE_COMMAND$1,
- commands
- });
- const result = await this.pendingCommand.promise;
- this.pendingCommand = null;
- return result;
- }
- /**
- * Alias for requestCommand - granular API naming
- */
- async stopAndCommand(commands) {
- return this.requestCommand(commands);
- }
- /**
- * Disconnect from the current room
- * Clears the token so a fresh one is fetched for the next session
- */
- async disconnect() {
- const config = getConfig();
- if (config.debug) console.log("[SpeechOS] Disconnecting from room...");
- await this.disableMicrophone();
- if (this.room) {
- this.room.removeAllListeners();
- await this.room.disconnect();
- this.room = null;
- state.setConnected(false);
- if (config.debug) console.log("[SpeechOS] Room disconnected and cleaned up");
- }
- if (this.pendingTranscript) {
- this.pendingTranscript.reject(new Error("Disconnected"));
- this.pendingTranscript = null;
- }
- if (this.pendingEditText) {
- this.pendingEditText.reject(new Error("Disconnected"));
- this.pendingEditText = null;
- }
- if (this.pendingCommand) {
- this.pendingCommand.reject(new Error("Disconnected"));
- this.pendingCommand = null;
- }
- if (this.pendingTrackSubscribed) {
- this.pendingTrackSubscribed.reject(new Error("Disconnected"));
- this.pendingTrackSubscribed = null;
- }
- this.tokenData = null;
- this.editOriginalText = null;
- this.sessionSettings = {};
- if (config.debug) console.log("[SpeechOS] Session state cleared");
- }
- /**
- * Invalidate the cached token
- * Call this when settings change that would affect the token (language, vocabulary)
- */
- invalidateTokenCache() {
- const config = getConfig();
- if (config.debug) console.log("[SpeechOS] Token cache invalidated");
- this.cachedTokenData = null;
- this.tokenCacheTimestamp = null;
- }
- /**
- * Start auto-refreshing the token while the widget is expanded.
- * Call this after a voice session completes to immediately fetch a fresh token
- * (since each command requires its own token) and keep it fresh for subsequent commands.
- */
- startAutoRefresh() {
- const config = getConfig();
- this.autoRefreshEnabled = true;
- if (config.debug) console.log("[SpeechOS] Token auto-refresh enabled");
- this.invalidateTokenCache();
- this.prefetchToken().then(() => {
- this.scheduleTokenRefresh();
- }).catch((error) => {
- if (config.debug) console.warn("[SpeechOS] Failed to prefetch token after command:", error);
- if (this.autoRefreshEnabled) this.tokenRefreshTimer = setTimeout(() => {
- this.performAutoRefresh();
- }, 5 * 1e3);
- });
- }
- /**
- * Stop auto-refreshing the token.
- * Call this when the widget collapses or user navigates away.
- */
- stopAutoRefresh() {
- const config = getConfig();
- this.autoRefreshEnabled = false;
- if (this.tokenRefreshTimer) {
- clearTimeout(this.tokenRefreshTimer);
- this.tokenRefreshTimer = null;
- }
- if (config.debug) console.log("[SpeechOS] Token auto-refresh disabled");
- }
- /**
- * Schedule a token refresh before the current cache expires.
- * Handles computer sleep by checking elapsed time on each refresh attempt.
- */
- scheduleTokenRefresh() {
- if (!this.autoRefreshEnabled) return;
- if (this.tokenRefreshTimer) {
- clearTimeout(this.tokenRefreshTimer);
- this.tokenRefreshTimer = null;
- }
- const config = getConfig();
- const refreshBuffer = 30 * 1e3;
- let timeUntilRefresh;
- if (this.tokenCacheTimestamp) {
- const age = Date.now() - this.tokenCacheTimestamp;
- const timeRemaining = TOKEN_CACHE_TTL_MS - age;
- timeUntilRefresh = Math.max(0, timeRemaining - refreshBuffer);
- } else timeUntilRefresh = 0;
- if (config.debug) console.log(`[SpeechOS] Scheduling token refresh in ${Math.round(timeUntilRefresh / 1e3)}s`);
- this.tokenRefreshTimer = setTimeout(() => {
- this.performAutoRefresh();
- }, timeUntilRefresh);
- }
- /**
- * Perform the auto-refresh, handling computer sleep scenarios.
- */
- async performAutoRefresh() {
- if (!this.autoRefreshEnabled) return;
- const config = getConfig();
- if (this.isCachedTokenValid()) {
- if (config.debug) console.log("[SpeechOS] Token still valid on refresh check, rescheduling");
- this.scheduleTokenRefresh();
- return;
- }
- if (config.debug) console.log("[SpeechOS] Auto-refreshing token...");
- try {
- const data = await this.fetchTokenFromServer();
- this.cachedTokenData = data;
- this.tokenCacheTimestamp = Date.now();
- if (config.debug) console.log("[SpeechOS] Token auto-refreshed successfully");
- this.scheduleTokenRefresh();
- } catch (error) {
- console.warn("[SpeechOS] Token auto-refresh failed:", error);
- if (this.autoRefreshEnabled) this.tokenRefreshTimer = setTimeout(() => {
- this.performAutoRefresh();
- }, 30 * 1e3);
- }
- }
- /**
- * Get the current room instance
- */
- getRoom() {
- return this.room;
- }
- /**
- * Get the current token data
- */
- getTokenData() {
- return this.tokenData;
- }
- /**
- * Check if connected to a room
- */
- isConnected() {
- return this.room?.state === "connected";
- }
- /**
- * Check if microphone is enabled
- */
- isMicrophoneEnabled() {
- return this.micTrack !== null;
- }
- };
- const livekit = new LiveKitManager();
- events.on("settings:changed", () => {
- livekit.invalidateTokenCache();
- });
-
 //#endregion
 //#region src/audio-capture.ts
 /**
@@ -1449,7 +707,7 @@ const RESPONSE_TIMEOUT_MS = 15e3;
 /**
 * A deferred promise with timeout support.
 */
- var Deferred$1 = class {
+ var Deferred = class {
 promise;
 _resolve;
 _reject;
@@ -1560,7 +818,7 @@ var WebSocketManager = class {
 state.setMicEnabled(true);
 const wsUrl = this.getWebSocketUrl();
 if (config.debug) console.log("[SpeechOS] Connecting to WebSocket:", wsUrl);
- this.pendingAuth = new Deferred$1();
+ this.pendingAuth = new Deferred();
 this.pendingAuth.setTimeout(RESPONSE_TIMEOUT_MS, "Connection timed out", "connection_timeout", "connection");
 const factory = config.webSocketFactory ?? ((url) => new WebSocket(url));
 this.ws = factory(wsUrl);
@@ -1708,11 +966,11 @@ var WebSocketManager = class {
 this.editOriginalText = null;
 }
 handleCommandResult(message) {
- const commandResult = message.command || null;
+ const commands = message.commands || [];
 this.lastInputText = message.transcript;
- events.emit("command:complete", { command: commandResult });
+ events.emit("command:complete", { commands });
 if (this.pendingCommand) {
- this.pendingCommand.resolve(commandResult);
+ this.pendingCommand.resolve(commands);
 this.pendingCommand = null;
 }
 }
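Note the payload change here: `command:complete` now carries `{ commands }`, an array that is empty when nothing matched, instead of `{ command }`, which was `null` on no match. A sketch of updating a listener accordingly, using the package's `events` export:

```typescript
import { events } from "@speechos/core";

// 0.2.9 handlers received { command } (null meant no match);
// 0.2.11 handlers receive { commands } (empty array means no match).
events.on("command:complete", ({ commands }) => {
  if (commands.length === 0) {
    console.log("[app] no command matched");
    return;
  }
  for (const cmd of commands) console.log("[app] matched:", cmd);
});
```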
@@ -1750,7 +1008,7 @@ var WebSocketManager = class {
 const config = getConfig();
 if (config.debug) console.log("[SpeechOS] Stopping voice session, requesting transcript...");
 await this.stopAudioCapture();
- this.pendingTranscript = new Deferred$1();
+ this.pendingTranscript = new Deferred();
 this.pendingTranscript.setTimeout(RESPONSE_TIMEOUT_MS, "Transcription timed out. Please try again.", "transcription_timeout", "timeout");
 this.sendMessage({ type: MESSAGE_TYPE_REQUEST_TRANSCRIPT });
 const result = await this.pendingTranscript.promise;
@@ -1765,7 +1023,7 @@ var WebSocketManager = class {
 const config = getConfig();
 if (config.debug) console.log("[SpeechOS] Requesting text edit...");
 await this.stopAudioCapture();
- this.pendingEditText = new Deferred$1();
+ this.pendingEditText = new Deferred();
 this.pendingEditText.setTimeout(RESPONSE_TIMEOUT_MS, "Edit request timed out. Please try again.", "edit_timeout", "timeout");
 this.sendMessage({ type: MESSAGE_TYPE_EDIT_TEXT });
 const result = await this.pendingEditText.promise;
@@ -1775,12 +1033,13 @@ var WebSocketManager = class {
 /**
 * Request command matching using the transcript as input.
 * Note: The command definitions were already sent in the auth message via startVoiceSession.
+ * Returns an array of matched commands (empty array if no matches).
 */
 async requestCommand(_commands) {
 const config = getConfig();
 if (config.debug) console.log("[SpeechOS] Requesting command match...");
 await this.stopAudioCapture();
- this.pendingCommand = new Deferred$1();
+ this.pendingCommand = new Deferred();
 this.pendingCommand.setTimeout(RESPONSE_TIMEOUT_MS, "Command request timed out. Please try again.", "command_timeout", "timeout");
 this.sendMessage({ type: MESSAGE_TYPE_EXECUTE_COMMAND });
 const result = await this.pendingCommand.promise;
@@ -1818,7 +1077,7 @@ var WebSocketManager = class {
 * Wait for the WebSocket send buffer to drain.
 *
 * This ensures all audio data has been transmitted before we request
- * the transcript. Uses the same pattern as LiveKit's ReadableStream approach.
+ * the transcript.
 */
 async waitForBufferDrain() {
 if (!this.ws || this.ws.readyState !== WS_OPEN) return;
@@ -1894,7 +1153,7 @@ const websocket = new WebSocketManager();
 //#endregion
 //#region src/speechos.ts
 /**
- * Get the active voice backend (always websocket now)
+ * Get the active voice backend
 */
 function getBackend$1() {
 return websocket;
@@ -1902,9 +1161,7 @@ function getBackend$1() {
 /**
 * SpeechOS Core SDK
 *
- * Provides two API layers:
- * 1. Low-level API: Granular control over LiveKit connection lifecycle
- * 2. High-level API: One-shot methods for common voice tasks
+ * Provides a high-level API for common voice tasks.
 */
 var SpeechOSCore = class {
 initialized = false;
@@ -1928,67 +1185,7 @@ var SpeechOSCore = class {
 return this.initialized;
 }
 /**
- * Connect to LiveKit (fetches token, establishes connection)
- * Call this before other low-level methods
- */
- async connect() {
- this.ensureInitialized();
- await livekit.connect();
- }
- /**
- * Wait until the agent is ready to receive audio
- * Resolves when the agent subscribes to our audio track
- */
- async waitUntilReady() {
- return livekit.waitUntilReady();
- }
- /**
- * Enable microphone (user is now being recorded)
- */
- async enableMicrophone() {
- await livekit.enableMicrophone();
- state.setRecordingState("recording");
- }
- /**
- * Stop recording and get the transcript
- * @returns The transcribed text
- */
- async stopAndGetTranscript() {
- state.setRecordingState("processing");
- try {
- const transcript = await livekit.stopAndGetTranscript();
- state.completeRecording();
- return transcript;
- } catch (error) {
- state.setError(error instanceof Error ? error.message : "Transcription failed");
- throw error;
- }
- }
- /**
- * Stop recording and get edited text
- * @param originalText - The original text to edit based on voice instructions
- * @returns The edited text
- */
- async stopAndEdit(originalText) {
- state.setRecordingState("processing");
- try {
- const editedText = await livekit.stopAndEdit(originalText);
- state.completeRecording();
- return editedText;
- } catch (error) {
- state.setError(error instanceof Error ? error.message : "Edit request failed");
- throw error;
- }
- }
- /**
- * Disconnect from LiveKit
- */
- async disconnect() {
- await livekit.disconnect();
- state.completeRecording();
- }
- /**
- * One-shot dictation: connect, wait for agent, record, and get transcript
+ * One-shot dictation: connect, record, and get transcript
 * Automatically handles the full voice session lifecycle
 *
 * @returns The transcribed text
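With the granular `connect()` / `enableMicrophone()` / `stopAndGetTranscript()` / `disconnect()` methods removed from `SpeechOSCore`, the one-shot lifecycle is the remaining path. A rough migration sketch; the diff shows only the one-shot docblock, not the method name, so `dictate()` is an assumption:

```typescript
import { speechOS } from "@speechos/core";

// 0.2.9 low-level flow (all four methods removed in 0.2.11):
//   await speechOS.connect();
//   await speechOS.enableMicrophone();
//   const text = await speechOS.stopAndGetTranscript();
//   await speechOS.disconnect();

// 0.2.11: one call owns the whole session lifecycle
// (method name assumed from the "One-shot dictation" docblock).
const text = await speechOS.dictate();
console.log(text);
```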
@@ -2047,7 +1244,7 @@ var SpeechOSCore = class {
 }
 }
 /**
- * One-shot edit: connect, wait for agent, record voice instructions, apply to text
+ * One-shot edit: connect, record voice instructions, apply to text
 * Automatically handles the full voice session lifecycle
 *
 * @param originalText - The text to edit
@@ -2112,11 +1309,11 @@ var SpeechOSCore = class {
 }
 }
 /**
- * One-shot command: connect, wait for agent, record voice, match against commands
+ * One-shot command: connect, record voice, match against commands
 * Automatically handles the full voice session lifecycle
 *
 * @param commands - Array of command definitions to match against
- * @returns The matched command result or null if no match
+ * @returns Array of matched commands (empty array if no matches)
 */
 async command(commands) {
 this.ensureInitialized();
@@ -2146,22 +1343,24 @@ var SpeechOSCore = class {
 _commandResolve;
 _commandReject;
 /**
- * Stop command recording and get the matched command
+ * Stop command recording and get the matched commands
 * Call this after command() when user stops speaking
+ *
+ * @returns Array of matched commands (empty array if no matches)
 */
 async stopCommand() {
 state.setRecordingState("processing");
 try {
 const backend = getBackend$1();
 const commands = this._commandCommands || [];
- const result = await backend.requestCommand(commands);
+ const results = await backend.requestCommand(commands);
 state.completeRecording();
 if (this._commandResolve) {
- this._commandResolve(result);
+ this._commandResolve(results);
 this._commandResolve = void 0;
 this._commandReject = void 0;
 }
- return result;
+ return results;
 } catch (error) {
 const err = error instanceof Error ? error : new Error("Command request failed");
 state.setError(err.message);
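`command()` and `stopCommand()` form a two-phase flow: the first call starts recording and stashes a resolver, and `stopCommand()` stops recording and resolves it, so both yield the same array. A sketch of push-to-talk usage (the command definition shape is assumed, not shown in this diff):

```typescript
import { speechOS } from "@speechos/core";

// Phase 1: start listening; the returned promise settles in stopCommand().
const pending = speechOS.command([
  { name: "save", description: "Save the document" }, // shape assumed
]);

// Phase 2: on button release (or silence), stop and collect the matches.
const matches = await speechOS.stopCommand(); // [] when nothing matched
console.log("matched:", matches, await pending); // both see the same array
```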
@@ -2263,15 +1462,10 @@ const websocketBackend = {
 requestCommand: (commands) => websocket.requestCommand(commands),
 disconnect: () => websocket.disconnect(),
 isConnected: () => websocket.isConnected(),
- getLastInputText: () => websocket.getLastInputText(),
- prefetchToken: () => Promise.resolve({}),
- startAutoRefresh: () => {},
- stopAutoRefresh: () => {},
- invalidateTokenCache: () => {}
+ getLastInputText: () => websocket.getLastInputText()
 };
 /**
 * Get the active voice backend.
- * Always returns WebSocket backend (LiveKit is legacy).
 *
 * @returns The websocket backend
 */
@@ -2279,14 +1473,203 @@ function getBackend() {
 return websocketBackend;
 }

+ //#endregion
+ //#region src/tts.ts
+ /**
+ * Default TTS voice ID (matches server default).
+ * The server validates voice IDs - pass any valid voice ID or omit to use default.
+ */
+ const DEFAULT_TTS_VOICE_ID = "21m00Tcm4TlvDq8ikWAM";
+ /**
+ * Map HTTP status codes to TTS error codes
+ */
+ function mapHttpStatusToErrorCode(status) {
+ switch (status) {
+ case 400: return "invalid_request";
+ case 402: return "usage_limit_exceeded";
+ case 403: return "authentication_failed";
+ default: return "unknown_error";
+ }
+ }
+ /**
+ * TTS Client for synthesizing speech from text
+ */
+ var TTSClient = class {
+ /**
+ * Synthesize text to speech and return audio bytes
+ *
+ * @param text - Text to synthesize (max 1000 chars)
+ * @param options - Optional synthesis options
+ * @returns Audio data and content type
+ *
+ * @example
+ * ```typescript
+ * const result = await tts.synthesize('Hello world');
+ * console.log(result.audio); // ArrayBuffer
+ * console.log(result.contentType); // 'audio/mpeg'
+ * ```
+ */
+ async synthesize(text, options) {
+ const config = getConfig();
+ if (!config.apiKey) {
+ const error = {
+ code: "authentication_failed",
+ message: "API key not configured. Call SpeechOS.init() first.",
+ phase: "synthesize"
+ };
+ events.emit("tts:error", error);
+ throw new Error(error.message);
+ }
+ events.emit("tts:synthesize:start", { text });
+ try {
+ const response = await fetch(`${config.host}/api/tts/`, {
+ method: "POST",
+ headers: {
+ "Authorization": `Api-Key ${config.apiKey}`,
+ "Content-Type": "application/json"
+ },
+ signal: options?.signal,
+ body: JSON.stringify({
+ text,
+ voice_id: options?.voiceId,
+ language: options?.language ?? "en",
+ user_id: config.userId || void 0
+ })
+ });
+ if (!response.ok) {
+ const errorCode = mapHttpStatusToErrorCode(response.status);
+ let errorMessage;
+ try {
+ const errorData = await response.json();
+ errorMessage = errorData.detail || errorData.message || `HTTP ${response.status}`;
+ } catch {
+ errorMessage = `HTTP ${response.status}: ${response.statusText}`;
+ }
+ const error = {
+ code: errorCode,
+ message: errorMessage,
+ phase: "synthesize"
+ };
+ events.emit("tts:error", error);
+ throw new Error(errorMessage);
+ }
+ const contentType = response.headers.get("Content-Type") || "audio/mpeg";
+ const arrayBuffer = await response.arrayBuffer();
+ events.emit("tts:synthesize:complete", { text });
+ return {
+ audio: arrayBuffer,
+ contentType
+ };
+ } catch (error) {
+ if (error instanceof Error && error.name === "AbortError") throw error;
+ if (error instanceof Error && error.message.includes("HTTP")) throw error;
+ const networkError = {
+ code: "network_error",
+ message: error instanceof Error ? error.message : "Network request failed",
+ phase: "synthesize"
+ };
+ events.emit("tts:error", networkError);
+ throw new Error(networkError.message);
+ }
+ }
+ /**
+ * Stream TTS audio chunks as they arrive from the server
+ *
+ * Useful for progressive playback or processing large texts.
+ *
+ * @param text - Text to synthesize (max 1000 chars)
+ * @param options - Optional synthesis options
+ * @yields Audio chunks as Uint8Array
+ *
+ * @example
+ * ```typescript
+ * const chunks: Uint8Array[] = [];
+ * for await (const chunk of tts.stream('Hello world')) {
+ * chunks.push(chunk);
+ * }
+ * ```
+ */
+ async *stream(text, options) {
+ const config = getConfig();
+ if (!config.apiKey) {
+ const error = {
+ code: "authentication_failed",
+ message: "API key not configured. Call SpeechOS.init() first.",
+ phase: "synthesize"
+ };
+ events.emit("tts:error", error);
+ throw new Error(error.message);
+ }
+ events.emit("tts:synthesize:start", { text });
+ try {
+ const response = await fetch(`${config.host}/api/tts/`, {
+ method: "POST",
+ headers: {
+ "Authorization": `Api-Key ${config.apiKey}`,
+ "Content-Type": "application/json"
+ },
+ signal: options?.signal,
+ body: JSON.stringify({
+ text,
+ voice_id: options?.voiceId,
+ language: options?.language ?? "en",
+ user_id: config.userId || void 0
+ })
+ });
+ if (!response.ok) {
+ const errorCode = mapHttpStatusToErrorCode(response.status);
+ let errorMessage;
+ try {
+ const errorData = await response.json();
+ errorMessage = errorData.detail || errorData.message || `HTTP ${response.status}`;
+ } catch {
+ errorMessage = `HTTP ${response.status}: ${response.statusText}`;
+ }
+ const error = {
+ code: errorCode,
+ message: errorMessage,
+ phase: "synthesize"
+ };
+ events.emit("tts:error", error);
+ throw new Error(errorMessage);
+ }
+ const reader = response.body?.getReader();
+ if (!reader) throw new Error("Response body is not readable");
+ try {
+ while (true) {
+ const { done, value } = await reader.read();
+ if (done) break;
+ yield value;
+ }
+ } finally {
+ reader.releaseLock();
+ }
+ events.emit("tts:synthesize:complete", { text });
+ } catch (error) {
+ if (error instanceof Error && error.name === "AbortError") return;
+ if (error instanceof Error && error.message.includes("HTTP")) throw error;
+ const networkError = {
+ code: "network_error",
+ message: error instanceof Error ? error.message : "Network request failed",
+ phase: "synthesize"
+ };
+ events.emit("tts:error", networkError);
+ throw new Error(networkError.message);
+ }
+ }
+ };
+ const tts = new TTSClient();
+
 //#endregion
 //#region src/index.ts
 const VERSION = "0.1.0";

 //#endregion
 exports.DEFAULT_HOST = DEFAULT_HOST;
+ exports.DEFAULT_TTS_VOICE_ID = DEFAULT_TTS_VOICE_ID;
 exports.Deferred = Deferred;
 exports.SpeechOSEventEmitter = SpeechOSEventEmitter;
+ exports.TTSClient = TTSClient;
 exports.VERSION = VERSION;
 exports.clearSettingsToken = clearSettingsToken;
 exports.createStateManager = createStateManager;
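The new `tts` client is a thin `fetch` wrapper that returns raw audio bytes, so playback is the caller's job. A browser-side sketch of synthesizing and playing the result:

```typescript
import { tts } from "@speechos/core";

// synthesize() resolves with { audio: ArrayBuffer, contentType: string }.
const { audio, contentType } = await tts.synthesize("Hello world");
const url = URL.createObjectURL(new Blob([audio], { type: contentType }));
await new Audio(url).play();
```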
@@ -2294,11 +1677,11 @@ exports.events = events;
 exports.getBackend = getBackend;
 exports.getConfig = getConfig;
 exports.getSettingsToken = getSettingsToken;
- exports.livekit = livekit;
 exports.resetConfig = resetConfig;
 exports.setConfig = setConfig;
 exports.speechOS = speechOS;
 exports.state = state;
+ exports.tts = tts;
 exports.updateUserId = updateUserId;
 exports.validateConfig = validateConfig;
 exports.websocket = websocket;
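And the streaming variant, which yields `Uint8Array` chunks as they arrive; this sketch simply buffers them, though they could equally feed a `MediaSource` for progressive playback. Note that an abort via `options.signal` makes `stream()` return quietly rather than throw:

```typescript
import { tts } from "@speechos/core";

const controller = new AbortController();
const chunks: Uint8Array[] = [];
for await (const chunk of tts.stream("Hello world", {
  signal: controller.signal, // aborting ends the iteration without error
})) {
  chunks.push(chunk);
}
const blob = new Blob(chunks, { type: "audio/mpeg" });
```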