@speechos/core 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,1127 @@
1
+ import { Room, RoomEvent, Track, createLocalAudioTrack } from "livekit-client";
2
+
3
+ //#region src/config.ts
4
/**
 * Default host - can be overridden by SPEECHOS_HOST env var at build time
 */
const DEFAULT_HOST = typeof process !== "undefined" && process.env?.SPEECHOS_HOST || "https://app.speechos.ai";
/**
 * Default configuration values
 */
const defaultConfig = {
  apiKey: "",
  userId: "",
  host: DEFAULT_HOST,
  position: "bottom-center",
  zIndex: 999999,
  debug: false
};
/**
 * Validates and merges user config with defaults.
 *
 * @param userConfig - User-provided configuration
 * @returns Validated and merged configuration (invalid position/zIndex fall back to defaults with a warning)
 * @throws Error if no apiKey is provided
 */
function validateConfig(userConfig = {}) {
  if (!userConfig.apiKey) throw new Error("SpeechOS requires an apiKey. Get one from your team dashboard at /a/<team-slug>/.");
  const config = {
    ...defaultConfig,
    ...userConfig
  };
  const validPositions = [
    "bottom-center",
    "bottom-right",
    "bottom-left"
  ];
  if (!validPositions.includes(config.position)) {
    console.warn(`Invalid position "${config.position}". Using default "bottom-center".`);
    config.position = "bottom-center";
  }
  // Number.isFinite rejects NaN and Infinity as well as non-numbers; the
  // previous `typeof config.zIndex !== "number"` check let NaN/Infinity
  // through (typeof NaN === "number" and NaN < 0 is false).
  if (!Number.isFinite(config.zIndex) || config.zIndex < 0) {
    console.warn(`Invalid zIndex "${config.zIndex}". Using default ${defaultConfig.zIndex}.`);
    config.zIndex = defaultConfig.zIndex;
  }
  return config;
}
45
/**
 * Current active configuration (singleton)
 */
let currentConfig = defaultConfig;
/**
 * Return a shallow copy of the active configuration so callers
 * cannot mutate the singleton directly.
 */
function getConfig() {
  return Object.assign({}, currentConfig);
}
/**
 * Validate a configuration and install it as the active one.
 * @param config - Configuration to set
 */
function setConfig(config) {
  currentConfig = validateConfig(config);
}
/**
 * Restore the active configuration to the built-in defaults.
 */
function resetConfig() {
  currentConfig = Object.assign({}, defaultConfig);
}
/**
 * Replace only the userId in the active configuration.
 * @param userId - The user identifier to set
 */
function updateUserId(userId) {
  currentConfig = Object.assign({}, currentConfig, { userId });
}
78
+
79
+ //#endregion
80
+ //#region src/events.ts
81
/**
 * Type-safe event emitter for SpeechOS events
 */
var SpeechOSEventEmitter = class {
  listeners = /* @__PURE__ */ new Map();
  /**
   * Subscribe to an event.
   * @param event - Event name to listen to
   * @param callback - Function to call when event is emitted
   * @returns Unsubscribe function
   */
  on(event, callback) {
    let bucket = this.listeners.get(event);
    if (!bucket) {
      bucket = /* @__PURE__ */ new Set();
      this.listeners.set(event, bucket);
    }
    bucket.add(callback);
    return () => {
      const current = this.listeners.get(event);
      if (!current) return;
      current.delete(callback);
      // Drop empty buckets so listenerCount and memory stay tidy.
      if (current.size === 0) this.listeners.delete(event);
    };
  }
  /**
   * Subscribe to an event once (automatically unsubscribes after first call).
   * @param event - Event name to listen to
   * @param callback - Function to call when event is emitted
   * @returns Unsubscribe function
   */
  once(event, callback) {
    const off = this.on(event, (payload) => {
      off();
      callback(payload);
    });
    return off;
  }
  /**
   * Emit an event to all subscribers. A throwing listener is logged
   * and does not prevent the remaining listeners from running.
   * @param event - Event name to emit
   * @param payload - Event payload data
   */
  emit(event, payload) {
    const bucket = this.listeners.get(event);
    if (!bucket) return;
    for (const listener of bucket) {
      try {
        listener(payload);
      } catch (error) {
        console.error(`Error in event listener for "${String(event)}":`, error);
      }
    }
  }
  /**
   * Remove all listeners for a specific event, or every listener when
   * no event is given.
   * @param event - Optional event name to clear listeners for
   */
  clear(event) {
    if (!event) {
      this.listeners.clear();
      return;
    }
    this.listeners.delete(event);
  }
  /**
   * Get the number of listeners currently registered for an event.
   * @param event - Event name
   * @returns Number of listeners
   */
  listenerCount(event) {
    const bucket = this.listeners.get(event);
    return bucket ? bucket.size : 0;
  }
};
const events = new SpeechOSEventEmitter();
149
+
150
+ //#endregion
151
+ //#region src/state.ts
152
/**
 * Initial state for the widget state machine.
 */
const initialState = {
  isVisible: false,
  isExpanded: false,
  isConnected: false,
  isMicEnabled: false,
  activeAction: null,
  focusedElement: null,
  recordingState: "idle",
  errorMessage: null
};
/**
 * State manager: holds the widget state, notifies subscribers on every
 * change, and mirrors each change onto the global event bus.
 */
var StateManager = class {
  state;
  subscribers = /* @__PURE__ */ new Set();
  constructor(seed) {
    this.state = { ...seed };
  }
  /**
   * Get the current state (returns a copy to prevent mutations).
   */
  getState() {
    return { ...this.state };
  }
  /**
   * Merge a partial update into the state, then notify subscribers with
   * (newState, prevState) and emit a "state:change" event.
   * @param partial - Partial state to merge with current state
   */
  setState(partial) {
    const previous = { ...this.state };
    this.state = Object.assign({}, previous, partial);
    for (const notify of this.subscribers) {
      try {
        notify(this.state, previous);
      } catch (error) {
        // A throwing subscriber must not block the others.
        console.error("Error in state change callback:", error);
      }
    }
    events.emit("state:change", { state: this.state });
  }
  /**
   * Subscribe to state changes.
   * @param callback - Function to call when state changes
   * @returns Unsubscribe function
   */
  subscribe(callback) {
    this.subscribers.add(callback);
    return () => void this.subscribers.delete(callback);
  }
  /**
   * Reset state to initial values.
   */
  reset() {
    this.setState(initialState);
  }
  /**
   * Show the widget.
   */
  show() {
    this.setState({ isVisible: true });
    events.emit("widget:show", void 0);
  }
  /**
   * Hide the widget and reset expanded state.
   */
  hide() {
    this.setState({
      isVisible: false,
      isExpanded: false,
      activeAction: null
    });
    events.emit("widget:hide", void 0);
  }
  /**
   * Toggle the action bubbles expansion.
   */
  toggleExpanded() {
    this.setState({ isExpanded: !this.state.isExpanded });
  }
  /**
   * Set the focused form element.
   * @param element - The form element that has focus
   */
  setFocusedElement(element) {
    this.setState({ focusedElement: element });
  }
  /**
   * Set the active action.
   * @param action - The action to set as active
   */
  setActiveAction(action) {
    this.setState({ activeAction: action });
  }
  /**
   * Set the recording state.
   * @param recordingState - The recording state to set
   */
  setRecordingState(recordingState) {
    this.setState({ recordingState });
  }
  /**
   * Set the connection state.
   * @param isConnected - Whether connected to LiveKit
   */
  setConnected(isConnected) {
    this.setState({ isConnected });
  }
  /**
   * Set the microphone enabled state.
   * @param isMicEnabled - Whether microphone is enabled
   */
  setMicEnabled(isMicEnabled) {
    this.setState({ isMicEnabled });
  }
  /**
   * Start recording flow (connecting → recording); collapses the bubbles.
   */
  startRecording() {
    this.setState({
      recordingState: "connecting",
      isExpanded: false
    });
  }
  /**
   * Stop recording and start processing.
   */
  stopRecording() {
    this.setState({
      recordingState: "processing",
      isMicEnabled: false
    });
  }
  /**
   * Complete the recording flow and return to idle.
   */
  completeRecording() {
    this.setState({
      recordingState: "idle",
      activeAction: null,
      isConnected: false,
      isMicEnabled: false
    });
  }
  /**
   * Cancel recording and return to idle, clearing any error.
   */
  cancelRecording() {
    this.setState({
      recordingState: "idle",
      activeAction: null,
      errorMessage: null,
      isConnected: false,
      isMicEnabled: false
    });
  }
  /**
   * Set error state with a message.
   * @param message - Error message to display
   */
  setError(message) {
    this.setState({
      recordingState: "error",
      errorMessage: message
    });
  }
  /**
   * Clear error state and return to idle.
   */
  clearError() {
    this.setState({
      recordingState: "idle",
      errorMessage: null
    });
  }
};
const state = new StateManager(initialState);
/**
 * Create a new state manager instance (useful for testing).
 */
function createStateManager(initial) {
  return new StateManager(Object.assign({}, initialState, initial));
}
346
+
347
+ //#endregion
348
+ //#region src/livekit.ts
349
// Data-channel protocol message types exchanged with the agent.
// Client -> agent: request the final transcript of the captured audio.
const MESSAGE_TYPE_REQUEST_TRANSCRIPT = "request_transcript";
// Agent -> client: carries the finished transcript text.
const MESSAGE_TYPE_TRANSCRIPT = "transcript";
// Client -> agent: apply the spoken instructions to the supplied text.
const MESSAGE_TYPE_EDIT_TEXT = "edit_text";
// Agent -> client: carries the edited text result.
const MESSAGE_TYPE_EDITED_TEXT = "edited_text";
// Agent -> client: reports a server-side failure (code/message/details).
const MESSAGE_TYPE_ERROR = "error";
// LiveKit data-channel topic used for all outgoing SpeechOS messages.
const TOPIC_SPEECHOS = "speechos";
355
/**
 * A deferred promise with timeout support.
 * Encapsulates resolve/reject/timeout in a single object for cleaner async handling.
 * Settling is idempotent: only the first resolve/reject wins, and any armed
 * timeout is cancelled at that point.
 */
var Deferred = class {
  promise;
  _resolve;
  _reject;
  _timeoutId = null;
  _settled = false;
  constructor() {
    this.promise = new Promise((res, rej) => {
      this._resolve = res;
      this._reject = rej;
    });
  }
  /**
   * Arm a timer that rejects the promise with the given error unless the
   * deferred settles first. On expiry it also logs the error and emits a
   * SpeechOS "error" event.
   */
  setTimeout(ms, errorMessage, errorCode, errorSource) {
    this._timeoutId = setTimeout(() => {
      if (this._settled) return;
      console.error(`[SpeechOS] Error: ${errorMessage} (${errorCode})`);
      events.emit("error", {
        code: errorCode,
        message: errorMessage,
        source: errorSource
      });
      this.reject(new Error(errorMessage));
    }, ms);
  }
  resolve(value) {
    if (this._settled) return;
    this._settled = true;
    this.clearTimeout();
    this._resolve(value);
  }
  reject(error) {
    if (this._settled) return;
    this._settled = true;
    this.clearTimeout();
    this._reject(error);
  }
  clearTimeout() {
    if (this._timeoutId === null) return;
    clearTimeout(this._timeoutId);
    this._timeoutId = null;
  }
  get isSettled() {
    return this._settled;
  }
};
411
/**
 * LiveKit connection manager.
 *
 * Owns the full lifecycle of a voice session: token fetch, room connection,
 * microphone publish/unpublish, and the request/response data-channel
 * protocol with the server-side agent (transcript and edit-text requests).
 * Pending responses are tracked as Deferred instances so exactly one
 * in-flight transcript/edit/track-subscription wait exists at a time.
 */
var LiveKitManager = class {
  // Active LiveKit Room, or null when disconnected.
  room = null;
  // Cached token response ({ token, ws_url, room, identity }); cleared on disconnect.
  tokenData = null;
  // Local microphone track, non-null only while the mic is live.
  micTrack = null;
  // Deferred awaiting a "transcript" data message from the agent.
  pendingTranscript = null;
  // Deferred awaiting an "edited_text" data message from the agent.
  pendingEditText = null;
  // Deferred awaiting the agent subscribing to our audio track.
  pendingTrackSubscribed = null;
  // In-flight preWarm() token fetch, if any.
  preWarmPromise = null;
  // Original text for an in-flight edit request; echoed in the "edit:complete" event.
  editOriginalText = null;
  /**
   * Pre-warm resources for faster connection
   * Call this when user shows intent (e.g., expands widget)
   * Only fetches token - mic permission is requested when user clicks Dictate
   *
   * NOTE(review): if a pre-warm is already in flight this returns immediately
   * without awaiting it; startVoiceSession() does await preWarmPromise, so
   * session start is unaffected — confirm no other caller relies on preWarm()
   * resolving only after the token is ready.
   */
  async preWarm() {
    if (this.tokenData || this.preWarmPromise || this.room?.state === "connected") {
      const config$1 = getConfig();
      if (config$1.debug) console.log("[SpeechOS] Pre-warm skipped - token already available");
      return;
    }
    const config = getConfig();
    if (config.debug) console.log("[SpeechOS] Pre-warming: fetching token...");
    this.preWarmPromise = (async () => {
      try {
        await this.fetchToken();
        if (config.debug) console.log("[SpeechOS] Pre-warm complete - token ready");
      } catch (error) {
        // Pre-warm is best-effort: swallow the failure so a later
        // startVoiceSession() retries the fetch from scratch.
        if (config.debug) console.warn("[SpeechOS] Pre-warm failed:", error);
        this.preWarmPromise = null;
      }
    })();
    await this.preWarmPromise;
  }
  /**
   * Fetch a LiveKit token from the backend.
   * Caches the response on this.tokenData and returns it.
   * @throws Error when the HTTP response is not ok
   */
  async fetchToken() {
    const config = getConfig();
    const url = `${config.host}/livekit/api/token/`;
    if (config.debug) console.log("[SpeechOS] Fetching LiveKit token from:", url);
    const response = await fetch(url, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        ...config.apiKey ? { Authorization: `Api-Key ${config.apiKey}` } : {}
      },
      body: JSON.stringify({ user_id: config.userId || null })
    });
    if (!response.ok) throw new Error(`Failed to fetch LiveKit token: ${response.status} ${response.statusText}`);
    const data = await response.json();
    this.tokenData = data;
    if (config.debug) console.log("[SpeechOS] LiveKit token received:", {
      room: data.room,
      identity: data.identity,
      ws_url: data.ws_url
    });
    return data;
  }
  /**
   * Connect to a LiveKit room (fresh connection each time).
   * Reuses a cached token when present, otherwise fetches one first.
   */
  async connect() {
    const config = getConfig();
    if (!this.tokenData) await this.fetchToken();
    else if (config.debug) console.log("[SpeechOS] Using pre-fetched token");
    if (!this.tokenData) throw new Error("No token available for LiveKit connection");
    this.room = new Room({
      adaptiveStream: true,
      dynacast: true
    });
    this.setupRoomEvents();
    if (config.debug) console.log("[SpeechOS] Connecting to LiveKit room:", this.tokenData.room);
    await this.room.connect(this.tokenData.ws_url, this.tokenData.token);
    state.setConnected(true);
    if (config.debug) console.log("[SpeechOS] Connected to LiveKit room:", this.room.name);
    return this.room;
  }
  /**
   * Wait until the agent is ready to receive audio
   * Resolves when LocalTrackSubscribed event is received
   * Times out (rejecting) after 15s if the agent never subscribes.
   * @throws Error when not connected to a room
   */
  async waitUntilReady() {
    if (!this.room || this.room.state !== "connected") throw new Error("Not connected to room");
    if (this.pendingTrackSubscribed) return this.pendingTrackSubscribed.promise;
    this.pendingTrackSubscribed = new Deferred();
    this.pendingTrackSubscribed.setTimeout(15e3, "Connection timed out - agent not available", "connection_timeout", "connection");
    return this.pendingTrackSubscribed.promise;
  }
  /**
   * Set up LiveKit room event listeners.
   * Mirrors connection/mic state into the global state manager and routes
   * incoming data messages to handleDataMessage.
   */
  setupRoomEvents() {
    if (!this.room) return;
    const config = getConfig();
    this.room.on(RoomEvent.Connected, () => {
      if (config.debug) console.log("[SpeechOS] Room connected");
      state.setConnected(true);
    });
    this.room.on(RoomEvent.Disconnected, (reason) => {
      if (config.debug) console.log("[SpeechOS] Room disconnected:", reason);
      state.setConnected(false);
      state.setMicEnabled(false);
    });
    this.room.on(RoomEvent.ParticipantConnected, (participant) => {
      if (config.debug) console.log("[SpeechOS] Participant connected:", participant.identity);
    });
    this.room.on(RoomEvent.LocalTrackSubscribed, (publication) => {
      if (config.debug) console.log("[SpeechOS] LocalTrackSubscribed event fired:", publication.trackSid);
      if (this.pendingTrackSubscribed) {
        this.pendingTrackSubscribed.resolve();
        this.pendingTrackSubscribed = null;
      }
    });
    this.room.on(RoomEvent.LocalTrackPublished, (publication) => {
      if (config.debug) console.log("[SpeechOS] LocalTrackPublished:", publication.trackSid, publication.source);
    });
    this.room.on(RoomEvent.DataReceived, (data, participant) => {
      this.handleDataMessage(data, participant);
    });
  }
  /**
   * Handle incoming data messages from the agent.
   * Decodes the JSON payload and settles the matching pending Deferred:
   * "transcript" -> pendingTranscript, "edited_text" -> pendingEditText,
   * "error" -> rejects both. Malformed payloads are logged and dropped.
   */
  handleDataMessage(data, _participant) {
    const config = getConfig();
    try {
      const message = JSON.parse(new TextDecoder().decode(data));
      if (config.debug) console.log("[SpeechOS] Data received:", message);
      if (message.type === MESSAGE_TYPE_TRANSCRIPT) {
        const transcript = message.transcript || "";
        if (config.debug) console.log("[SpeechOS] Transcript received:", transcript);
        events.emit("transcription:complete", { text: transcript });
        if (this.pendingTranscript) {
          this.pendingTranscript.resolve(transcript);
          this.pendingTranscript = null;
        }
      } else if (message.type === MESSAGE_TYPE_EDITED_TEXT) {
        const editedText = message.text || "";
        if (config.debug) console.log("[SpeechOS] Edited text received:", editedText);
        events.emit("edit:complete", {
          text: editedText,
          originalText: this.editOriginalText || ""
        });
        if (this.pendingEditText) {
          this.pendingEditText.resolve(editedText);
          this.pendingEditText = null;
        }
        this.editOriginalText = null;
      } else if (message.type === MESSAGE_TYPE_ERROR) {
        const serverError = message;
        const errorCode = serverError.code || "server_error";
        const errorMessage = serverError.message || "A server error occurred";
        console.error(`[SpeechOS] Error: ${errorMessage} (${errorCode})`);
        if (config.debug && serverError.details) console.error("[SpeechOS] Error details:", serverError.details);
        events.emit("error", {
          code: errorCode,
          message: errorMessage,
          source: "server"
        });
        const error = new Error(errorMessage);
        if (this.pendingTranscript) {
          this.pendingTranscript.reject(error);
          this.pendingTranscript = null;
        }
        if (this.pendingEditText) {
          this.pendingEditText.reject(error);
          this.pendingEditText = null;
        }
      }
    } catch (error) {
      console.error("[SpeechOS] Failed to parse data message:", error);
    }
  }
  /**
   * Publish microphone audio track.
   * Creates the local track on first use (this is where the browser mic
   * permission prompt happens) and publishes it if not already published.
   * @throws Error when not connected to a room
   */
  async enableMicrophone() {
    if (!this.room || this.room.state !== "connected") throw new Error("Not connected to room");
    const config = getConfig();
    if (!this.micTrack) {
      if (config.debug) console.log("[SpeechOS] Creating microphone track...");
      this.micTrack = await createLocalAudioTrack({
        echoCancellation: true,
        noiseSuppression: true
      });
    }
    const existingPub = this.room.localParticipant.getTrackPublication(Track.Source.Microphone);
    if (!existingPub) {
      await this.room.localParticipant.publishTrack(this.micTrack, { source: Track.Source.Microphone });
      state.setMicEnabled(true);
      if (config.debug) console.log("[SpeechOS] Microphone track published");
    }
  }
  /**
   * Disable microphone audio track.
   * Unpublishes (best-effort), then stops and releases the local track.
   */
  async disableMicrophone() {
    const config = getConfig();
    if (this.micTrack) {
      if (config.debug) console.log("[SpeechOS] Disabling microphone track...");
      if (this.room?.state === "connected") try {
        await this.room.localParticipant.unpublishTrack(this.micTrack);
        if (config.debug) console.log("[SpeechOS] Microphone track unpublished");
      } catch (error) {
        // Best-effort: still stop the track below even if unpublish fails.
        console.warn("[SpeechOS] Error unpublishing track:", error);
      }
      this.micTrack.stop();
      this.micTrack.detach();
      this.micTrack = null;
      state.setMicEnabled(false);
      if (config.debug) console.log("[SpeechOS] Microphone track stopped and detached");
    }
  }
  /**
   * Send a data message to the room (reliable delivery, "speechos" topic).
   * @throws Error when not connected to a room
   */
  async sendDataMessage(message) {
    if (!this.room || this.room.state !== "connected") throw new Error("Not connected to room");
    const data = new TextEncoder().encode(JSON.stringify(message));
    await this.room.localParticipant.publishData(data, {
      reliable: true,
      topic: TOPIC_SPEECHOS
    });
  }
  /**
   * Start a voice session
   * Connects to room, enables microphone, and waits for agent to subscribe to our track
   * The subscription wait is armed BEFORE connecting so the event cannot be missed;
   * it rejects after 15s if the agent never subscribes.
   */
  async startVoiceSession() {
    const config = getConfig();
    if (config.debug) console.log("[SpeechOS] Starting voice session...");
    if (this.preWarmPromise) {
      if (config.debug) console.log("[SpeechOS] Waiting for pre-warm to complete...");
      await this.preWarmPromise;
    }
    if (this.tokenData) {
      if (config.debug) console.log("[SpeechOS] Using cached token from init");
    } else {
      if (config.debug) console.log("[SpeechOS] Fetching fresh token for session...");
      await this.fetchToken();
    }
    this.pendingTrackSubscribed = new Deferred();
    this.pendingTrackSubscribed.setTimeout(15e3, "Connection timed out - agent not available", "connection_timeout", "connection");
    await this.connect();
    await this.enableMicrophone();
    if (config.debug) console.log("[SpeechOS] Microphone published, waiting for LocalTrackSubscribed event...");
    await this.pendingTrackSubscribed.promise;
    this.pendingTrackSubscribed = null;
    if (config.debug) console.log("[SpeechOS] Voice session ready - agent subscribed to audio");
  }
  /**
   * Stop the voice session and request the transcript
   * Returns a promise that resolves with the transcript text
   * (10 second timeout on the agent's reply).
   * @throws Error if timeout occurs waiting for transcript
   */
  async stopVoiceSession() {
    const config = getConfig();
    if (config.debug) console.log("[SpeechOS] Stopping voice session, requesting transcript...");
    await this.disableMicrophone();
    if (config.debug) console.log("[SpeechOS] Requesting transcript from agent...");
    this.pendingTranscript = new Deferred();
    this.pendingTranscript.setTimeout(1e4, "Transcription timed out. Please try again.", "transcription_timeout", "timeout");
    await this.sendDataMessage({ type: MESSAGE_TYPE_REQUEST_TRANSCRIPT });
    const result = await this.pendingTranscript.promise;
    this.pendingTranscript = null;
    return result;
  }
  /**
   * Alias for stopVoiceSession - granular API naming
   */
  async stopAndGetTranscript() {
    return this.stopVoiceSession();
  }
  /**
   * Request text editing using the transcript as instructions
   * Sends the original text to the backend, which applies the spoken instructions
   * Returns a promise that resolves with the edited text
   * (15 second timeout on the agent's reply).
   * @throws Error if timeout occurs waiting for edited text
   */
  async requestEditText(originalText) {
    const config = getConfig();
    if (config.debug) console.log("[SpeechOS] Requesting text edit...");
    this.editOriginalText = originalText;
    await this.disableMicrophone();
    if (config.debug) console.log("[SpeechOS] Sending edit_text request to agent...");
    this.pendingEditText = new Deferred();
    this.pendingEditText.setTimeout(15e3, "Edit request timed out. Please try again.", "edit_timeout", "timeout");
    await this.sendDataMessage({
      type: MESSAGE_TYPE_EDIT_TEXT,
      text: originalText
    });
    const result = await this.pendingEditText.promise;
    this.pendingEditText = null;
    return result;
  }
  /**
   * Alias for requestEditText - granular API naming
   */
  async stopAndEdit(originalText) {
    return this.requestEditText(originalText);
  }
  /**
   * Disconnect from the current room
   * Clears the token so a fresh one is fetched for the next session
   * Also rejects any pending transcript/edit/subscription waits with "Disconnected".
   */
  async disconnect() {
    const config = getConfig();
    if (config.debug) console.log("[SpeechOS] Disconnecting from room...");
    await this.disableMicrophone();
    if (this.room) {
      this.room.removeAllListeners();
      await this.room.disconnect();
      this.room = null;
      state.setConnected(false);
      if (config.debug) console.log("[SpeechOS] Room disconnected and cleaned up");
    }
    if (this.pendingTranscript) {
      this.pendingTranscript.reject(new Error("Disconnected"));
      this.pendingTranscript = null;
    }
    if (this.pendingEditText) {
      this.pendingEditText.reject(new Error("Disconnected"));
      this.pendingEditText = null;
    }
    if (this.pendingTrackSubscribed) {
      this.pendingTrackSubscribed.reject(new Error("Disconnected"));
      this.pendingTrackSubscribed = null;
    }
    this.tokenData = null;
    this.preWarmPromise = null;
    this.editOriginalText = null;
    if (config.debug) console.log("[SpeechOS] Session state cleared");
  }
  /**
   * Get the current room instance
   */
  getRoom() {
    return this.room;
  }
  /**
   * Get the current token data
   */
  getTokenData() {
    return this.tokenData;
  }
  /**
   * Check if connected to a room
   */
  isConnected() {
    return this.room?.state === "connected";
  }
  /**
   * Check if microphone is enabled
   */
  isMicrophoneEnabled() {
    return this.micTrack !== null;
  }
  /**
   * Clear the cached token
   * Used when user identity changes to ensure next session gets a fresh token
   */
  clearToken() {
    const config = getConfig();
    if (config.debug) console.log("[SpeechOS] Clearing cached token");
    this.tokenData = null;
    this.preWarmPromise = null;
  }
};
// Singleton used by the SpeechOS core SDK.
const livekit = new LiveKitManager();
783
+
784
+ //#endregion
785
+ //#region src/transcript-store.ts
786
const STORAGE_KEY = "speechos_transcripts";
const MAX_ENTRIES = 50;
/**
 * Build a unique id from the current time plus a random base-36 suffix.
 */
function generateId() {
  const suffix = Math.random().toString(36).slice(2, 9);
  return `${Date.now()}-${suffix}`;
}
/**
 * Read every stored transcript, newest first.
 * Returns an empty list when storage is unavailable or corrupted.
 */
function getTranscripts() {
  try {
    const raw = localStorage.getItem(STORAGE_KEY);
    if (!raw) return [];
    const parsed = JSON.parse(raw);
    return parsed.sort((first, second) => second.timestamp - first.timestamp);
  } catch {
    return [];
  }
}
/**
 * Persist a new transcript entry, pruning history to the newest MAX_ENTRIES.
 * Storage failures are ignored; the entry is returned either way.
 */
function saveTranscript(text, action, originalText) {
  const entry = {
    id: generateId(),
    text,
    timestamp: Date.now(),
    action
  };
  if (originalText) entry.originalText = originalText;
  const history = getTranscripts();
  history.unshift(entry);
  const pruned = history.slice(0, MAX_ENTRIES);
  try {
    localStorage.setItem(STORAGE_KEY, JSON.stringify(pruned));
  } catch {}
  return entry;
}
/**
 * Clear all transcript history (best-effort).
 */
function clearTranscripts() {
  try {
    localStorage.removeItem(STORAGE_KEY);
  } catch {}
}
/**
 * Delete a single transcript by ID (best-effort).
 */
function deleteTranscript(id) {
  const remaining = getTranscripts().filter((item) => item.id !== id);
  try {
    localStorage.setItem(STORAGE_KEY, JSON.stringify(remaining));
  } catch {}
}
// Public facade for the transcript history store.
const transcriptStore = {
  getTranscripts,
  saveTranscript,
  clearTranscripts,
  deleteTranscript
};
849
+
850
+ //#endregion
851
+ //#region src/speechos.ts
852
+ /**
853
+ * SpeechOS Core SDK
854
+ *
855
+ * Provides two API layers:
856
+ * 1. Low-level API: Granular control over LiveKit connection lifecycle
857
+ * 2. High-level API: One-shot methods for common voice tasks
858
+ */
859
+ var SpeechOSCore = class {
860
+ initialized = false;
861
+ /**
862
+ * Initialize the SDK with configuration
863
+ * @param config - Configuration options including apiKey
864
+ */
865
+ init(config) {
866
+ setConfig(config);
867
+ this.initialized = true;
868
+ const currentConfig$1 = getConfig();
869
+ if (currentConfig$1.debug) console.log("[SpeechOS] Initialized with config:", {
870
+ host: currentConfig$1.host,
871
+ position: currentConfig$1.position,
872
+ debug: currentConfig$1.debug
873
+ });
874
+ }
875
+ /**
876
+ * Check if the SDK is initialized
877
+ */
878
+ isInitialized() {
879
+ return this.initialized;
880
+ }
881
+ /**
882
+ * Connect to LiveKit (fetches token, establishes connection)
883
+ * Call this before other low-level methods
884
+ */
885
+ async connect() {
886
+ this.ensureInitialized();
887
+ await livekit.connect();
888
+ }
889
+ /**
890
+ * Wait until the agent is ready to receive audio
891
+ * Resolves when the agent subscribes to our audio track
892
+ */
893
+ async waitUntilReady() {
894
+ return livekit.waitUntilReady();
895
+ }
896
+ /**
897
+ * Enable microphone (user is now being recorded)
898
+ */
899
+ async enableMicrophone() {
900
+ await livekit.enableMicrophone();
901
+ state.setRecordingState("recording");
902
+ }
903
+ /**
904
+ * Stop recording and get the transcript
905
+ * @returns The transcribed text
906
+ */
907
+ async stopAndGetTranscript() {
908
+ state.setRecordingState("processing");
909
+ try {
910
+ const transcript = await livekit.stopAndGetTranscript();
911
+ transcriptStore.saveTranscript(transcript, "dictate");
912
+ state.completeRecording();
913
+ return transcript;
914
+ } catch (error) {
915
+ state.setError(error instanceof Error ? error.message : "Transcription failed");
916
+ throw error;
917
+ }
918
+ }
919
+ /**
920
+ * Stop recording and get edited text
921
+ * @param originalText - The original text to edit based on voice instructions
922
+ * @returns The edited text
923
+ */
924
+ async stopAndEdit(originalText) {
925
+ state.setRecordingState("processing");
926
+ try {
927
+ const editedText = await livekit.stopAndEdit(originalText);
928
+ transcriptStore.saveTranscript(editedText, "edit", originalText);
929
+ state.completeRecording();
930
+ return editedText;
931
+ } catch (error) {
932
+ state.setError(error instanceof Error ? error.message : "Edit request failed");
933
+ throw error;
934
+ }
935
+ }
936
+ /**
937
+ * Disconnect from LiveKit
938
+ */
939
+ async disconnect() {
940
+ await livekit.disconnect();
941
+ state.completeRecording();
942
+ }
943
+ /**
944
+ * One-shot dictation: connect, wait for agent, record, and get transcript
945
+ * Automatically handles the full voice session lifecycle
946
+ *
947
+ * @returns The transcribed text
948
+ */
949
+ async dictate() {
950
+ this.ensureInitialized();
951
+ state.setActiveAction("dictate");
952
+ state.startRecording();
953
+ try {
954
+ await livekit.startVoiceSession();
955
+ state.setRecordingState("recording");
956
+ return new Promise((resolve, reject) => {
957
+ this._dictateResolve = resolve;
958
+ this._dictateReject = reject;
959
+ });
960
+ } catch (error) {
961
+ state.setError(error instanceof Error ? error.message : "Failed to start dictation");
962
+ await this.cleanup();
963
+ throw error;
964
+ }
965
+ }
966
+ _dictateResolve;
967
+ _dictateReject;
968
+ /**
969
+ * Stop dictation and get the transcript
970
+ * Call this after dictate() when user stops speaking
971
+ */
972
+ async stopDictation() {
973
+ state.setRecordingState("processing");
974
+ try {
975
+ const transcript = await livekit.stopVoiceSession();
976
+ transcriptStore.saveTranscript(transcript, "dictate");
977
+ state.completeRecording();
978
+ if (this._dictateResolve) {
979
+ this._dictateResolve(transcript);
980
+ this._dictateResolve = void 0;
981
+ this._dictateReject = void 0;
982
+ }
983
+ return transcript;
984
+ } catch (error) {
985
+ const err = error instanceof Error ? error : new Error("Transcription failed");
986
+ state.setError(err.message);
987
+ if (this._dictateReject) {
988
+ this._dictateReject(err);
989
+ this._dictateResolve = void 0;
990
+ this._dictateReject = void 0;
991
+ }
992
+ throw err;
993
+ } finally {
994
+ await this.cleanup();
995
+ }
996
+ }
997
+ /**
998
+ * One-shot edit: connect, wait for agent, record voice instructions, apply to text
999
+ * Automatically handles the full voice session lifecycle
1000
+ *
1001
+ * @param originalText - The text to edit
1002
+ * @returns The edited text
1003
+ */
1004
+ async edit(originalText) {
1005
+ this.ensureInitialized();
1006
+ state.setActiveAction("edit");
1007
+ state.startRecording();
1008
+ this._editOriginalText = originalText;
1009
+ try {
1010
+ await livekit.startVoiceSession();
1011
+ state.setRecordingState("recording");
1012
+ return new Promise((resolve, reject) => {
1013
+ this._editResolve = resolve;
1014
+ this._editReject = reject;
1015
+ });
1016
+ } catch (error) {
1017
+ state.setError(error instanceof Error ? error.message : "Failed to start edit");
1018
+ await this.cleanup();
1019
+ throw error;
1020
+ }
1021
+ }
1022
+ _editOriginalText;
1023
+ _editResolve;
1024
+ _editReject;
1025
+ /**
1026
+ * Stop edit recording and get the edited text
1027
+ * Call this after edit() when user stops speaking
1028
+ */
1029
+ async stopEdit() {
1030
+ state.setRecordingState("processing");
1031
+ try {
1032
+ const originalText = this._editOriginalText || "";
1033
+ const editedText = await livekit.requestEditText(originalText);
1034
+ transcriptStore.saveTranscript(editedText, "edit", originalText);
1035
+ state.completeRecording();
1036
+ if (this._editResolve) {
1037
+ this._editResolve(editedText);
1038
+ this._editResolve = void 0;
1039
+ this._editReject = void 0;
1040
+ }
1041
+ return editedText;
1042
+ } catch (error) {
1043
+ const err = error instanceof Error ? error : new Error("Edit request failed");
1044
+ state.setError(err.message);
1045
+ if (this._editReject) {
1046
+ this._editReject(err);
1047
+ this._editResolve = void 0;
1048
+ this._editReject = void 0;
1049
+ }
1050
+ throw err;
1051
+ } finally {
1052
+ this._editOriginalText = void 0;
1053
+ await this.cleanup();
1054
+ }
1055
+ }
1056
+ /**
1057
+ * Cancel the current operation
1058
+ */
1059
+ async cancel() {
1060
+ const err = new Error("Operation cancelled");
1061
+ if (this._dictateReject) {
1062
+ this._dictateReject(err);
1063
+ this._dictateResolve = void 0;
1064
+ this._dictateReject = void 0;
1065
+ }
1066
+ if (this._editReject) {
1067
+ this._editReject(err);
1068
+ this._editResolve = void 0;
1069
+ this._editReject = void 0;
1070
+ }
1071
+ this._editOriginalText = void 0;
1072
+ await this.cleanup();
1073
+ state.cancelRecording();
1074
+ }
1075
/**
 * The module-level state manager singleton; subscribe here for
 * recording-state changes.
 */
get state() {
	return state;
}
1081
/**
 * The module-level event emitter singleton for listening to SDK events.
 */
get events() {
	return events;
}
1087
/**
 * The current resolved SDK configuration.
 * Delegates to the module-level getConfig() (the method name shadows it here).
 */
getConfig() {
	return getConfig();
}
1093
+ ensureInitialized() {
1094
+ if (!this.initialized) throw new Error("SpeechOS not initialized. Call speechOS.init({ apiKey: ... }) first.");
1095
+ }
1096
+ async cleanup() {
1097
+ try {
1098
+ await livekit.disconnect();
1099
+ } catch (error) {
1100
+ const config = getConfig();
1101
+ if (config.debug) console.warn("[SpeechOS] Cleanup disconnect error:", error);
1102
+ }
1103
+ }
1104
+ /**
1105
+ * Reset the SDK (useful for testing)
1106
+ */
1107
+ reset() {
1108
+ this.initialized = false;
1109
+ this._dictateResolve = void 0;
1110
+ this._dictateReject = void 0;
1111
+ this._editResolve = void 0;
1112
+ this._editReject = void 0;
1113
+ this._editOriginalText = void 0;
1114
+ resetConfig();
1115
+ state.reset();
1116
+ events.clear();
1117
+ }
1118
+ };
1119
+ const speechOS = new SpeechOSCore();
1120
+
1121
+ //#endregion
1122
+ //#region src/index.ts
1123
+ const VERSION = "0.1.0";
1124
+
1125
+ //#endregion
1126
+ export { DEFAULT_HOST, Deferred, SpeechOSEventEmitter, VERSION, createStateManager, defaultConfig, events, getConfig, livekit, resetConfig, setConfig, speechOS, state, transcriptStore, updateUserId, validateConfig };
1127
+ //# sourceMappingURL=index.js.map