@speechos/core 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs ADDED
@@ -0,0 +1,1165 @@
1
//#region rolldown:runtime
// Bundler-emitted CommonJS/ESM interop helpers (rolldown runtime).
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Copy every own property of `from` onto `to` as a live getter, skipping
// `except` and any key `to` already owns. Returns `to`.
var __copyProps = (to, from, except, desc) => {
	if (from && (typeof from === "object" || typeof from === "function")) {
		for (const key of __getOwnPropNames(from)) {
			if (key === except || __hasOwnProp.call(to, key)) continue;
			desc = __getOwnPropDesc(from, key);
			__defProp(to, key, {
				get: () => from[key],
				enumerable: !desc || desc.enumerable
			});
		}
	}
	return to;
};
// Wrap a CommonJS export object so it can be consumed like an ES module:
// a `default` binding is added unless the module already declares __esModule.
var __toESM = (mod, isNodeMode, target) => {
	target = mod != null ? __create(__getProtoOf(mod)) : {};
	const base = isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", {
		value: mod,
		enumerable: true
	}) : target;
	return __copyProps(base, mod);
};

//#endregion
24
+ const livekit_client = __toESM(require("livekit-client"));
25
+
26
//#region src/config.ts
/**
 * Default host - can be overridden by SPEECHOS_HOST env var at build time
 */
const DEFAULT_HOST = typeof process !== "undefined" && process.env?.SPEECHOS_HOST || "https://app.speechos.ai";
/**
 * Default configuration values.
 */
const defaultConfig = {
	apiKey: "",
	userId: "",
	host: DEFAULT_HOST,
	position: "bottom-center",
	zIndex: 999999,
	debug: false
};
/**
 * Validate user-supplied options and merge them over the defaults.
 * Invalid position/zIndex values fall back to defaults with a console warning.
 * @param userConfig - User-provided configuration
 * @returns Validated and merged configuration
 * @throws Error when no apiKey is supplied
 */
function validateConfig(userConfig = {}) {
	if (!userConfig.apiKey) {
		throw new Error("SpeechOS requires an apiKey. Get one from your team dashboard at /a/<team-slug>/.");
	}
	const config = Object.assign({}, defaultConfig, userConfig);
	const validPositions = ["bottom-center", "bottom-right", "bottom-left"];
	if (!validPositions.includes(config.position)) {
		console.warn(`Invalid position "${config.position}". Using default "bottom-center".`);
		config.position = "bottom-center";
	}
	if (typeof config.zIndex !== "number" || config.zIndex < 0) {
		console.warn(`Invalid zIndex "${config.zIndex}". Using default ${defaultConfig.zIndex}.`);
		config.zIndex = defaultConfig.zIndex;
	}
	return config;
}
/**
 * Current active configuration (module-level singleton).
 */
let currentConfig = defaultConfig;
/**
 * Get a defensive copy of the current configuration.
 */
function getConfig() {
	return Object.assign({}, currentConfig);
}
/**
 * Validate and install a new configuration.
 * @param config - Configuration to set
 */
function setConfig(config) {
	currentConfig = validateConfig(config);
}
/**
 * Reset configuration to defaults.
 */
function resetConfig() {
	currentConfig = Object.assign({}, defaultConfig);
}
/**
 * Update only the userId, keeping the rest of the configuration.
 * @param userId - The user identifier to set
 */
function updateUserId(userId) {
	currentConfig = Object.assign({}, currentConfig, { userId });
}

//#endregion
103
//#region src/events.ts
/**
 * Type-safe event emitter for SpeechOS events.
 * Listeners are kept per event name in a Map of Sets, so subscribing the
 * same callback twice is collapsed into a single subscription.
 */
var SpeechOSEventEmitter = class {
	listeners = /* @__PURE__ */ new Map();
	/**
	 * Subscribe to an event
	 * @param event - Event name to listen to
	 * @param callback - Function to call when event is emitted
	 * @returns Unsubscribe function
	 */
	on(event, callback) {
		let bucket = this.listeners.get(event);
		if (!bucket) {
			bucket = new Set();
			this.listeners.set(event, bucket);
		}
		bucket.add(callback);
		return () => {
			const current = this.listeners.get(event);
			if (!current) return;
			current.delete(callback);
			// Drop empty buckets so listenerCount/map size stay tidy.
			if (current.size === 0) this.listeners.delete(event);
		};
	}
	/**
	 * Subscribe to an event once (automatically unsubscribes after first call)
	 * @param event - Event name to listen to
	 * @param callback - Function to call when event is emitted
	 * @returns Unsubscribe function
	 */
	once(event, callback) {
		const unsubscribe = this.on(event, (payload) => {
			unsubscribe();
			callback(payload);
		});
		return unsubscribe;
	}
	/**
	 * Emit an event to all subscribers.
	 * A throwing listener is logged and does not stop delivery to the rest.
	 * @param event - Event name to emit
	 * @param payload - Event payload data
	 */
	emit(event, payload) {
		const bucket = this.listeners.get(event);
		if (!bucket) return;
		for (const listener of bucket) {
			try {
				listener(payload);
			} catch (error) {
				console.error(`Error in event listener for "${String(event)}":`, error);
			}
		}
	}
	/**
	 * Remove all listeners for a specific event or all events
	 * @param event - Optional event name to clear listeners for
	 */
	clear(event) {
		if (event) {
			this.listeners.delete(event);
		} else {
			this.listeners.clear();
		}
	}
	/**
	 * Get the number of listeners for an event
	 * @param event - Event name
	 * @returns Number of listeners
	 */
	listenerCount(event) {
		return this.listeners.get(event)?.size ?? 0;
	}
};
const events = new SpeechOSEventEmitter();

//#endregion
174
//#region src/state.ts
/**
 * Initial state
 */
const initialState = {
	isVisible: false,
	isExpanded: false,
	isConnected: false,
	isMicEnabled: false,
	activeAction: null,
	focusedElement: null,
	recordingState: "idle",
	errorMessage: null
};
/**
 * State manager: holds the widget state, notifies local subscribers on
 * every change, and mirrors each change onto the global event bus as a
 * "state:change" event.
 */
var StateManager = class {
	state;
	subscribers = /* @__PURE__ */ new Set();
	constructor(seed) {
		this.state = Object.assign({}, seed);
	}
	/**
	 * Get the current state (returns a copy to prevent mutations)
	 */
	getState() {
		return Object.assign({}, this.state);
	}
	/**
	 * Merge a partial update into the state and notify all observers.
	 * A throwing subscriber is logged and does not block the others.
	 * @param partial - Partial state to merge with current state
	 */
	setState(partial) {
		const previous = Object.assign({}, this.state);
		this.state = Object.assign({}, this.state, partial);
		for (const listener of this.subscribers) {
			try {
				listener(this.state, previous);
			} catch (error) {
				console.error("Error in state change callback:", error);
			}
		}
		events.emit("state:change", { state: this.state });
	}
	/**
	 * Subscribe to state changes
	 * @param callback - Function to call when state changes
	 * @returns Unsubscribe function
	 */
	subscribe(callback) {
		this.subscribers.add(callback);
		return () => {
			this.subscribers.delete(callback);
		};
	}
	/**
	 * Reset state to initial values
	 */
	reset() {
		this.setState(initialState);
	}
	/**
	 * Show the widget
	 */
	show() {
		this.setState({ isVisible: true });
		events.emit("widget:show", void 0);
	}
	/**
	 * Hide the widget, collapsing bubbles and clearing the active action
	 */
	hide() {
		this.setState({
			isVisible: false,
			isExpanded: false,
			activeAction: null
		});
		events.emit("widget:hide", void 0);
	}
	/**
	 * Toggle the action bubbles expansion
	 */
	toggleExpanded() {
		this.setState({ isExpanded: !this.state.isExpanded });
	}
	/**
	 * Set the focused form element
	 * @param element - The form element that has focus
	 */
	setFocusedElement(element) {
		this.setState({ focusedElement: element });
	}
	/**
	 * Set the active action
	 * @param action - The action to set as active
	 */
	setActiveAction(action) {
		this.setState({ activeAction: action });
	}
	/**
	 * Set the recording state
	 * @param recordingState - The recording state to set
	 */
	setRecordingState(recordingState) {
		this.setState({ recordingState });
	}
	/**
	 * Set the connection state
	 * @param isConnected - Whether connected to LiveKit
	 */
	setConnected(isConnected) {
		this.setState({ isConnected });
	}
	/**
	 * Set the microphone enabled state
	 * @param isMicEnabled - Whether microphone is enabled
	 */
	setMicEnabled(isMicEnabled) {
		this.setState({ isMicEnabled });
	}
	/**
	 * Start recording flow (connecting), collapsing the bubbles
	 */
	startRecording() {
		this.setState({
			recordingState: "connecting",
			isExpanded: false
		});
	}
	/**
	 * Stop recording and start processing
	 */
	stopRecording() {
		this.setState({
			recordingState: "processing",
			isMicEnabled: false
		});
	}
	/**
	 * Complete the recording flow and return to idle
	 */
	completeRecording() {
		this.setState({
			recordingState: "idle",
			activeAction: null,
			isConnected: false,
			isMicEnabled: false
		});
	}
	/**
	 * Cancel recording, clearing any error, and return to idle
	 */
	cancelRecording() {
		this.setState({
			recordingState: "idle",
			activeAction: null,
			errorMessage: null,
			isConnected: false,
			isMicEnabled: false
		});
	}
	/**
	 * Set error state with a message
	 * @param message - Error message to display
	 */
	setError(message) {
		this.setState({
			recordingState: "error",
			errorMessage: message
		});
	}
	/**
	 * Clear error state and return to idle
	 */
	clearError() {
		this.setState({
			recordingState: "idle",
			errorMessage: null
		});
	}
};
const state = new StateManager(initialState);
/**
 * Create a new state manager instance (useful for testing)
 */
function createStateManager(initial) {
	return new StateManager(Object.assign({}, initialState, initial));
}

//#endregion
371
//#region src/livekit.ts
const MESSAGE_TYPE_REQUEST_TRANSCRIPT = "request_transcript";
const MESSAGE_TYPE_TRANSCRIPT = "transcript";
const MESSAGE_TYPE_EDIT_TEXT = "edit_text";
const MESSAGE_TYPE_EDITED_TEXT = "edited_text";
const MESSAGE_TYPE_ERROR = "error";
const TOPIC_SPEECHOS = "speechos";
/**
 * A deferred promise with timeout support.
 * Encapsulates resolve/reject/timeout in a single object; once settled,
 * every later resolve/reject/timeout is a no-op.
 */
var Deferred = class {
	promise;
	_resolve;
	_reject;
	_timeoutId = null;
	_settled = false;
	constructor() {
		this.promise = new Promise((res, rej) => {
			this._resolve = res;
			this._reject = rej;
		});
	}
	/**
	 * Set a timeout that will reject the promise with the given error.
	 * On firing it also logs and emits a global "error" event.
	 */
	setTimeout(ms, errorMessage, errorCode, errorSource) {
		this._timeoutId = setTimeout(() => {
			if (this._settled) return;
			console.error(`[SpeechOS] Error: ${errorMessage} (${errorCode})`);
			events.emit("error", {
				code: errorCode,
				message: errorMessage,
				source: errorSource
			});
			this.reject(new Error(errorMessage));
		}, ms);
	}
	/** Resolve the promise (first settle wins; timer is cancelled). */
	resolve(value) {
		if (this._settled) return;
		this._settled = true;
		this.clearTimeout();
		this._resolve(value);
	}
	/** Reject the promise (first settle wins; timer is cancelled). */
	reject(error) {
		if (this._settled) return;
		this._settled = true;
		this.clearTimeout();
		this._reject(error);
	}
	/** Cancel a pending timeout without settling the promise. */
	clearTimeout() {
		if (this._timeoutId !== null) {
			clearTimeout(this._timeoutId);
			this._timeoutId = null;
		}
	}
	/** Whether resolve or reject has already been called. */
	get isSettled() {
		return this._settled;
	}
};
434
/**
 * LiveKit connection manager.
 *
 * Owns the session lifecycle: token fetch, room connection, microphone
 * publishing, and request/response data messages exchanged with the agent.
 * Each in-flight request is tracked by a Deferred that event handlers,
 * timeouts, or disconnect can settle. Fix over previous revision: the
 * pending slots are now always released via try/finally, so a timed-out or
 * failed request can no longer leave a stale rejected Deferred behind
 * (which poisoned later waitUntilReady() calls) or a live timeout timer
 * that fired a spurious error after the request had already failed.
 */
var LiveKitManager = class {
	room = null;
	tokenData = null;
	micTrack = null;
	pendingTranscript = null;
	pendingEditText = null;
	pendingTrackSubscribed = null;
	preWarmPromise = null;
	editOriginalText = null;
	/**
	 * Pre-warm resources for faster connection
	 * Call this when user shows intent (e.g., expands widget)
	 * Only fetches token - mic permission is requested when user clicks Dictate
	 */
	async preWarm() {
		if (this.tokenData || this.preWarmPromise || this.room?.state === "connected") {
			const config$1 = getConfig();
			if (config$1.debug) console.log("[SpeechOS] Pre-warm skipped - token already available");
			return;
		}
		const config = getConfig();
		if (config.debug) console.log("[SpeechOS] Pre-warming: fetching token...");
		this.preWarmPromise = (async () => {
			try {
				await this.fetchToken();
				if (config.debug) console.log("[SpeechOS] Pre-warm complete - token ready");
			} catch (error) {
				// Pre-warm is best-effort; clear the promise so it can be retried.
				if (config.debug) console.warn("[SpeechOS] Pre-warm failed:", error);
				this.preWarmPromise = null;
			}
		})();
		await this.preWarmPromise;
	}
	/**
	 * Fetch a LiveKit token from the backend
	 * @returns The token payload ({ room, identity, ws_url, token, ... })
	 * @throws Error when the HTTP request fails
	 */
	async fetchToken() {
		const config = getConfig();
		const url = `${config.host}/livekit/api/token/`;
		if (config.debug) console.log("[SpeechOS] Fetching LiveKit token from:", url);
		const response = await fetch(url, {
			method: "POST",
			headers: {
				"Content-Type": "application/json",
				...config.apiKey ? { Authorization: `Api-Key ${config.apiKey}` } : {}
			},
			body: JSON.stringify({ user_id: config.userId || null })
		});
		if (!response.ok) throw new Error(`Failed to fetch LiveKit token: ${response.status} ${response.statusText}`);
		const data = await response.json();
		this.tokenData = data;
		if (config.debug) console.log("[SpeechOS] LiveKit token received:", {
			room: data.room,
			identity: data.identity,
			ws_url: data.ws_url
		});
		return data;
	}
	/**
	 * Connect to a LiveKit room (fresh connection each time)
	 */
	async connect() {
		const config = getConfig();
		if (!this.tokenData) await this.fetchToken();
		else if (config.debug) console.log("[SpeechOS] Using pre-fetched token");
		if (!this.tokenData) throw new Error("No token available for LiveKit connection");
		this.room = new livekit_client.Room({
			adaptiveStream: true,
			dynacast: true
		});
		this.setupRoomEvents();
		if (config.debug) console.log("[SpeechOS] Connecting to LiveKit room:", this.tokenData.room);
		await this.room.connect(this.tokenData.ws_url, this.tokenData.token);
		state.setConnected(true);
		if (config.debug) console.log("[SpeechOS] Connected to LiveKit room:", this.room.name);
		return this.room;
	}
	/**
	 * Wait until the agent is ready to receive audio
	 * Resolves when LocalTrackSubscribed event is received.
	 * The pending slot is always released, so a timed-out wait does not
	 * poison subsequent calls with a stale rejected promise.
	 */
	async waitUntilReady() {
		if (!this.room || this.room.state !== "connected") throw new Error("Not connected to room");
		if (this.pendingTrackSubscribed) return this.pendingTrackSubscribed.promise;
		const pending = new Deferred();
		this.pendingTrackSubscribed = pending;
		pending.setTimeout(15e3, "Connection timed out - agent not available", "connection_timeout", "connection");
		try {
			return await pending.promise;
		} finally {
			// Only clear if the slot still holds our deferred (the event
			// handler or a newer session may have replaced it already).
			if (this.pendingTrackSubscribed === pending) this.pendingTrackSubscribed = null;
		}
	}
	/**
	 * Set up LiveKit room event listeners
	 */
	setupRoomEvents() {
		if (!this.room) return;
		const config = getConfig();
		this.room.on(livekit_client.RoomEvent.Connected, () => {
			if (config.debug) console.log("[SpeechOS] Room connected");
			state.setConnected(true);
		});
		this.room.on(livekit_client.RoomEvent.Disconnected, (reason) => {
			if (config.debug) console.log("[SpeechOS] Room disconnected:", reason);
			state.setConnected(false);
			state.setMicEnabled(false);
		});
		this.room.on(livekit_client.RoomEvent.ParticipantConnected, (participant) => {
			if (config.debug) console.log("[SpeechOS] Participant connected:", participant.identity);
		});
		this.room.on(livekit_client.RoomEvent.LocalTrackSubscribed, (publication) => {
			if (config.debug) console.log("[SpeechOS] LocalTrackSubscribed event fired:", publication.trackSid);
			if (this.pendingTrackSubscribed) {
				this.pendingTrackSubscribed.resolve();
				this.pendingTrackSubscribed = null;
			}
		});
		this.room.on(livekit_client.RoomEvent.LocalTrackPublished, (publication) => {
			if (config.debug) console.log("[SpeechOS] LocalTrackPublished:", publication.trackSid, publication.source);
		});
		this.room.on(livekit_client.RoomEvent.DataReceived, (data, participant) => {
			this.handleDataMessage(data, participant);
		});
	}
	/**
	 * Handle incoming data messages from the agent.
	 * Resolves/rejects the matching pending Deferred and re-emits the
	 * payload on the global event bus. Malformed JSON is logged only.
	 */
	handleDataMessage(data, _participant) {
		const config = getConfig();
		try {
			const message = JSON.parse(new TextDecoder().decode(data));
			if (config.debug) console.log("[SpeechOS] Data received:", message);
			if (message.type === MESSAGE_TYPE_TRANSCRIPT) {
				const transcript = message.transcript || "";
				if (config.debug) console.log("[SpeechOS] Transcript received:", transcript);
				events.emit("transcription:complete", { text: transcript });
				if (this.pendingTranscript) {
					this.pendingTranscript.resolve(transcript);
					this.pendingTranscript = null;
				}
			} else if (message.type === MESSAGE_TYPE_EDITED_TEXT) {
				const editedText = message.text || "";
				if (config.debug) console.log("[SpeechOS] Edited text received:", editedText);
				events.emit("edit:complete", {
					text: editedText,
					originalText: this.editOriginalText || ""
				});
				if (this.pendingEditText) {
					this.pendingEditText.resolve(editedText);
					this.pendingEditText = null;
				}
				this.editOriginalText = null;
			} else if (message.type === MESSAGE_TYPE_ERROR) {
				const serverError = message;
				const errorCode = serverError.code || "server_error";
				const errorMessage = serverError.message || "A server error occurred";
				console.error(`[SpeechOS] Error: ${errorMessage} (${errorCode})`);
				if (config.debug && serverError.details) console.error("[SpeechOS] Error details:", serverError.details);
				events.emit("error", {
					code: errorCode,
					message: errorMessage,
					source: "server"
				});
				const error = new Error(errorMessage);
				if (this.pendingTranscript) {
					this.pendingTranscript.reject(error);
					this.pendingTranscript = null;
				}
				if (this.pendingEditText) {
					this.pendingEditText.reject(error);
					this.pendingEditText = null;
				}
			}
		} catch (error) {
			console.error("[SpeechOS] Failed to parse data message:", error);
		}
	}
	/**
	 * Publish microphone audio track
	 */
	async enableMicrophone() {
		if (!this.room || this.room.state !== "connected") throw new Error("Not connected to room");
		const config = getConfig();
		if (!this.micTrack) {
			if (config.debug) console.log("[SpeechOS] Creating microphone track...");
			this.micTrack = await (0, livekit_client.createLocalAudioTrack)({
				echoCancellation: true,
				noiseSuppression: true
			});
		}
		const existingPub = this.room.localParticipant.getTrackPublication(livekit_client.Track.Source.Microphone);
		if (!existingPub) {
			await this.room.localParticipant.publishTrack(this.micTrack, { source: livekit_client.Track.Source.Microphone });
			state.setMicEnabled(true);
			if (config.debug) console.log("[SpeechOS] Microphone track published");
		}
	}
	/**
	 * Disable microphone audio track (unpublish, stop, and release it)
	 */
	async disableMicrophone() {
		const config = getConfig();
		if (this.micTrack) {
			if (config.debug) console.log("[SpeechOS] Disabling microphone track...");
			if (this.room?.state === "connected") try {
				await this.room.localParticipant.unpublishTrack(this.micTrack);
				if (config.debug) console.log("[SpeechOS] Microphone track unpublished");
			} catch (error) {
				// Best-effort: the track is stopped below regardless.
				console.warn("[SpeechOS] Error unpublishing track:", error);
			}
			this.micTrack.stop();
			this.micTrack.detach();
			this.micTrack = null;
			state.setMicEnabled(false);
			if (config.debug) console.log("[SpeechOS] Microphone track stopped and detached");
		}
	}
	/**
	 * Send a data message to the room
	 * @throws Error when not connected
	 */
	async sendDataMessage(message) {
		if (!this.room || this.room.state !== "connected") throw new Error("Not connected to room");
		const data = new TextEncoder().encode(JSON.stringify(message));
		await this.room.localParticipant.publishData(data, {
			reliable: true,
			topic: TOPIC_SPEECHOS
		});
	}
	/**
	 * Start a voice session
	 * Connects to room, enables microphone, and waits for agent to subscribe to our track
	 */
	async startVoiceSession() {
		const config = getConfig();
		if (config.debug) console.log("[SpeechOS] Starting voice session...");
		if (this.preWarmPromise) {
			if (config.debug) console.log("[SpeechOS] Waiting for pre-warm to complete...");
			await this.preWarmPromise;
		}
		if (this.tokenData) {
			if (config.debug) console.log("[SpeechOS] Using cached token from init");
		} else {
			if (config.debug) console.log("[SpeechOS] Fetching fresh token for session...");
			await this.fetchToken();
		}
		const pending = new Deferred();
		this.pendingTrackSubscribed = pending;
		pending.setTimeout(15e3, "Connection timed out - agent not available", "connection_timeout", "connection");
		try {
			await this.connect();
			await this.enableMicrophone();
			if (config.debug) console.log("[SpeechOS] Microphone published, waiting for LocalTrackSubscribed event...");
			await pending.promise;
		} finally {
			// Stop the timer so a failed connect/publish cannot fire a
			// spurious timeout error later, and release the pending slot.
			pending.clearTimeout();
			if (this.pendingTrackSubscribed === pending) this.pendingTrackSubscribed = null;
		}
		if (config.debug) console.log("[SpeechOS] Voice session ready - agent subscribed to audio");
	}
	/**
	 * Stop the voice session and request the transcript
	 * Returns a promise that resolves with the transcript text
	 * @throws Error if timeout occurs waiting for transcript
	 */
	async stopVoiceSession() {
		const config = getConfig();
		if (config.debug) console.log("[SpeechOS] Stopping voice session, requesting transcript...");
		await this.disableMicrophone();
		if (config.debug) console.log("[SpeechOS] Requesting transcript from agent...");
		const pending = new Deferred();
		this.pendingTranscript = pending;
		pending.setTimeout(1e4, "Transcription timed out. Please try again.", "transcription_timeout", "timeout");
		try {
			await this.sendDataMessage({ type: MESSAGE_TYPE_REQUEST_TRANSCRIPT });
			return await pending.promise;
		} finally {
			// Release the slot and cancel the timer even when sending or
			// awaiting fails; handleDataMessage may have cleared it already.
			pending.clearTimeout();
			if (this.pendingTranscript === pending) this.pendingTranscript = null;
		}
	}
	/**
	 * Alias for stopVoiceSession - granular API naming
	 */
	async stopAndGetTranscript() {
		return this.stopVoiceSession();
	}
	/**
	 * Request text editing using the transcript as instructions
	 * Sends the original text to the backend, which applies the spoken instructions
	 * Returns a promise that resolves with the edited text
	 * @throws Error if timeout occurs waiting for edited text
	 */
	async requestEditText(originalText) {
		const config = getConfig();
		if (config.debug) console.log("[SpeechOS] Requesting text edit...");
		this.editOriginalText = originalText;
		await this.disableMicrophone();
		if (config.debug) console.log("[SpeechOS] Sending edit_text request to agent...");
		const pending = new Deferred();
		this.pendingEditText = pending;
		pending.setTimeout(15e3, "Edit request timed out. Please try again.", "edit_timeout", "timeout");
		try {
			await this.sendDataMessage({
				type: MESSAGE_TYPE_EDIT_TEXT,
				text: originalText
			});
			return await pending.promise;
		} finally {
			pending.clearTimeout();
			if (this.pendingEditText === pending) {
				// Failure path: success clears these in handleDataMessage.
				this.pendingEditText = null;
				this.editOriginalText = null;
			}
		}
	}
	/**
	 * Alias for requestEditText - granular API naming
	 */
	async stopAndEdit(originalText) {
		return this.requestEditText(originalText);
	}
	/**
	 * Disconnect from the current room
	 * Clears the token so a fresh one is fetched for the next session
	 */
	async disconnect() {
		const config = getConfig();
		if (config.debug) console.log("[SpeechOS] Disconnecting from room...");
		await this.disableMicrophone();
		if (this.room) {
			this.room.removeAllListeners();
			await this.room.disconnect();
			this.room = null;
			state.setConnected(false);
			if (config.debug) console.log("[SpeechOS] Room disconnected and cleaned up");
		}
		if (this.pendingTranscript) {
			this.pendingTranscript.reject(new Error("Disconnected"));
			this.pendingTranscript = null;
		}
		if (this.pendingEditText) {
			this.pendingEditText.reject(new Error("Disconnected"));
			this.pendingEditText = null;
		}
		if (this.pendingTrackSubscribed) {
			this.pendingTrackSubscribed.reject(new Error("Disconnected"));
			this.pendingTrackSubscribed = null;
		}
		this.tokenData = null;
		this.preWarmPromise = null;
		this.editOriginalText = null;
		if (config.debug) console.log("[SpeechOS] Session state cleared");
	}
	/**
	 * Get the current room instance
	 */
	getRoom() {
		return this.room;
	}
	/**
	 * Get the current token data
	 */
	getTokenData() {
		return this.tokenData;
	}
	/**
	 * Check if connected to a room
	 */
	isConnected() {
		return this.room?.state === "connected";
	}
	/**
	 * Check if microphone is enabled
	 */
	isMicrophoneEnabled() {
		return this.micTrack !== null;
	}
	/**
	 * Clear the cached token
	 * Used when user identity changes to ensure next session gets a fresh token
	 */
	clearToken() {
		const config = getConfig();
		if (config.debug) console.log("[SpeechOS] Clearing cached token");
		this.tokenData = null;
		this.preWarmPromise = null;
	}
};
const livekit = new LiveKitManager();

//#endregion
808
//#region src/transcript-store.ts
const STORAGE_KEY = "speechos_transcripts";
const MAX_ENTRIES = 50;
/**
 * Build a unique id: millisecond timestamp plus a short base36 noise suffix.
 */
function generateId() {
	const stamp = Date.now();
	const noise = Math.random().toString(36).slice(2, 9);
	return `${stamp}-${noise}`;
}
/**
 * Read all transcripts from localStorage, newest first.
 * Returns an empty list when storage is unavailable or unparseable.
 */
function getTranscripts() {
	try {
		const raw = localStorage.getItem(STORAGE_KEY);
		if (!raw) return [];
		const parsed = JSON.parse(raw);
		return parsed.sort((a, b) => b.timestamp - a.timestamp);
	} catch {
		return [];
	}
}
/**
 * Save a new transcript entry, pruning history to MAX_ENTRIES.
 * Storage failures are ignored (history is best-effort).
 */
function saveTranscript(text, action, originalText) {
	const entry = {
		id: generateId(),
		text,
		timestamp: Date.now(),
		action
	};
	if (originalText) entry.originalText = originalText;
	const pruned = [entry, ...getTranscripts()].slice(0, MAX_ENTRIES);
	try {
		localStorage.setItem(STORAGE_KEY, JSON.stringify(pruned));
	} catch {}
	return entry;
}
/**
 * Clear all transcript history
 */
function clearTranscripts() {
	try {
		localStorage.removeItem(STORAGE_KEY);
	} catch {}
}
/**
 * Delete a single transcript by ID
 */
function deleteTranscript(id) {
	const remaining = getTranscripts().filter((entry) => entry.id !== id);
	try {
		localStorage.setItem(STORAGE_KEY, JSON.stringify(remaining));
	} catch {}
}
const transcriptStore = {
	getTranscripts,
	saveTranscript,
	clearTranscripts,
	deleteTranscript
};

//#endregion
874
+ //#region src/speechos.ts
875
+ /**
876
+ * SpeechOS Core SDK
877
+ *
878
+ * Provides two API layers:
879
+ * 1. Low-level API: Granular control over LiveKit connection lifecycle
880
+ * 2. High-level API: One-shot methods for common voice tasks
881
+ */
882
+ var SpeechOSCore = class {
883
+ initialized = false;
884
+ /**
885
+ * Initialize the SDK with configuration
886
+ * @param config - Configuration options including apiKey
887
+ */
888
+ init(config) {
889
+ setConfig(config);
890
+ this.initialized = true;
891
+ const currentConfig$1 = getConfig();
892
+ if (currentConfig$1.debug) console.log("[SpeechOS] Initialized with config:", {
893
+ host: currentConfig$1.host,
894
+ position: currentConfig$1.position,
895
+ debug: currentConfig$1.debug
896
+ });
897
+ }
898
+ /**
899
+ * Check if the SDK is initialized
900
+ */
901
+ isInitialized() {
902
+ return this.initialized;
903
+ }
904
+ /**
905
+ * Connect to LiveKit (fetches token, establishes connection)
906
+ * Call this before other low-level methods
907
+ */
908
+ async connect() {
909
+ this.ensureInitialized();
910
+ await livekit.connect();
911
+ }
912
+ /**
913
+ * Wait until the agent is ready to receive audio
914
+ * Resolves when the agent subscribes to our audio track
915
+ */
916
+ async waitUntilReady() {
917
+ return livekit.waitUntilReady();
918
+ }
919
+ /**
920
+ * Enable microphone (user is now being recorded)
921
+ */
922
+ async enableMicrophone() {
923
+ await livekit.enableMicrophone();
924
+ state.setRecordingState("recording");
925
+ }
926
+ /**
927
+ * Stop recording and get the transcript
928
+ * @returns The transcribed text
929
+ */
930
+ async stopAndGetTranscript() {
931
+ state.setRecordingState("processing");
932
+ try {
933
+ const transcript = await livekit.stopAndGetTranscript();
934
+ transcriptStore.saveTranscript(transcript, "dictate");
935
+ state.completeRecording();
936
+ return transcript;
937
+ } catch (error) {
938
+ state.setError(error instanceof Error ? error.message : "Transcription failed");
939
+ throw error;
940
+ }
941
+ }
942
+ /**
943
+ * Stop recording and get edited text
944
+ * @param originalText - The original text to edit based on voice instructions
945
+ * @returns The edited text
946
+ */
947
+ async stopAndEdit(originalText) {
948
+ state.setRecordingState("processing");
949
+ try {
950
+ const editedText = await livekit.stopAndEdit(originalText);
951
+ transcriptStore.saveTranscript(editedText, "edit", originalText);
952
+ state.completeRecording();
953
+ return editedText;
954
+ } catch (error) {
955
+ state.setError(error instanceof Error ? error.message : "Edit request failed");
956
+ throw error;
957
+ }
958
+ }
959
+ /**
960
+ * Disconnect from LiveKit
961
+ */
962
+ async disconnect() {
963
+ await livekit.disconnect();
964
+ state.completeRecording();
965
+ }
966
+ /**
967
+ * One-shot dictation: connect, wait for agent, record, and get transcript
968
+ * Automatically handles the full voice session lifecycle
969
+ *
970
+ * @returns The transcribed text
971
+ */
972
+ async dictate() {
973
+ this.ensureInitialized();
974
+ state.setActiveAction("dictate");
975
+ state.startRecording();
976
+ try {
977
+ await livekit.startVoiceSession();
978
+ state.setRecordingState("recording");
979
+ return new Promise((resolve, reject) => {
980
+ this._dictateResolve = resolve;
981
+ this._dictateReject = reject;
982
+ });
983
+ } catch (error) {
984
+ state.setError(error instanceof Error ? error.message : "Failed to start dictation");
985
+ await this.cleanup();
986
+ throw error;
987
+ }
988
+ }
989
+ _dictateResolve;
990
+ _dictateReject;
991
+ /**
992
+ * Stop dictation and get the transcript
993
+ * Call this after dictate() when user stops speaking
994
+ */
995
+ async stopDictation() {
996
+ state.setRecordingState("processing");
997
+ try {
998
+ const transcript = await livekit.stopVoiceSession();
999
+ transcriptStore.saveTranscript(transcript, "dictate");
1000
+ state.completeRecording();
1001
+ if (this._dictateResolve) {
1002
+ this._dictateResolve(transcript);
1003
+ this._dictateResolve = void 0;
1004
+ this._dictateReject = void 0;
1005
+ }
1006
+ return transcript;
1007
+ } catch (error) {
1008
+ const err = error instanceof Error ? error : new Error("Transcription failed");
1009
+ state.setError(err.message);
1010
+ if (this._dictateReject) {
1011
+ this._dictateReject(err);
1012
+ this._dictateResolve = void 0;
1013
+ this._dictateReject = void 0;
1014
+ }
1015
+ throw err;
1016
+ } finally {
1017
+ await this.cleanup();
1018
+ }
1019
+ }
1020
+ /**
1021
+ * One-shot edit: connect, wait for agent, record voice instructions, apply to text
1022
+ * Automatically handles the full voice session lifecycle
1023
+ *
1024
+ * @param originalText - The text to edit
1025
+ * @returns The edited text
1026
+ */
1027
+ async edit(originalText) {
1028
+ this.ensureInitialized();
1029
+ state.setActiveAction("edit");
1030
+ state.startRecording();
1031
+ this._editOriginalText = originalText;
1032
+ try {
1033
+ await livekit.startVoiceSession();
1034
+ state.setRecordingState("recording");
1035
+ return new Promise((resolve, reject) => {
1036
+ this._editResolve = resolve;
1037
+ this._editReject = reject;
1038
+ });
1039
+ } catch (error) {
1040
+ state.setError(error instanceof Error ? error.message : "Failed to start edit");
1041
+ await this.cleanup();
1042
+ throw error;
1043
+ }
1044
+ }
1045
+ _editOriginalText;
1046
+ _editResolve;
1047
+ _editReject;
1048
+ /**
1049
+ * Stop edit recording and get the edited text
1050
+ * Call this after edit() when user stops speaking
1051
+ */
1052
+ async stopEdit() {
1053
+ state.setRecordingState("processing");
1054
+ try {
1055
+ const originalText = this._editOriginalText || "";
1056
+ const editedText = await livekit.requestEditText(originalText);
1057
+ transcriptStore.saveTranscript(editedText, "edit", originalText);
1058
+ state.completeRecording();
1059
+ if (this._editResolve) {
1060
+ this._editResolve(editedText);
1061
+ this._editResolve = void 0;
1062
+ this._editReject = void 0;
1063
+ }
1064
+ return editedText;
1065
+ } catch (error) {
1066
+ const err = error instanceof Error ? error : new Error("Edit request failed");
1067
+ state.setError(err.message);
1068
+ if (this._editReject) {
1069
+ this._editReject(err);
1070
+ this._editResolve = void 0;
1071
+ this._editReject = void 0;
1072
+ }
1073
+ throw err;
1074
+ } finally {
1075
+ this._editOriginalText = void 0;
1076
+ await this.cleanup();
1077
+ }
1078
+ }
1079
+ /**
1080
+ * Cancel the current operation
1081
+ */
1082
+ async cancel() {
1083
+ const err = new Error("Operation cancelled");
1084
+ if (this._dictateReject) {
1085
+ this._dictateReject(err);
1086
+ this._dictateResolve = void 0;
1087
+ this._dictateReject = void 0;
1088
+ }
1089
+ if (this._editReject) {
1090
+ this._editReject(err);
1091
+ this._editResolve = void 0;
1092
+ this._editReject = void 0;
1093
+ }
1094
+ this._editOriginalText = void 0;
1095
+ await this.cleanup();
1096
+ state.cancelRecording();
1097
+ }
1098
/**
 * Access the state manager for subscribing to state changes.
 * Returns the module-level state singleton shared by all SDK methods.
 */
get state() {
  return state;
}
1104
/**
 * Access the event emitter for listening to events.
 * Returns the module-level events singleton shared by all SDK methods.
 */
get events() {
  return events;
}
1110
/**
 * Get the current config.
 * Delegates to the module-level getConfig() helper.
 */
getConfig() {
  return getConfig();
}
1116
+ ensureInitialized() {
1117
+ if (!this.initialized) throw new Error("SpeechOS not initialized. Call speechOS.init({ apiKey: ... }) first.");
1118
+ }
1119
+ async cleanup() {
1120
+ try {
1121
+ await livekit.disconnect();
1122
+ } catch (error) {
1123
+ const config = getConfig();
1124
+ if (config.debug) console.warn("[SpeechOS] Cleanup disconnect error:", error);
1125
+ }
1126
+ }
1127
+ /**
1128
+ * Reset the SDK (useful for testing)
1129
+ */
1130
+ reset() {
1131
+ this.initialized = false;
1132
+ this._dictateResolve = void 0;
1133
+ this._dictateReject = void 0;
1134
+ this._editResolve = void 0;
1135
+ this._editReject = void 0;
1136
+ this._editOriginalText = void 0;
1137
+ resetConfig();
1138
+ state.reset();
1139
+ events.clear();
1140
+ }
1141
+ };
1142
/** Shared singleton entry point for the SDK. */
const speechOS = new SpeechOSCore();
1143
+
1144
+ //#endregion
1145
+ //#region src/index.ts
1146
/** SDK version string; keep in sync with package.json (this release is 0.2.0). */
const VERSION = "0.2.0";
1147
+
1148
+ //#endregion
1149
// Public CommonJS surface: config helpers, low-level stores/managers, and the
// high-level speechOS singleton.
exports.DEFAULT_HOST = DEFAULT_HOST;
exports.Deferred = Deferred;
exports.SpeechOSEventEmitter = SpeechOSEventEmitter;
exports.VERSION = VERSION;
exports.createStateManager = createStateManager;
exports.defaultConfig = defaultConfig;
exports.events = events;
exports.getConfig = getConfig;
exports.livekit = livekit;
exports.resetConfig = resetConfig;
exports.setConfig = setConfig;
exports.speechOS = speechOS;
exports.state = state;
exports.transcriptStore = transcriptStore;
exports.updateUserId = updateUserId;
exports.validateConfig = validateConfig;
//# sourceMappingURL=index.cjs.map