@speechos/core 0.2.0 → 0.2.3
This diff compares publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in the public registry.
- package/dist/audio-capture.d.cts +130 -0
- package/dist/audio-capture.d.ts +130 -0
- package/dist/backend.d.cts +41 -0
- package/dist/backend.d.ts +41 -0
- package/dist/config.d.cts +23 -7
- package/dist/config.d.ts +23 -7
- package/dist/index.cjs +1263 -158
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +6 -5
- package/dist/index.d.ts +6 -5
- package/dist/index.js +1262 -157
- package/dist/index.js.map +1 -1
- package/dist/livekit.d.cts +81 -14
- package/dist/livekit.d.ts +81 -14
- package/dist/speechos.d.cts +19 -3
- package/dist/speechos.d.ts +19 -3
- package/dist/state.d.cts +4 -1
- package/dist/state.d.ts +4 -1
- package/dist/types.d.cts +105 -9
- package/dist/types.d.ts +105 -9
- package/dist/websocket.d.cts +133 -0
- package/dist/websocket.d.ts +133 -0
- package/package.json +5 -4
- package/dist/transcript-store.d.cts +0 -35
- package/dist/transcript-store.d.ts +0 -35
package/dist/index.js
CHANGED
@@ -12,8 +12,6 @@ const defaultConfig = {
   apiKey: "",
   userId: "",
   host: DEFAULT_HOST,
-  position: "bottom-center",
-  zIndex: 999999,
   debug: false
 };
 /**
@@ -21,31 +19,19 @@
  * @param userConfig - User-provided configuration
  * @returns Validated and merged configuration
  */
-function validateConfig(userConfig
+function validateConfig(userConfig) {
   if (!userConfig.apiKey) throw new Error("SpeechOS requires an apiKey. Get one from your team dashboard at /a/<team-slug>/.");
-
-
-
+  return {
+    apiKey: userConfig.apiKey,
+    userId: userConfig.userId ?? defaultConfig.userId,
+    host: userConfig.host ?? defaultConfig.host,
+    debug: userConfig.debug ?? defaultConfig.debug
   };
-  const validPositions = [
-    "bottom-center",
-    "bottom-right",
-    "bottom-left"
-  ];
-  if (!validPositions.includes(config.position)) {
-    console.warn(`Invalid position "${config.position}". Using default "bottom-center".`);
-    config.position = "bottom-center";
-  }
-  if (typeof config.zIndex !== "number" || config.zIndex < 0) {
-    console.warn(`Invalid zIndex "${config.zIndex}". Using default ${defaultConfig.zIndex}.`);
-    config.zIndex = defaultConfig.zIndex;
-  }
-  return config;
 }
 /**
  * Current active configuration (singleton)
  */
-let currentConfig = defaultConfig;
+let currentConfig = { ...defaultConfig };
 /**
  * Get the current configuration
  */
@@ -75,6 +61,28 @@ function updateUserId(userId) {
     userId
   };
 }
+/**
+ * LocalStorage key for anonymous ID persistence
+ */
+const ANONYMOUS_ID_KEY = "speechos_anonymous_id";
+/**
+ * Get or generate a persistent anonymous ID for Mixpanel tracking.
+ *
+ * This ID is stored in localStorage to persist across sessions,
+ * allowing consistent anonymous user tracking without identifying
+ * the account owner's customers.
+ *
+ * @returns A UUID string for anonymous identification
+ */
+function getAnonymousId() {
+  if (typeof localStorage === "undefined") return crypto.randomUUID();
+  let anonymousId = localStorage.getItem(ANONYMOUS_ID_KEY);
+  if (!anonymousId) {
+    anonymousId = crypto.randomUUID();
+    localStorage.setItem(ANONYMOUS_ID_KEY, anonymousId);
+  }
+  return anonymousId;
+}
 
 //#endregion
 //#region src/events.ts
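
The new `getAnonymousId` helper gives repeat visitors a stable identifier even when the integrator never passes a `userId`. A sketch of the observable behavior (illustrative host-page code, not part of the package):

```js
// First call in a fresh browser profile generates and persists a UUID
// under localStorage["speechos_anonymous_id"]:
const first = getAnonymousId();
// Later calls - including in future sessions - return the same value:
const second = getAnonymousId();
console.assert(first === second);
// In non-browser contexts (no localStorage) a throwaway UUID is returned per call.
```
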
@@ -168,33 +176,38 @@ const initialState = {
 var StateManager = class {
   state;
   subscribers = /* @__PURE__ */ new Set();
+  /** Cached immutable snapshot for useSyncExternalStore compatibility */
+  snapshot;
   constructor(initialState$1) {
     this.state = { ...initialState$1 };
+    this.snapshot = Object.freeze({ ...this.state });
   }
   /**
-   * Get the current state (returns a
+   * Get the current state snapshot (returns a stable reference for React)
+   * This returns an immutable frozen object that only changes when setState is called.
    */
   getState() {
-    return
+    return this.snapshot;
   }
   /**
    * Update state with partial values
    * @param partial - Partial state to merge with current state
    */
   setState(partial) {
-    const prevState =
+    const prevState = this.snapshot;
     this.state = {
       ...this.state,
       ...partial
     };
+    this.snapshot = Object.freeze({ ...this.state });
     this.subscribers.forEach((callback) => {
       try {
-        callback(this.
+        callback(this.snapshot, prevState);
       } catch (error) {
         console.error("Error in state change callback:", error);
       }
     });
-    events.emit("state:change", { state: this.
+    events.emit("state:change", { state: this.snapshot });
   }
   /**
    * Subscribe to state changes
@@ -211,7 +224,17 @@ var StateManager = class {
    * Reset state to initial values
    */
   reset() {
-    this.
+    const prevState = this.snapshot;
+    this.state = { ...initialState };
+    this.snapshot = Object.freeze({ ...this.state });
+    this.subscribers.forEach((callback) => {
+      try {
+        callback(this.snapshot, prevState);
+      } catch (error) {
+        console.error("Error in state change callback:", error);
+      }
+    });
+    events.emit("state:change", { state: this.snapshot });
   }
   /**
    * Show the widget
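
The frozen, cached snapshot introduced in these two hunks is what makes the manager safe for React's `useSyncExternalStore`, which compares successive `getSnapshot` results by reference and re-renders in a loop if every call returns a fresh object. A minimal consumption sketch (the hook wiring is illustrative, not from the package, and assumes `subscribe` returns an unsubscribe function as `useSyncExternalStore` requires):

```js
import { useSyncExternalStore } from "react";

// `manager` is a StateManager instance as built in the diff above.
function useSpeechOSState(manager) {
  return useSyncExternalStore(
    (onStoreChange) => manager.subscribe(onStoreChange), // assumed to return an unsubscribe fn
    () => manager.getState() // frozen snapshot; reference changes only on setState()/reset()
  );
}
```
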
@@ -346,12 +369,15 @@ function createStateManager(initial) {
 
 //#endregion
 //#region src/livekit.ts
-const MESSAGE_TYPE_REQUEST_TRANSCRIPT = "request_transcript";
-const MESSAGE_TYPE_TRANSCRIPT = "transcript";
-const MESSAGE_TYPE_EDIT_TEXT = "edit_text";
-const MESSAGE_TYPE_EDITED_TEXT = "edited_text";
-const
+const MESSAGE_TYPE_REQUEST_TRANSCRIPT$1 = "request_transcript";
+const MESSAGE_TYPE_TRANSCRIPT$1 = "transcript";
+const MESSAGE_TYPE_EDIT_TEXT$1 = "edit_text";
+const MESSAGE_TYPE_EDITED_TEXT$1 = "edited_text";
+const MESSAGE_TYPE_EXECUTE_COMMAND$1 = "execute_command";
+const MESSAGE_TYPE_COMMAND_RESULT$1 = "command_result";
+const MESSAGE_TYPE_ERROR$1 = "error";
 const TOPIC_SPEECHOS = "speechos";
+const TOKEN_CACHE_TTL_MS = 4 * 60 * 1e3;
 /**
  * A deferred promise with timeout support.
  * Encapsulates resolve/reject/timeout in a single object for cleaner async handling.
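
Note: the `$1` suffixes on these constants are bundler-generated renames, not an API change. The new `src/websocket.ts` region later in this diff declares constants with the same names, so the bundler disambiguates the `src/livekit.ts` copies.
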
@@ -415,53 +441,116 @@ var LiveKitManager = class {
   room = null;
   tokenData = null;
   micTrack = null;
+  cachedTokenData = null;
+  tokenCacheTimestamp = null;
+  tokenPrefetchPromise = null;
+  tokenRefreshTimer = null;
+  autoRefreshEnabled = false;
   pendingTranscript = null;
   pendingEditText = null;
+  pendingCommand = null;
   pendingTrackSubscribed = null;
-  preWarmPromise = null;
   editOriginalText = null;
+  sessionSettings = {};
   /**
-   *
-   * Call this when user shows intent (e.g., expands widget)
-   * Only fetches token - mic permission is requested when user clicks Dictate
+   * Check if the cached token is still valid (within TTL)
    */
-
-  if (this.
-
-
-
-
+  isCachedTokenValid() {
+    if (!this.cachedTokenData || !this.tokenCacheTimestamp) return false;
+    const age = Date.now() - this.tokenCacheTimestamp;
+    return age < TOKEN_CACHE_TTL_MS;
+  }
+  /**
+   * Pre-fetch a LiveKit token for later use
+   * Call this early (e.g., when widget expands) to reduce latency when starting a voice session.
+   * If a prefetch is already in progress, returns the existing promise.
+   * If a valid cached token exists, returns it immediately.
+   */
+  async prefetchToken() {
     const config = getConfig();
-    if (
-
-
-
-
-
-
-
-
-
+    if (this.isCachedTokenValid() && this.cachedTokenData) {
+      if (config.debug) console.log("[SpeechOS] Using cached token (prefetch hit)");
+      return this.cachedTokenData;
+    }
+    if (this.tokenPrefetchPromise) {
+      if (config.debug) console.log("[SpeechOS] Prefetch already in progress, awaiting...");
+      return this.tokenPrefetchPromise;
+    }
+    if (config.debug) console.log("[SpeechOS] Starting token prefetch...");
+    this.tokenPrefetchPromise = this.fetchTokenFromServer().then((data) => {
+      this.cachedTokenData = data;
+      this.tokenCacheTimestamp = Date.now();
+      this.tokenPrefetchPromise = null;
+      return data;
+    }).catch((error) => {
+      this.tokenPrefetchPromise = null;
+      throw error;
+    });
+    return this.tokenPrefetchPromise;
   }
   /**
    * Fetch a LiveKit token from the backend
+   * Uses cached token if valid, otherwise fetches a fresh one.
+   * Includes language settings and user vocabulary which are stored in the VoiceSession.
    */
   async fetchToken() {
+    const config = getConfig();
+    if (this.isCachedTokenValid() && this.cachedTokenData) {
+      if (config.debug) console.log("[SpeechOS] Using cached token");
+      this.tokenData = this.cachedTokenData;
+      return this.cachedTokenData;
+    }
+    if (this.tokenPrefetchPromise) {
+      if (config.debug) console.log("[SpeechOS] Waiting for prefetch to complete...");
+      const data$1 = await this.tokenPrefetchPromise;
+      this.tokenData = data$1;
+      return data$1;
+    }
+    const data = await this.fetchTokenFromServer();
+    this.cachedTokenData = data;
+    this.tokenCacheTimestamp = Date.now();
+    this.tokenData = data;
+    return data;
+  }
+  /**
+   * Internal method to fetch a fresh token from the server
+   */
+  async fetchTokenFromServer() {
     const config = getConfig();
     const url = `${config.host}/livekit/api/token/`;
-
+    const settings = this.sessionSettings;
+    const inputLanguage = settings.inputLanguageCode ?? "en-US";
+    const outputLanguage = settings.outputLanguageCode ?? "en-US";
+    const smartFormat = settings.smartFormat ?? true;
+    const vocabulary = settings.vocabulary ?? [];
+    const snippets = settings.snippets ?? [];
+    if (config.debug) {
+      console.log("[SpeechOS] Fetching LiveKit token from:", url);
+      console.log("[SpeechOS] Session settings:", {
+        inputLanguage,
+        outputLanguage,
+        smartFormat,
+        snippetsCount: snippets.length,
+        vocabularyCount: vocabulary.length
+      });
+    }
     const response = await fetch(url, {
       method: "POST",
       headers: {
         "Content-Type": "application/json",
         ...config.apiKey ? { Authorization: `Api-Key ${config.apiKey}` } : {}
       },
-      body: JSON.stringify({
+      body: JSON.stringify({
+        user_id: config.userId || null,
+        input_language: inputLanguage,
+        output_language: outputLanguage,
+        smart_format: smartFormat,
+        custom_vocabulary: vocabulary,
+        custom_snippets: snippets
+      })
     });
     if (!response.ok) throw new Error(`Failed to fetch LiveKit token: ${response.status} ${response.statusText}`);
     const data = await response.json();
-    this.tokenData = data;
     if (config.debug) console.log("[SpeechOS] LiveKit token received:", {
       room: data.room,
       identity: data.identity,
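
Taken together, the three token methods above form a small read-through cache: `fetchTokenFromServer` always hits the network, `fetchToken` serves from the 4-minute cache (or joins an in-flight prefetch), and `prefetchToken` warms the cache ahead of time while deduplicating concurrent callers. A hedged sketch of the intended call pattern (host-app code, using the `livekit` singleton exported later in this diff):

```js
// Warm the cache when the user shows intent (e.g. the widget expands).
livekit.prefetchToken().catch(() => {
  // Non-fatal: fetchToken() will fall back to a fresh server fetch.
});

// Later, when a session starts, fetchToken() resolves from the cache
// (or awaits the in-flight prefetch) instead of paying a network round trip.
```
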
@@ -474,8 +563,7 @@ var LiveKitManager = class {
    */
   async connect() {
     const config = getConfig();
-
-    else if (config.debug) console.log("[SpeechOS] Using pre-fetched token");
+    await this.fetchToken();
     if (!this.tokenData) throw new Error("No token available for LiveKit connection");
     this.room = new Room({
       adaptiveStream: true,
@@ -539,7 +627,7 @@ var LiveKitManager = class {
     try {
       const message = JSON.parse(new TextDecoder().decode(data));
       if (config.debug) console.log("[SpeechOS] Data received:", message);
-      if (message.type === MESSAGE_TYPE_TRANSCRIPT) {
+      if (message.type === MESSAGE_TYPE_TRANSCRIPT$1) {
         const transcript = message.transcript || "";
         if (config.debug) console.log("[SpeechOS] Transcript received:", transcript);
         events.emit("transcription:complete", { text: transcript });
@@ -547,7 +635,7 @@ var LiveKitManager = class {
           this.pendingTranscript.resolve(transcript);
           this.pendingTranscript = null;
         }
-      } else if (message.type === MESSAGE_TYPE_EDITED_TEXT) {
+      } else if (message.type === MESSAGE_TYPE_EDITED_TEXT$1) {
         const editedText = message.text || "";
         if (config.debug) console.log("[SpeechOS] Edited text received:", editedText);
         events.emit("edit:complete", {
@@ -559,7 +647,15 @@ var LiveKitManager = class {
           this.pendingEditText = null;
         }
         this.editOriginalText = null;
-      } else if (message.type ===
+      } else if (message.type === MESSAGE_TYPE_COMMAND_RESULT$1) {
+        const commandResult = message.command || null;
+        if (config.debug) console.log("[SpeechOS] Command result received:", commandResult);
+        events.emit("command:complete", { command: commandResult });
+        if (this.pendingCommand) {
+          this.pendingCommand.resolve(commandResult);
+          this.pendingCommand = null;
+        }
+      } else if (message.type === MESSAGE_TYPE_ERROR$1) {
         const serverError = message;
         const errorCode = serverError.code || "server_error";
         const errorMessage = serverError.message || "A server error occurred";
@@ -579,6 +675,10 @@ var LiveKitManager = class {
           this.pendingEditText.reject(error);
           this.pendingEditText = null;
         }
+        if (this.pendingCommand) {
+          this.pendingCommand.reject(error);
+          this.pendingCommand = null;
+        }
       }
     } catch (error) {
       console.error("[SpeechOS] Failed to parse data message:", error);
@@ -586,16 +686,34 @@ var LiveKitManager = class {
   }
   /**
    * Publish microphone audio track
+   * Uses the device ID from session settings if set
    */
   async enableMicrophone() {
     if (!this.room || this.room.state !== "connected") throw new Error("Not connected to room");
     const config = getConfig();
     if (!this.micTrack) {
       if (config.debug) console.log("[SpeechOS] Creating microphone track...");
-
+      const deviceId = this.sessionSettings.audioDeviceId;
+      const trackOptions = {
         echoCancellation: true,
         noiseSuppression: true
-      }
+      };
+      if (deviceId) {
+        trackOptions.deviceId = { exact: deviceId };
+        if (config.debug) console.log("[SpeechOS] Using audio device:", deviceId);
+      }
+      try {
+        this.micTrack = await createLocalAudioTrack(trackOptions);
+      } catch (error) {
+        if (deviceId && error instanceof Error) {
+          console.warn("[SpeechOS] Selected audio device unavailable, falling back to default:", error.message);
+          this.micTrack = await createLocalAudioTrack({
+            echoCancellation: true,
+            noiseSuppression: true
+          });
+        } else throw error;
+      }
+      this.logMicrophoneInfo();
     }
     const existingPub = this.room.localParticipant.getTrackPublication(Track.Source.Microphone);
     if (!existingPub) {
@@ -605,6 +723,24 @@ var LiveKitManager = class {
     }
   }
   /**
+   * Log information about the current microphone track
+   */
+  logMicrophoneInfo() {
+    if (!this.micTrack) return;
+    const config = getConfig();
+    const mediaTrack = this.micTrack.mediaStreamTrack;
+    const settings = mediaTrack.getSettings();
+    console.log("[SpeechOS] Microphone active:", {
+      deviceId: settings.deviceId || "unknown",
+      label: mediaTrack.label || "Unknown device",
+      sampleRate: settings.sampleRate,
+      channelCount: settings.channelCount,
+      echoCancellation: settings.echoCancellation,
+      noiseSuppression: settings.noiseSuppression
+    });
+    if (config.debug) console.log("[SpeechOS] Full audio track settings:", settings);
+  }
+  /**
    * Disable microphone audio track
    */
   async disableMicrophone() {
@@ -636,30 +772,85 @@ var LiveKitManager = class {
     });
   }
   /**
-   * Start a voice session
-   *
+   * Start a voice session with pre-connect audio buffering
+   * Fetches a fresh token, then enables mic with preConnectBuffer to capture audio while connecting.
+   * Agent subscription happens in the background - we don't block on it.
+   *
+   * @param options - Session options including action type and parameters
    */
-  async startVoiceSession() {
+  async startVoiceSession(options) {
     const config = getConfig();
     if (config.debug) console.log("[SpeechOS] Starting voice session...");
-
-
-
-    }
-    if (this.tokenData) {
-      if (config.debug) console.log("[SpeechOS] Using cached token from init");
-    } else {
-      if (config.debug) console.log("[SpeechOS] Fetching fresh token for session...");
-      await this.fetchToken();
-    }
+    this.sessionSettings = options?.settings || {};
+    await this.fetchToken();
+    if (!this.tokenData) throw new Error("No token available for LiveKit connection");
     this.pendingTrackSubscribed = new Deferred();
     this.pendingTrackSubscribed.setTimeout(15e3, "Connection timed out - agent not available", "connection_timeout", "connection");
-
-
-
-
-    this.
-    if (config.debug) console.log("[SpeechOS]
+    this.room = new Room({
+      adaptiveStream: true,
+      dynacast: true
+    });
+    this.setupRoomEvents();
+    if (config.debug) console.log("[SpeechOS] Connecting to LiveKit room:", this.tokenData.room, "at", this.tokenData.ws_url);
+    await this.room.connect(this.tokenData.ws_url, this.tokenData.token);
+    if (config.debug) console.log("[SpeechOS] Connected, enabling microphone with preConnectBuffer...");
+    await this.enableMicrophoneWithPreConnectBuffer();
+    if (options?.onMicReady) options.onMicReady();
+    state.setConnected(true);
+    if (config.debug) console.log("[SpeechOS] Voice session ready - microphone active");
+    this.waitForAgentSubscription();
+  }
+  /**
+   * Wait for the agent to subscribe to our audio track in the background
+   * Handles timeout errors without blocking the main flow
+   */
+  waitForAgentSubscription() {
+    const config = getConfig();
+    if (!this.pendingTrackSubscribed) return;
+    this.pendingTrackSubscribed.promise.then(() => {
+      if (config.debug) console.log("[SpeechOS] Agent subscribed to audio track - full duplex established");
+      this.pendingTrackSubscribed = null;
+    }).catch((error) => {
+      console.warn("[SpeechOS] Agent subscription timeout:", error.message);
+      this.pendingTrackSubscribed = null;
+    });
+  }
+  /**
+   * Enable microphone with pre-connect buffering
+   * This starts capturing audio locally before the room is connected,
+   * buffering it until the connection is established.
+   */
+  async enableMicrophoneWithPreConnectBuffer() {
+    if (!this.room) throw new Error("Room not initialized");
+    const config = getConfig();
+    const deviceId = this.sessionSettings.audioDeviceId;
+    const constraints = {
+      echoCancellation: true,
+      noiseSuppression: true
+    };
+    if (deviceId) {
+      constraints.deviceId = { exact: deviceId };
+      if (config.debug) console.log("[SpeechOS] Using audio device:", deviceId);
+    }
+    try {
+      await this.room.localParticipant.setMicrophoneEnabled(true, constraints, { preConnectBuffer: true });
+      state.setMicEnabled(true);
+      const micPub = this.room.localParticipant.getTrackPublication(Track.Source.Microphone);
+      if (micPub?.track) {
+        this.micTrack = micPub.track;
+        this.logMicrophoneInfo();
+      }
+      if (config.debug) console.log("[SpeechOS] Microphone enabled with pre-connect buffer - audio is being captured");
+    } catch (error) {
+      if (deviceId && error instanceof Error) {
+        console.warn("[SpeechOS] Selected audio device unavailable, falling back to default:", error.message);
+        await this.room.localParticipant.setMicrophoneEnabled(true, {
+          echoCancellation: true,
+          noiseSuppression: true
+        }, { preConnectBuffer: true });
+        state.setMicEnabled(true);
+      } else throw error;
+    }
   }
   /**
    * Stop the voice session and request the transcript
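
The rewritten `startVoiceSession` leans on LiveKit's pre-connect buffer (`{ preConnectBuffer: true }` passed to `setMicrophoneEnabled`), so speech captured while the room is still connecting is buffered rather than lost, and the agent-subscription wait is deliberately fire-and-forget. A condensed sketch of the resulting call order (host-app code; the settings values are placeholders):

```js
await livekit.startVoiceSession({
  settings: { audioDeviceId: undefined }, // optional; falls back to the default device
  onMicReady: () => console.log("mic live"), // fires once capture is active
});
// Order inside: token (cache/prefetch/server) -> Room created + connected ->
// mic published with preConnectBuffer -> agent subscription awaited in background.
```
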
@@ -668,12 +859,19 @@ var LiveKitManager = class {
    */
   async stopVoiceSession() {
     const config = getConfig();
+    const settings = this.sessionSettings;
+    const inputLanguage = settings.inputLanguageCode ?? "en-US";
+    const outputLanguage = settings.outputLanguageCode ?? "en-US";
+    console.log("[SpeechOS] Dictate command:", {
+      inputLanguage,
+      outputLanguage
+    });
     if (config.debug) console.log("[SpeechOS] Stopping voice session, requesting transcript...");
     await this.disableMicrophone();
     if (config.debug) console.log("[SpeechOS] Requesting transcript from agent...");
     this.pendingTranscript = new Deferred();
     this.pendingTranscript.setTimeout(1e4, "Transcription timed out. Please try again.", "transcription_timeout", "timeout");
-    await this.sendDataMessage({ type: MESSAGE_TYPE_REQUEST_TRANSCRIPT });
+    await this.sendDataMessage({ type: MESSAGE_TYPE_REQUEST_TRANSCRIPT$1 });
     const result = await this.pendingTranscript.promise;
     this.pendingTranscript = null;
     return result;
@@ -692,6 +890,14 @@ var LiveKitManager = class {
    */
   async requestEditText(originalText) {
     const config = getConfig();
+    const settings = this.sessionSettings;
+    const inputLanguage = settings.inputLanguageCode ?? "en-US";
+    const outputLanguage = settings.outputLanguageCode ?? "en-US";
+    console.log("[SpeechOS] Edit command:", {
+      inputLanguage,
+      outputLanguage,
+      originalTextLength: originalText.length
+    });
     if (config.debug) console.log("[SpeechOS] Requesting text edit...");
     this.editOriginalText = originalText;
     await this.disableMicrophone();
@@ -699,7 +905,7 @@ var LiveKitManager = class {
     this.pendingEditText = new Deferred();
     this.pendingEditText.setTimeout(15e3, "Edit request timed out. Please try again.", "edit_timeout", "timeout");
     await this.sendDataMessage({
-      type: MESSAGE_TYPE_EDIT_TEXT,
+      type: MESSAGE_TYPE_EDIT_TEXT$1,
       text: originalText
     });
     const result = await this.pendingEditText.promise;
@@ -713,6 +919,39 @@ var LiveKitManager = class {
     return this.requestEditText(originalText);
   }
   /**
+   * Request command matching using the transcript as input
+   * Sends command definitions to the backend, which matches the user's speech against them
+   * Returns a promise that resolves with the matched command or null if no match
+   * @throws Error if timeout occurs waiting for command result
+   */
+  async requestCommand(commands) {
+    const config = getConfig();
+    const settings = this.sessionSettings;
+    const inputLanguage = settings.inputLanguageCode ?? "en-US";
+    console.log("[SpeechOS] Command request:", {
+      inputLanguage,
+      commandCount: commands.length
+    });
+    if (config.debug) console.log("[SpeechOS] Requesting command match...");
+    await this.disableMicrophone();
+    if (config.debug) console.log("[SpeechOS] Sending execute_command request to agent...");
+    this.pendingCommand = new Deferred();
+    this.pendingCommand.setTimeout(15e3, "Command request timed out. Please try again.", "command_timeout", "timeout");
+    await this.sendDataMessage({
+      type: MESSAGE_TYPE_EXECUTE_COMMAND$1,
+      commands
+    });
+    const result = await this.pendingCommand.promise;
+    this.pendingCommand = null;
+    return result;
+  }
+  /**
+   * Alias for requestCommand - granular API naming
+   */
+  async stopAndCommand(commands) {
+    return this.requestCommand(commands);
+  }
+  /**
    * Disconnect from the current room
    * Clears the token so a fresh one is fetched for the next session
    */
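
`requestCommand` follows the same shape as the transcript and edit flows: mute the mic, send an `execute_command` data message, and resolve with the matched command (or `null`). A hypothetical call — the diff only shows that an array is sent and its `length` is logged, so the command object shape below is illustrative, not a documented schema:

```js
const match = await livekit.requestCommand([
  { id: "open_settings", description: "Open the settings panel" }, // shape assumed
  { id: "clear_text", description: "Clear the current field" },
]);
console.log(match ? "matched command" : "no command matched", match);
```
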
@@ -735,16 +974,110 @@ var LiveKitManager = class {
       this.pendingEditText.reject(new Error("Disconnected"));
       this.pendingEditText = null;
     }
+    if (this.pendingCommand) {
+      this.pendingCommand.reject(new Error("Disconnected"));
+      this.pendingCommand = null;
+    }
     if (this.pendingTrackSubscribed) {
       this.pendingTrackSubscribed.reject(new Error("Disconnected"));
       this.pendingTrackSubscribed = null;
     }
     this.tokenData = null;
-    this.preWarmPromise = null;
     this.editOriginalText = null;
+    this.sessionSettings = {};
     if (config.debug) console.log("[SpeechOS] Session state cleared");
   }
   /**
+   * Invalidate the cached token
+   * Call this when settings change that would affect the token (language, vocabulary)
+   */
+  invalidateTokenCache() {
+    const config = getConfig();
+    if (config.debug) console.log("[SpeechOS] Token cache invalidated");
+    this.cachedTokenData = null;
+    this.tokenCacheTimestamp = null;
+  }
+  /**
+   * Start auto-refreshing the token while the widget is expanded.
+   * Call this after a voice session completes to immediately fetch a fresh token
+   * (since each command requires its own token) and keep it fresh for subsequent commands.
+   */
+  startAutoRefresh() {
+    const config = getConfig();
+    this.autoRefreshEnabled = true;
+    if (config.debug) console.log("[SpeechOS] Token auto-refresh enabled");
+    this.invalidateTokenCache();
+    this.prefetchToken().then(() => {
+      this.scheduleTokenRefresh();
+    }).catch((error) => {
+      if (config.debug) console.warn("[SpeechOS] Failed to prefetch token after command:", error);
+      if (this.autoRefreshEnabled) this.tokenRefreshTimer = setTimeout(() => {
+        this.performAutoRefresh();
+      }, 5 * 1e3);
+    });
+  }
+  /**
+   * Stop auto-refreshing the token.
+   * Call this when the widget collapses or user navigates away.
+   */
+  stopAutoRefresh() {
+    const config = getConfig();
+    this.autoRefreshEnabled = false;
+    if (this.tokenRefreshTimer) {
+      clearTimeout(this.tokenRefreshTimer);
+      this.tokenRefreshTimer = null;
+    }
+    if (config.debug) console.log("[SpeechOS] Token auto-refresh disabled");
+  }
+  /**
+   * Schedule a token refresh before the current cache expires.
+   * Handles computer sleep by checking elapsed time on each refresh attempt.
+   */
+  scheduleTokenRefresh() {
+    if (!this.autoRefreshEnabled) return;
+    if (this.tokenRefreshTimer) {
+      clearTimeout(this.tokenRefreshTimer);
+      this.tokenRefreshTimer = null;
+    }
+    const config = getConfig();
+    const refreshBuffer = 30 * 1e3;
+    let timeUntilRefresh;
+    if (this.tokenCacheTimestamp) {
+      const age = Date.now() - this.tokenCacheTimestamp;
+      const timeRemaining = TOKEN_CACHE_TTL_MS - age;
+      timeUntilRefresh = Math.max(0, timeRemaining - refreshBuffer);
+    } else timeUntilRefresh = 0;
+    if (config.debug) console.log(`[SpeechOS] Scheduling token refresh in ${Math.round(timeUntilRefresh / 1e3)}s`);
+    this.tokenRefreshTimer = setTimeout(() => {
+      this.performAutoRefresh();
+    }, timeUntilRefresh);
+  }
+  /**
+   * Perform the auto-refresh, handling computer sleep scenarios.
+   */
+  async performAutoRefresh() {
+    if (!this.autoRefreshEnabled) return;
+    const config = getConfig();
+    if (this.isCachedTokenValid()) {
+      if (config.debug) console.log("[SpeechOS] Token still valid on refresh check, rescheduling");
+      this.scheduleTokenRefresh();
+      return;
+    }
+    if (config.debug) console.log("[SpeechOS] Auto-refreshing token...");
+    try {
+      const data = await this.fetchTokenFromServer();
+      this.cachedTokenData = data;
+      this.tokenCacheTimestamp = Date.now();
+      if (config.debug) console.log("[SpeechOS] Token auto-refreshed successfully");
+      this.scheduleTokenRefresh();
+    } catch (error) {
+      console.warn("[SpeechOS] Token auto-refresh failed:", error);
+      if (this.autoRefreshEnabled) this.tokenRefreshTimer = setTimeout(() => {
+        this.performAutoRefresh();
+      }, 30 * 1e3);
+    }
+  }
+  /**
    * Get the current room instance
    */
   getRoom() {
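
The auto-refresh scheduling is plain arithmetic against the 4-minute TTL: a refresh fires 30 seconds before expiry, and a negative remainder (e.g. after the machine slept past the TTL) clamps to an immediate refresh. A worked check of `scheduleTokenRefresh`'s math, using the constants from the diff above:

```js
const TOKEN_CACHE_TTL_MS = 4 * 60 * 1e3; // 240000 ms
const refreshBuffer = 30 * 1e3;          // 30000 ms

// Fresh token (age 0 ms): refresh scheduled in 210 s.
Math.max(0, (TOKEN_CACHE_TTL_MS - 0) - refreshBuffer);      // 210000

// Machine slept 10 minutes (age 600000 ms): refresh fires immediately.
Math.max(0, (TOKEN_CACHE_TTL_MS - 600000) - refreshBuffer); // 0
```
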
@@ -768,88 +1101,749 @@ var LiveKitManager = class {
|
|
|
768
1101
|
isMicrophoneEnabled() {
|
|
769
1102
|
return this.micTrack !== null;
|
|
770
1103
|
}
|
|
771
|
-
/**
|
|
772
|
-
* Clear the cached token
|
|
773
|
-
* Used when user identity changes to ensure next session gets a fresh token
|
|
774
|
-
*/
|
|
775
|
-
clearToken() {
|
|
776
|
-
const config = getConfig();
|
|
777
|
-
if (config.debug) console.log("[SpeechOS] Clearing cached token");
|
|
778
|
-
this.tokenData = null;
|
|
779
|
-
this.preWarmPromise = null;
|
|
780
|
-
}
|
|
781
1104
|
};
|
|
782
1105
|
const livekit = new LiveKitManager();
|
|
1106
|
+
events.on("settings:changed", () => {
|
|
1107
|
+
livekit.invalidateTokenCache();
|
|
1108
|
+
});
|
|
783
1109
|
|
|
784
1110
|
//#endregion
|
|
785
|
-
//#region src/
|
|
786
|
-
const STORAGE_KEY = "speechos_transcripts";
|
|
787
|
-
const MAX_ENTRIES = 50;
|
|
1111
|
+
//#region src/audio-capture.ts
|
|
788
1112
|
/**
|
|
789
|
-
*
|
|
1113
|
+
* Detect if running in Safari.
|
|
790
1114
|
*/
|
|
791
|
-
function
|
|
792
|
-
|
|
1115
|
+
function isSafari() {
|
|
1116
|
+
const ua = navigator.userAgent.toLowerCase();
|
|
1117
|
+
const vendor = navigator.vendor?.toLowerCase() || "";
|
|
1118
|
+
const hasSafariUA = ua.includes("safari") && !ua.includes("chrome") && !ua.includes("chromium");
|
|
1119
|
+
const isAppleVendor = vendor.includes("apple");
|
|
1120
|
+
return hasSafariUA && isAppleVendor;
|
|
793
1121
|
}
|
|
794
1122
|
/**
|
|
795
|
-
*
|
|
1123
|
+
* Detect the best supported audio format for the current browser.
|
|
1124
|
+
*
|
|
1125
|
+
* IMPORTANT: Safari must use MP4/AAC. Its WebM/Opus implementation is buggy
|
|
1126
|
+
* and produces truncated/incomplete audio.
|
|
796
1127
|
*/
|
|
797
|
-
function
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
return
|
|
1128
|
+
function getSupportedAudioFormat() {
|
|
1129
|
+
if (isSafari()) {
|
|
1130
|
+
if (MediaRecorder.isTypeSupported("audio/mp4")) return {
|
|
1131
|
+
mimeType: "audio/mp4",
|
|
1132
|
+
format: "mp4",
|
|
1133
|
+
needsEncodingParams: false
|
|
1134
|
+
};
|
|
1135
|
+
return {
|
|
1136
|
+
mimeType: "",
|
|
1137
|
+
format: "mp4",
|
|
1138
|
+
needsEncodingParams: true
|
|
1139
|
+
};
|
|
805
1140
|
}
|
|
1141
|
+
if (MediaRecorder.isTypeSupported("audio/webm;codecs=opus")) return {
|
|
1142
|
+
mimeType: "audio/webm;codecs=opus",
|
|
1143
|
+
format: "webm",
|
|
1144
|
+
needsEncodingParams: false
|
|
1145
|
+
};
|
|
1146
|
+
if (MediaRecorder.isTypeSupported("audio/webm")) return {
|
|
1147
|
+
mimeType: "audio/webm",
|
|
1148
|
+
format: "webm",
|
|
1149
|
+
needsEncodingParams: false
|
|
1150
|
+
};
|
|
1151
|
+
if (MediaRecorder.isTypeSupported("audio/mp4")) return {
|
|
1152
|
+
mimeType: "audio/mp4",
|
|
1153
|
+
format: "mp4",
|
|
1154
|
+
needsEncodingParams: false
|
|
1155
|
+
};
|
|
1156
|
+
return {
|
|
1157
|
+
mimeType: "",
|
|
1158
|
+
format: "webm",
|
|
1159
|
+
needsEncodingParams: true
|
|
1160
|
+
};
|
|
806
1161
|
}
|
|
807
1162
|
/**
|
|
808
|
-
*
|
|
1163
|
+
* Audio capture manager with buffering support.
|
|
1164
|
+
*
|
|
1165
|
+
* Usage:
|
|
1166
|
+
* 1. Create instance with onChunk callback
|
|
1167
|
+
* 2. Call start() - immediately begins capturing
|
|
1168
|
+
* 3. Call setReady() when connection is established - flushes buffer
|
|
1169
|
+
* 4. Call stop() when done
|
|
809
1170
|
*/
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
1171
|
+
var AudioCapture = class AudioCapture {
|
|
1172
|
+
mediaStream = null;
|
|
1173
|
+
recorder = null;
|
|
1174
|
+
buffer = [];
|
|
1175
|
+
isReady = false;
|
|
1176
|
+
isRecording = false;
|
|
1177
|
+
onChunk;
|
|
1178
|
+
audioFormat;
|
|
1179
|
+
deviceId;
|
|
1180
|
+
/**
|
|
1181
|
+
* Time slice for MediaRecorder in milliseconds.
|
|
1182
|
+
*
|
|
1183
|
+
* Safari requires a larger timeslice (1000ms) to properly flush its internal
|
|
1184
|
+
* audio buffers. Smaller values cause Safari to drop or truncate audio data.
|
|
1185
|
+
* See: https://community.openai.com/t/whisper-problem-with-audio-mp4-blobs-from-safari/
|
|
1186
|
+
*
|
|
1187
|
+
* Other browsers (Chrome, Firefox, Edge) work well with smaller timeslices
|
|
1188
|
+
* which provide lower latency for real-time transcription.
|
|
1189
|
+
*/
|
|
1190
|
+
static TIME_SLICE_MS = 100;
|
|
1191
|
+
static SAFARI_TIME_SLICE_MS = 1e3;
|
|
1192
|
+
/**
|
|
1193
|
+
* @param onChunk - Callback for receiving audio chunks
|
|
1194
|
+
* @param deviceId - Optional audio device ID (empty string or undefined for system default)
|
|
1195
|
+
*/
|
|
1196
|
+
constructor(onChunk, deviceId) {
|
|
1197
|
+
this.onChunk = onChunk;
|
|
1198
|
+
this.audioFormat = getSupportedAudioFormat();
|
|
1199
|
+
this.deviceId = deviceId;
|
|
1200
|
+
}
|
|
1201
|
+
/**
|
|
1202
|
+
* Get the appropriate timeslice for the current browser.
|
|
1203
|
+
* Safari needs a larger timeslice to avoid dropping audio data.
|
|
1204
|
+
*/
|
|
1205
|
+
getTimeSlice() {
|
|
1206
|
+
return isSafari() ? AudioCapture.SAFARI_TIME_SLICE_MS : AudioCapture.TIME_SLICE_MS;
|
|
1207
|
+
}
|
|
1208
|
+
/**
|
|
1209
|
+
* Get the timeslice being used (in milliseconds).
|
|
1210
|
+
* Useful for callers that need to wait for audio processing.
|
|
1211
|
+
*/
|
|
1212
|
+
getTimeSliceMs() {
|
|
1213
|
+
return this.getTimeSlice();
|
|
1214
|
+
}
|
|
1215
|
+
/**
|
|
1216
|
+
* Get the audio format being used.
|
|
1217
|
+
*/
|
|
1218
|
+
getFormat() {
|
|
1219
|
+
return this.audioFormat;
|
|
1220
|
+
}
|
|
1221
|
+
/**
|
|
1222
|
+
* Start capturing audio immediately.
|
|
1223
|
+
*
|
|
1224
|
+
* Audio chunks will be buffered until setReady() is called.
|
|
1225
|
+
*/
|
|
1226
|
+
async start() {
|
|
1227
|
+
const config = getConfig();
|
|
1228
|
+
if (this.isRecording) {
|
|
1229
|
+
if (config.debug) console.log("[SpeechOS] AudioCapture already recording");
|
|
1230
|
+
return;
|
|
1231
|
+
}
|
|
1232
|
+
this.buffer = [];
|
|
1233
|
+
this.isReady = false;
|
|
1234
|
+
const constraints = { audio: {
|
|
1235
|
+
echoCancellation: true,
|
|
1236
|
+
noiseSuppression: true,
|
|
1237
|
+
...this.deviceId ? { deviceId: { exact: this.deviceId } } : {}
|
|
1238
|
+
} };
|
|
1239
|
+
if (config.debug) {
|
|
1240
|
+
console.log("[SpeechOS] AudioCapture starting with format:", this.audioFormat.mimeType);
|
|
1241
|
+
console.log("[SpeechOS] Detected Safari:", isSafari());
|
|
1242
|
+
if (this.deviceId) console.log("[SpeechOS] Using audio device:", this.deviceId);
|
|
1243
|
+
}
|
|
1244
|
+
try {
|
|
1245
|
+
this.mediaStream = await navigator.mediaDevices.getUserMedia(constraints);
|
|
1246
|
+
const recorderOptions = {};
|
|
1247
|
+
if (this.audioFormat.mimeType) recorderOptions.mimeType = this.audioFormat.mimeType;
|
|
1248
|
+
this.recorder = new MediaRecorder(this.mediaStream, recorderOptions);
|
|
1249
|
+
this.recorder.ondataavailable = (event) => {
|
|
1250
|
+
if (event.data && event.data.size > 0) this.handleChunk(event.data);
|
|
1251
|
+
};
|
|
1252
|
+
this.recorder.onerror = (event) => {
|
|
1253
|
+
console.error("[SpeechOS] MediaRecorder error:", event);
|
|
1254
|
+
};
|
|
1255
|
+
const timeSlice = this.getTimeSlice();
|
|
1256
|
+
this.recorder.start(timeSlice);
|
|
1257
|
+
this.isRecording = true;
|
|
1258
|
+
if (config.debug) console.log(`[SpeechOS] AudioCapture started with ${timeSlice}ms timeslice, buffering until ready`);
|
|
1259
|
+
} catch (error) {
|
|
1260
|
+
if (this.deviceId && error instanceof Error) {
|
|
1261
|
+
console.warn("[SpeechOS] Selected device unavailable, trying default:", error.message);
|
|
1262
|
+
this.mediaStream = await navigator.mediaDevices.getUserMedia({ audio: {
|
|
1263
|
+
echoCancellation: true,
|
|
1264
|
+
noiseSuppression: true
|
|
1265
|
+
} });
|
|
1266
|
+
const recorderOptions = {};
|
|
1267
|
+
if (this.audioFormat.mimeType) recorderOptions.mimeType = this.audioFormat.mimeType;
|
|
1268
|
+
this.recorder = new MediaRecorder(this.mediaStream, recorderOptions);
|
|
1269
|
+
this.recorder.ondataavailable = (event) => {
|
|
1270
|
+
if (event.data && event.data.size > 0) this.handleChunk(event.data);
|
|
1271
|
+
};
|
|
1272
|
+
this.recorder.start(this.getTimeSlice());
|
|
1273
|
+
this.isRecording = true;
|
|
1274
|
+
} else throw error;
|
|
1275
|
+
}
|
|
1276
|
+
}
|
|
1277
|
+
/**
|
|
1278
|
+
* Handle an audio chunk with atomic buffer swap pattern.
|
|
1279
|
+
*
|
|
1280
|
+
* If not ready: buffer the chunk.
|
|
1281
|
+
* If ready: send directly via callback.
|
|
1282
|
+
*/
|
|
1283
|
+
handleChunk(chunk) {
|
|
1284
|
+
if (this.isReady) this.onChunk(chunk);
|
|
1285
|
+
else this.buffer.push(chunk);
|
|
1286
|
+
}
|
|
1287
|
+
/**
|
|
1288
|
+
* Mark the capture as ready (connection established).
|
|
1289
|
+
*
|
|
1290
|
+
* This flushes any buffered chunks and switches to direct mode.
|
|
1291
|
+
* Uses atomic swap to prevent chunk reordering.
|
|
1292
|
+
*/
|
|
1293
|
+
setReady() {
|
|
1294
|
+
const config = getConfig();
|
|
1295
|
+
if (this.isReady) return;
|
|
1296
|
+
const toFlush = this.buffer;
|
|
1297
|
+
this.buffer = [];
|
|
1298
|
+
for (const chunk of toFlush) this.onChunk(chunk);
|
|
1299
|
+
this.isReady = true;
|
|
1300
|
+
if (config.debug) console.log(`[SpeechOS] AudioCapture ready, flushed ${toFlush.length} buffered chunks`);
|
|
1301
|
+
}
|
|
1302
|
+
/**
|
|
1303
|
+
* Stop capturing audio and wait for final chunk.
|
|
1304
|
+
*
|
|
1305
|
+
* Uses requestData() before stop() to force the MediaRecorder to flush
|
|
1306
|
+
* any buffered audio immediately. This is critical for Safari which
|
|
1307
|
+
* may hold audio data in internal buffers.
|
|
1308
|
+
*
|
|
1309
|
+
* Safari requires an additional delay after stopping to ensure all audio
|
|
1310
|
+
* from its internal encoding pipeline has been fully processed and emitted.
|
|
1311
|
+
*/
|
|
1312
|
+
async stop() {
|
|
1313
|
+
const config = getConfig();
|
|
1314
|
+
const safari = isSafari();
|
|
1315
|
+
if (this.recorder && this.recorder.state !== "inactive") {
|
|
1316
|
+
if (this.recorder.state === "recording") try {
|
|
1317
|
+
const dataPromise = new Promise((resolve) => {
|
|
1318
|
+
const handler = (event) => {
|
|
1319
|
+
this.recorder?.removeEventListener("dataavailable", handler);
|
|
1320
|
+
if (config.debug) console.log(`[SpeechOS] requestData flush received: ${event.data.size} bytes`);
|
|
1321
|
+
resolve();
|
|
1322
|
+
};
|
|
1323
|
+
this.recorder?.addEventListener("dataavailable", handler);
|
|
1324
|
+
});
|
|
1325
|
+
this.recorder.requestData();
|
|
1326
|
+
if (config.debug) console.log("[SpeechOS] Requested data flush before stop");
|
|
1327
|
+
await dataPromise;
|
|
1328
|
+
} catch (e) {
|
|
1329
|
+
if (config.debug) console.log("[SpeechOS] requestData() not supported or failed:", e);
|
|
1330
|
+
}
|
|
1331
|
+
const stopPromise = new Promise((resolve) => {
|
|
1332
|
+
if (!this.recorder) {
|
|
1333
|
+
resolve();
|
|
1334
|
+
return;
|
|
1335
|
+
}
|
|
1336
|
+
this.recorder.onstop = () => {
|
|
1337
|
+
if (config.debug) console.log("[SpeechOS] MediaRecorder onstop fired");
|
|
1338
|
+
resolve();
|
|
1339
|
+
};
|
|
1340
|
+
});
|
|
1341
|
+
this.recorder.stop();
|
|
1342
|
+
await stopPromise;
|
|
1343
|
+
if (safari) {
|
|
1344
|
+
if (config.debug) console.log("[SpeechOS] Safari: waiting 2s for encoding pipeline to flush");
|
|
1345
|
+
await new Promise((resolve) => setTimeout(resolve, 2e3));
|
|
1346
|
+
}
|
|
1347
|
+
}
|
|
1348
|
+
if (this.mediaStream) {
|
|
1349
|
+
for (const track of this.mediaStream.getTracks()) track.stop();
|
|
1350
|
+
this.mediaStream = null;
|
|
1351
|
+
}
|
|
1352
|
+
this.recorder = null;
|
|
1353
|
+
this.isRecording = false;
|
|
1354
|
+
this.isReady = false;
|
|
1355
|
+
this.buffer = [];
|
|
1356
|
+
if (config.debug) console.log("[SpeechOS] AudioCapture stopped");
|
|
1357
|
+
}
|
|
1358
|
+
/**
|
|
1359
|
+
* Check if currently recording.
|
|
1360
|
+
*/
|
|
1361
|
+
get recording() {
|
|
1362
|
+
return this.isRecording;
|
|
1363
|
+
}
|
|
1364
|
+
/**
|
|
1365
|
+
* Check if ready (connection established, direct mode active).
|
|
1366
|
+
*/
|
|
1367
|
+
get ready() {
|
|
1368
|
+
return this.isReady;
|
|
1369
|
+
}
|
|
1370
|
+
/**
|
|
1371
|
+
* Get the number of buffered chunks waiting to be sent.
|
|
1372
|
+
*/
|
|
1373
|
+
get bufferedChunks() {
|
|
1374
|
+
return this.buffer.length;
|
|
1375
|
+
}
|
|
1376
|
+
};
|
|
826
1377
|
/**
|
|
827
|
-
*
|
|
1378
|
+
* Factory function to create an AudioCapture instance.
|
|
1379
|
+
* @param onChunk - Callback for receiving audio chunks
|
|
1380
|
+
* @param deviceId - Optional audio device ID (empty string or undefined for system default)
|
|
828
1381
|
*/
|
|
829
|
-
function
|
|
830
|
-
|
|
831
|
-
localStorage.removeItem(STORAGE_KEY);
|
|
832
|
-
} catch {}
|
|
1382
|
+
function createAudioCapture(onChunk, deviceId) {
|
|
1383
|
+
return new AudioCapture(onChunk, deviceId);
|
|
833
1384
|
}
|
|
1385
|
+
|
|
1386
|
+
//#endregion
|
|
1387
|
+
//#region src/websocket.ts
|
|
1388
|
+
const MESSAGE_TYPE_AUTH = "auth";
|
|
1389
|
+
const MESSAGE_TYPE_READY = "ready";
|
|
1390
|
+
const MESSAGE_TYPE_TRANSCRIPTION = "transcription";
|
|
1391
|
+
const MESSAGE_TYPE_REQUEST_TRANSCRIPT = "request_transcript";
|
|
1392
|
+
const MESSAGE_TYPE_TRANSCRIPT = "transcript";
|
|
1393
|
+
const MESSAGE_TYPE_EDIT_TEXT = "edit_text";
|
|
1394
|
+
const MESSAGE_TYPE_EDITED_TEXT = "edited_text";
|
|
1395
|
+
const MESSAGE_TYPE_EXECUTE_COMMAND = "execute_command";
|
|
1396
|
+
const MESSAGE_TYPE_COMMAND_RESULT = "command_result";
|
|
1397
|
+
const MESSAGE_TYPE_ERROR = "error";
|
|
834
1398
|
/**
|
|
835
|
-
*
|
|
1399
|
+
* Response timeout in milliseconds.
|
|
836
1400
|
*/
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
1401
|
+
const RESPONSE_TIMEOUT_MS = 15e3;
|
|
1402
|
+
/**
|
|
1403
|
+
* A deferred promise with timeout support.
|
|
1404
|
+
*/
|
|
1405
|
+
var Deferred$1 = class {
|
|
1406
|
+
promise;
|
|
1407
|
+
_resolve;
|
|
1408
|
+
_reject;
|
|
1409
|
+
_timeoutId = null;
|
|
1410
|
+
_settled = false;
|
|
1411
|
+
constructor() {
|
|
1412
|
+
this.promise = new Promise((resolve, reject) => {
|
|
1413
|
+
this._resolve = resolve;
|
|
1414
|
+
this._reject = reject;
|
|
1415
|
+
});
|
|
1416
|
+
}
|
|
1417
|
+
setTimeout(ms, errorMessage, errorCode, errorSource) {
|
|
1418
|
+
this._timeoutId = setTimeout(() => {
|
|
1419
|
+
if (!this._settled) {
|
|
1420
|
+
console.error(`[SpeechOS] Error: ${errorMessage} (${errorCode})`);
|
|
1421
|
+
events.emit("error", {
|
|
1422
|
+
code: errorCode,
|
|
1423
|
+
message: errorMessage,
|
|
1424
|
+
source: errorSource
|
|
1425
|
+
});
|
|
1426
|
+
this.reject(new Error(errorMessage));
|
|
1427
|
+
}
|
|
1428
|
+
}, ms);
|
|
1429
|
+
}
|
|
1430
|
+
resolve(value) {
|
|
1431
|
+
if (!this._settled) {
|
|
1432
|
+
this._settled = true;
|
|
1433
|
+
this.clearTimeout();
|
|
1434
|
+
this._resolve(value);
|
|
1435
|
+
}
|
|
1436
|
+
}
|
|
1437
|
+
reject(error) {
|
|
1438
|
+
if (!this._settled) {
|
|
1439
|
+
this._settled = true;
|
|
1440
|
+
this.clearTimeout();
|
|
1441
|
+
this._reject(error);
|
|
1442
|
+
}
|
|
1443
|
+
}
|
|
1444
|
+
clearTimeout() {
|
|
1445
|
+
if (this._timeoutId !== null) {
|
|
1446
|
+
clearTimeout(this._timeoutId);
|
|
1447
|
+
this._timeoutId = null;
|
|
1448
|
+
}
|
|
1449
|
+
}
|
|
1450
|
+
get isSettled() {
|
|
1451
|
+
return this._settled;
|
|
1452
|
+
}
|
|
848
1453
|
};
|
|
1454
|
+
/**
|
|
1455
|
+
* Maximum time to wait for WebSocket buffer to drain.
|
|
1456
|
+
*/
|
|
1457
|
+
const BUFFER_DRAIN_TIMEOUT_MS = 5e3;
|
|
1458
|
+
/**
|
|
1459
|
+
* Polling interval for checking WebSocket buffer.
|
|
1460
|
+
*/
|
|
1461
|
+
const BUFFER_CHECK_INTERVAL_MS = 50;
|
|
1462
|
+
/**
|
|
1463
|
+
* WebSocket connection manager for voice sessions.
|
|
1464
|
+
*/
|
|
1465
|
+
var WebSocketManager = class {
|
|
1466
|
+
ws = null;
|
|
1467
|
+
audioCapture = null;
|
|
1468
|
+
sessionId = null;
|
|
1469
|
+
pendingAuth = null;
|
|
1470
|
+
pendingTranscript = null;
|
|
1471
|
+
pendingEditText = null;
|
|
1472
|
+
pendingCommand = null;
|
|
1473
|
+
pendingAudioSends = /* @__PURE__ */ new Set();
|
|
1474
|
+
editOriginalText = null;
|
|
1475
|
+
lastInputText = void 0;
|
|
1476
|
+
sessionAction = "dictate";
|
|
1477
|
+
sessionInputText = "";
|
|
1478
|
+
sessionCommands = [];
|
|
1479
|
+
sessionSettings = {};
|
|
1480
|
+
/**
|
|
1481
|
+
* Get the WebSocket URL for voice sessions.
|
|
1482
|
+
*/
|
|
1483
|
+
getWebSocketUrl() {
|
|
1484
|
+
const config = getConfig();
|
|
1485
|
+
const host = config.host || "https://app.speechos.ai";
|
|
1486
|
+
const wsUrl = host.replace(/^http/, "ws");
|
|
1487
|
+
return `${wsUrl}/ws/voice/`;
|
|
1488
|
+
}
|
|
1489
|
+
/**
|
|
1490
|
+
* Start a voice session with the WebSocket backend.
|
|
1491
|
+
*
|
|
1492
|
+
* This method:
|
|
1493
|
+
* 1. Starts audio capture immediately (buffering)
|
|
1494
|
+
* 2. Opens WebSocket connection
|
|
1495
|
+
* 3. Authenticates with API key and action parameters
|
|
1496
|
+
* 4. Flushes buffered audio and continues streaming
|
|
1497
|
+
*
|
|
1498
|
+
* @param options - Session options including action type and parameters
|
|
1499
|
+
*/
|
|
1500
|
+
async startVoiceSession(options) {
|
|
1501
|
+
const config = getConfig();
|
|
1502
|
+
this.sessionAction = options?.action || "dictate";
|
|
1503
|
+
this.sessionInputText = options?.inputText || "";
|
|
1504
|
+
this.sessionCommands = options?.commands || [];
|
|
1505
|
+
this.sessionSettings = options?.settings || {};
|
|
1506
|
+
if (this.sessionAction === "edit") this.editOriginalText = this.sessionInputText;
|
|
1507
|
+
if (config.debug) console.log("[SpeechOS] Starting WebSocket voice session...");
|
|
1508
|
+
this.audioCapture = createAudioCapture((chunk) => {
|
|
1509
|
+
this.sendAudioChunk(chunk);
|
|
1510
|
+
}, this.sessionSettings.audioDeviceId);
|
|
1511
|
+
await this.audioCapture.start();
|
|
1512
|
+
if (options?.onMicReady) options.onMicReady();
|
|
1513
|
+
state.setMicEnabled(true);
|
|
1514
|
+
const wsUrl = this.getWebSocketUrl();
|
|
1515
|
+
if (config.debug) console.log("[SpeechOS] Connecting to WebSocket:", wsUrl);
|
|
1516
|
+
this.ws = new WebSocket(wsUrl);
|
|
1517
|
+
this.ws.onopen = () => {
|
|
1518
|
+
if (config.debug) console.log("[SpeechOS] WebSocket connected, authenticating...");
|
|
1519
|
+
this.authenticate();
|
|
1520
|
+
};
|
|
1521
|
+
this.ws.onmessage = (event) => {
|
|
1522
|
+
this.handleMessage(event.data);
|
|
1523
|
+
};
|
|
1524
|
+
this.ws.onerror = (event) => {
|
|
1525
|
+
console.error("[SpeechOS] WebSocket error:", event);
|
|
1526
|
+
events.emit("error", {
|
|
1527
|
+
code: "websocket_error",
|
|
1528
|
+
message: "WebSocket connection error",
|
|
1529
|
+
source: "connection"
|
|
1530
|
+
});
|
|
1531
|
+
};
|
|
1532
|
+
this.ws.onclose = (event) => {
|
|
1533
|
+
if (config.debug) console.log("[SpeechOS] WebSocket closed:", event.code, event.reason);
|
|
1534
|
+
state.setConnected(false);
|
|
1535
|
+
};
|
|
1536
|
+
this.pendingAuth = new Deferred$1();
|
|
1537
|
+
this.pendingAuth.setTimeout(RESPONSE_TIMEOUT_MS, "Connection timed out", "connection_timeout", "connection");
|
|
1538
|
+
await this.pendingAuth.promise;
|
|
1539
|
+
this.pendingAuth = null;
|
|
1540
|
+
if (this.audioCapture) this.audioCapture.setReady();
|
|
1541
|
+
state.setConnected(true);
|
|
1542
|
+
if (config.debug) console.log("[SpeechOS] WebSocket voice session ready");
|
|
1543
|
+
}
|
|
1544
|
+
/**
|
|
1545
|
+
* Send authentication message with action parameters.
|
|
1546
|
+
* All session parameters are now sent upfront in the auth message.
|
|
1547
|
+
*/
|
|
1548
|
+
authenticate() {
|
|
1549
|
+
const config = getConfig();
|
|
1550
|
+
const audioFormat = getSupportedAudioFormat();
|
|
1551
|
+
const settings = this.sessionSettings;
|
|
1552
|
+
const anonymousId = getAnonymousId();
|
|
1553
|
+
const authMessage = {
|
|
1554
|
+
type: MESSAGE_TYPE_AUTH,
|
|
1555
|
+
api_key: config.apiKey,
|
|
1556
|
+
user_id: config.userId || null,
|
|
1557
|
+
anonymous_id: anonymousId,
|
|
1558
|
+
input_language: settings.inputLanguageCode ?? "en-US",
|
|
1559
|
+
output_language: settings.outputLanguageCode ?? "en-US",
|
|
1560
|
+
smart_format: settings.smartFormat ?? true,
|
|
1561
|
+
custom_vocabulary: settings.vocabulary ?? [],
|
|
1562
|
+
custom_snippets: settings.snippets ?? [],
|
|
1563
|
+
audio_format: audioFormat.format,
|
|
1564
|
+
action: this.sessionAction,
|
|
1565
|
+
input_text: this.sessionInputText,
|
|
1566
|
+
commands: this.sessionCommands
|
|
1567
|
+
};
|
|
1568
|
+
if (config.debug) console.log("[SpeechOS] Sending auth message with action:", this.sessionAction);
|
|
1569
|
+
this.ws?.send(JSON.stringify(authMessage));
|
|
1570
|
+
}
|
|
1571
|
+
+	/**
+	 * Send an audio chunk over the WebSocket.
+	 * Tracks the promise so we can wait for all sends to complete.
+	 */
+	sendAudioChunk(chunk) {
+		const sendPromise = this.doSendAudioChunk(chunk);
+		this.pendingAudioSends.add(sendPromise);
+		sendPromise.finally(() => {
+			this.pendingAudioSends.delete(sendPromise);
+		});
+	}
+	/**
+	 * Actually send the audio chunk (async operation).
+	 */
+	async doSendAudioChunk(chunk) {
+		if (this.ws && this.ws.readyState === WebSocket.OPEN) {
+			const arrayBuffer = await chunk.arrayBuffer();
+			this.ws.send(arrayBuffer);
+		}
+	}
+	/**
+	 * Handle incoming WebSocket messages.
+	 */
+	handleMessage(data) {
+		const config = getConfig();
+		try {
+			const message = JSON.parse(data);
+			if (config.debug) console.log("[SpeechOS] WebSocket message:", message);
+			switch (message.type) {
+				case MESSAGE_TYPE_READY:
+					this.handleReady(message);
+					break;
+				case MESSAGE_TYPE_TRANSCRIPTION:
+					this.handleIntermediateTranscription(message);
+					break;
+				case MESSAGE_TYPE_TRANSCRIPT:
+					this.handleFinalTranscript(message);
+					break;
+				case MESSAGE_TYPE_EDITED_TEXT:
+					this.handleEditedText(message);
+					break;
+				case MESSAGE_TYPE_COMMAND_RESULT:
+					this.handleCommandResult(message);
+					break;
+				case MESSAGE_TYPE_ERROR:
+					this.handleError(message);
+					break;
+				default: if (config.debug) console.log("[SpeechOS] Unknown message type:", message.type);
+			}
+		} catch (error) {
+			console.error("[SpeechOS] Failed to parse message:", error);
+		}
+	}
+	handleReady(message) {
+		const config = getConfig();
+		this.sessionId = message.session_id;
+		if (config.debug) console.log("[SpeechOS] Session ready:", this.sessionId);
+		if (this.pendingAuth) this.pendingAuth.resolve();
+	}
+	handleIntermediateTranscription(message) {
+		const config = getConfig();
+		if (config.debug) console.log("[SpeechOS] Intermediate transcription:", message.transcript, "final:", message.is_final);
+	}
+	handleFinalTranscript(message) {
+		const transcript = message.transcript || "";
+		events.emit("transcription:complete", { text: transcript });
+		if (this.pendingTranscript) {
+			this.pendingTranscript.resolve(transcript);
+			this.pendingTranscript = null;
+		}
+	}
+	handleEditedText(message) {
+		const editedText = message.text || "";
+		events.emit("edit:complete", {
+			text: editedText,
+			originalText: this.editOriginalText || ""
+		});
+		if (this.pendingEditText) {
+			this.pendingEditText.resolve(editedText);
+			this.pendingEditText = null;
+		}
+		this.editOriginalText = null;
+	}
+	handleCommandResult(message) {
+		const commandResult = message.command || null;
+		this.lastInputText = message.transcript;
+		events.emit("command:complete", { command: commandResult });
+		if (this.pendingCommand) {
+			this.pendingCommand.resolve(commandResult);
+			this.pendingCommand = null;
+		}
+	}
+	handleError(message) {
+		const errorCode = message.code || "server_error";
+		const errorMessage = message.message || "A server error occurred";
+		console.error(`[SpeechOS] Error: ${errorMessage} (${errorCode})`);
+		events.emit("error", {
+			code: errorCode,
+			message: errorMessage,
+			source: "server"
+		});
+		const error = new Error(errorMessage);
+		if (this.pendingAuth) {
+			this.pendingAuth.reject(error);
+			this.pendingAuth = null;
+		}
+		if (this.pendingTranscript) {
+			this.pendingTranscript.reject(error);
+			this.pendingTranscript = null;
+		}
+		if (this.pendingEditText) {
+			this.pendingEditText.reject(error);
+			this.pendingEditText = null;
+		}
+		if (this.pendingCommand) {
+			this.pendingCommand.reject(error);
+			this.pendingCommand = null;
+		}
+	}
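handleMessage is the single dispatch point for every server frame. The union below sketches the frame shapes implied by the handlers above; the literal type strings mirror the MESSAGE_TYPE_* constants, but their actual values are not visible in this diff, so treat the whole type as an assumption:

type ServerMessage =
	| { type: "ready"; session_id: string }
	| { type: "transcription"; transcript: string; is_final: boolean } // intermediate
	| { type: "transcript"; transcript: string }                       // final
	| { type: "edited_text"; text: string }
	| { type: "command_result"; command: unknown; transcript: string }
	| { type: "error"; code?: string; message?: string };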
+	/**
+	 * Stop the voice session and request the transcript.
+	 */
+	async stopVoiceSession() {
+		const config = getConfig();
+		if (config.debug) console.log("[SpeechOS] Stopping voice session, requesting transcript...");
+		await this.stopAudioCapture();
+		this.pendingTranscript = new Deferred$1();
+		this.pendingTranscript.setTimeout(RESPONSE_TIMEOUT_MS, "Transcription timed out. Please try again.", "transcription_timeout", "timeout");
+		this.sendMessage({ type: MESSAGE_TYPE_REQUEST_TRANSCRIPT });
+		const result = await this.pendingTranscript.promise;
+		this.pendingTranscript = null;
+		return result;
+	}
+	/**
+	 * Request text editing using the transcript as instructions.
+	 * Note: The input text was already sent in the auth message via startVoiceSession.
+	 */
+	async requestEditText(_originalText) {
+		const config = getConfig();
+		if (config.debug) console.log("[SpeechOS] Requesting text edit...");
+		await this.stopAudioCapture();
+		this.pendingEditText = new Deferred$1();
+		this.pendingEditText.setTimeout(RESPONSE_TIMEOUT_MS, "Edit request timed out. Please try again.", "edit_timeout", "timeout");
+		this.sendMessage({ type: MESSAGE_TYPE_EDIT_TEXT });
+		const result = await this.pendingEditText.promise;
+		this.pendingEditText = null;
+		return result;
+	}
+	/**
+	 * Request command matching using the transcript as input.
+	 * Note: The command definitions were already sent in the auth message via startVoiceSession.
+	 */
+	async requestCommand(_commands) {
+		const config = getConfig();
+		if (config.debug) console.log("[SpeechOS] Requesting command match...");
+		await this.stopAudioCapture();
+		this.pendingCommand = new Deferred$1();
+		this.pendingCommand.setTimeout(RESPONSE_TIMEOUT_MS, "Command request timed out. Please try again.", "command_timeout", "timeout");
+		this.sendMessage({ type: MESSAGE_TYPE_EXECUTE_COMMAND });
+		const result = await this.pendingCommand.promise;
+		this.pendingCommand = null;
+		return result;
+	}
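All three request methods share one pattern: drain the audio pipeline, park a Deferred with a timeout, send a single typed frame, and await the matching handler. A generic sketch of such a Deferred (the package exports its own Deferred; this version and its setTimeout signature are inferred from the call sites above, not copied from the package):

class TimedDeferred<T> {
	promise: Promise<T>;
	resolve!: (value: T) => void;
	reject!: (reason?: unknown) => void;
	private timer?: ReturnType<typeof setTimeout>;
	constructor() {
		this.promise = new Promise<T>((res, rej) => {
			this.resolve = res;
			this.reject = rej;
		});
		// Clear the timer however the promise settles; swallow the rejection on
		// this side chain so it does not surface as an unhandled rejection.
		this.promise.catch(() => {}).finally(() => clearTimeout(this.timer));
	}
	setTimeout(ms: number, message: string) {
		this.timer = setTimeout(() => this.reject(new Error(message)), ms);
	}
}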
+	/**
+	 * Stop audio capture and wait for all data to be sent.
+	 *
+	 * Waits for:
+	 * 1. All pending sendAudioChunk calls to complete (arrayBuffer conversion)
+	 * 2. WebSocket buffer to drain (all data transmitted)
+	 *
+	 * WebSocket message ordering ensures server receives all audio before transcript request.
+	 */
+	async stopAudioCapture() {
+		const config = getConfig();
+		const startTime = Date.now();
+		if (config.debug) console.log("[SpeechOS] stopAudioCapture: starting...");
+		if (this.audioCapture) {
+			await this.audioCapture.stop();
+			this.audioCapture = null;
+			if (config.debug) console.log(`[SpeechOS] stopAudioCapture: recorder stopped after ${Date.now() - startTime}ms`);
+		}
+		state.setMicEnabled(false);
+		if (this.pendingAudioSends.size > 0) {
+			if (config.debug) console.log(`[SpeechOS] stopAudioCapture: waiting for ${this.pendingAudioSends.size} pending audio sends...`);
+			await Promise.all(this.pendingAudioSends);
+			if (config.debug) console.log(`[SpeechOS] stopAudioCapture: all sends complete after ${Date.now() - startTime}ms`);
+		} else if (config.debug) console.log("[SpeechOS] stopAudioCapture: no pending sends");
+		await this.waitForBufferDrain();
+		if (config.debug) console.log(`[SpeechOS] stopAudioCapture: complete after ${Date.now() - startTime}ms`);
+	}
+	/**
+	 * Wait for the WebSocket send buffer to drain.
+	 *
+	 * This ensures all audio data has been transmitted before we request
+	 * the transcript. Uses the same pattern as LiveKit's ReadableStream approach.
+	 */
+	async waitForBufferDrain() {
+		if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
+		const config = getConfig();
+		const startTime = Date.now();
+		while (this.ws.bufferedAmount > 0) {
+			if (Date.now() - startTime > BUFFER_DRAIN_TIMEOUT_MS) {
+				console.warn(`[SpeechOS] Buffer drain timeout, ${this.ws.bufferedAmount} bytes still pending`);
+				break;
+			}
+			await new Promise((resolve) => setTimeout(resolve, BUFFER_CHECK_INTERVAL_MS));
+		}
+		if (config.debug) console.log(`[SpeechOS] Buffer drained in ${Date.now() - startTime}ms`);
+	}
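Browsers expose no "drain" event on WebSocket, so the code polls bufferedAmount. A standalone sketch of the same loop (the timeout and poll interval defaults here are assumptions; the package's BUFFER_DRAIN_TIMEOUT_MS and BUFFER_CHECK_INTERVAL_MS values are not shown in this diff):

async function waitForDrain(ws: WebSocket, timeoutMs = 5000, intervalMs = 50): Promise<void> {
	const start = Date.now();
	while (ws.bufferedAmount > 0) {
		if (Date.now() - start > timeoutMs) {
			console.warn(`drain timeout, ${ws.bufferedAmount} bytes still queued`);
			break; // give up rather than hang the stop flow
		}
		await new Promise((resolve) => setTimeout(resolve, intervalMs));
	}
}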
+	/**
+	 * Send a JSON message over the WebSocket.
+	 */
+	sendMessage(message) {
+		if (this.ws && this.ws.readyState === WebSocket.OPEN) this.ws.send(JSON.stringify(message));
+	}
+	/**
+	 * Disconnect from the WebSocket.
+	 */
+	async disconnect() {
+		const config = getConfig();
+		if (config.debug) console.log("[SpeechOS] Disconnecting WebSocket...");
+		await this.stopAudioCapture();
+		if (this.ws) {
+			this.ws.close();
+			this.ws = null;
+		}
+		const error = new Error("Disconnected");
+		if (this.pendingAuth) {
+			this.pendingAuth.reject(error);
+			this.pendingAuth = null;
+		}
+		if (this.pendingTranscript) {
+			this.pendingTranscript.reject(error);
+			this.pendingTranscript = null;
+		}
+		if (this.pendingEditText) {
+			this.pendingEditText.reject(error);
+			this.pendingEditText = null;
+		}
+		if (this.pendingCommand) {
+			this.pendingCommand.reject(error);
+			this.pendingCommand = null;
+		}
+		this.sessionId = null;
+		this.editOriginalText = null;
+		this.lastInputText = void 0;
+		this.sessionSettings = {};
+		state.setConnected(false);
+		state.setMicEnabled(false);
+		if (config.debug) console.log("[SpeechOS] WebSocket disconnected");
+	}
+	/**
+	 * Check if connected to WebSocket.
+	 */
+	isConnected() {
+		return this.ws !== null && this.ws.readyState === WebSocket.OPEN;
+	}
+	/**
+	 * Get the last input text from a command result.
+	 * This is the raw transcript of what the user said.
+	 */
+	getLastInputText() {
+		return this.lastInputText;
+	}
+};
+const websocket = new WebSocketManager();
 
 //#endregion
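Besides resolving pending promises, each handler also emits on the shared event bus, so a host app can observe sessions without holding the promise itself. A hypothetical subscription (the event names and payloads match the emit calls above; the on() subscription method is assumed from the exported SpeechOSEventEmitter, whose full API is outside this excerpt):

import { events } from "@speechos/core";

events.on("transcription:complete", ({ text }) => console.log("final transcript:", text));
events.on("edit:complete", ({ text, originalText }) => console.log("edited:", originalText, "->", text));
events.on("error", ({ code, message, source }) => console.error(`[${source}] ${code}: ${message}`));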
 //#region src/speechos.ts
 /**
+ * Get the active voice backend (always websocket now)
+ */
+function getBackend$1() {
+	return websocket;
+}
+/**
  * SpeechOS Core SDK
  *
  * Provides two API layers:
@@ -868,7 +1862,6 @@ var SpeechOSCore = class {
 		const currentConfig$1 = getConfig();
 		if (currentConfig$1.debug) console.log("[SpeechOS] Initialized with config:", {
 			host: currentConfig$1.host,
-			position: currentConfig$1.position,
 			debug: currentConfig$1.debug
 		});
 	}
@@ -908,7 +1901,6 @@ var SpeechOSCore = class {
 		state.setRecordingState("processing");
 		try {
 			const transcript = await livekit.stopAndGetTranscript();
-			transcriptStore.saveTranscript(transcript, "dictate");
 			state.completeRecording();
 			return transcript;
 		} catch (error) {
@@ -925,7 +1917,6 @@ var SpeechOSCore = class {
 		state.setRecordingState("processing");
 		try {
 			const editedText = await livekit.stopAndEdit(originalText);
-			transcriptStore.saveTranscript(editedText, "edit", originalText);
 			state.completeRecording();
 			return editedText;
 		} catch (error) {
@@ -951,8 +1942,13 @@ var SpeechOSCore = class {
 		state.setActiveAction("dictate");
 		state.startRecording();
 		try {
-
-
+			const backend = getBackend$1();
+			await backend.startVoiceSession({
+				action: "dictate",
+				onMicReady: () => {
+					state.setRecordingState("recording");
+				}
+			});
 			return new Promise((resolve, reject) => {
 				this._dictateResolve = resolve;
 				this._dictateReject = reject;
@@ -972,8 +1968,8 @@ var SpeechOSCore = class {
 	async stopDictation() {
 		state.setRecordingState("processing");
 		try {
-			const
-
+			const backend = getBackend$1();
+			const transcript = await backend.stopVoiceSession();
 			state.completeRecording();
 			if (this._dictateResolve) {
 				this._dictateResolve(transcript);
@@ -1007,8 +2003,14 @@ var SpeechOSCore = class {
 		state.startRecording();
 		this._editOriginalText = originalText;
 		try {
-
-
+			const backend = getBackend$1();
+			await backend.startVoiceSession({
+				action: "edit",
+				inputText: originalText,
+				onMicReady: () => {
+					state.setRecordingState("recording");
+				}
+			});
 			return new Promise((resolve, reject) => {
 				this._editResolve = resolve;
 				this._editReject = reject;
@@ -1029,9 +2031,9 @@ var SpeechOSCore = class {
 	async stopEdit() {
 		state.setRecordingState("processing");
 		try {
+			const backend = getBackend$1();
 			const originalText = this._editOriginalText || "";
-			const editedText = await
-			transcriptStore.saveTranscript(editedText, "edit", originalText);
+			const editedText = await backend.requestEditText(originalText);
 			state.completeRecording();
 			if (this._editResolve) {
 				this._editResolve(editedText);
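The dictate and edit flows now route through getBackend$1() instead of calling livekit directly. A sketch of the edit round-trip implied by the two hunks above (the enclosing start method's name, edit(), is inferred from stopEdit()/_editResolve and is not shown in this excerpt):

import { speechOS } from "@speechos/core";

async function reviseByVoice(): Promise<string> {
	const pending = speechOS.edit("Original paragraph to revise."); // name inferred, see note above
	// ... the user speaks an instruction such as "make this more formal", then:
	await speechOS.stopEdit();   // sends MESSAGE_TYPE_EDIT_TEXT and awaits the result
	return pending;              // resolves via _editResolve with the edited text
}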
@@ -1054,6 +2056,71 @@ var SpeechOSCore = class {
 		}
 	}
 	/**
+	 * One-shot command: connect, wait for agent, record voice, match against commands
+	 * Automatically handles the full voice session lifecycle
+	 *
+	 * @param commands - Array of command definitions to match against
+	 * @returns The matched command result or null if no match
+	 */
+	async command(commands) {
+		this.ensureInitialized();
+		state.setActiveAction("command");
+		state.startRecording();
+		this._commandCommands = commands;
+		try {
+			const backend = getBackend$1();
+			await backend.startVoiceSession({
+				action: "command",
+				commands,
+				onMicReady: () => {
+					state.setRecordingState("recording");
+				}
+			});
+			return new Promise((resolve, reject) => {
+				this._commandResolve = resolve;
+				this._commandReject = reject;
+			});
+		} catch (error) {
+			state.setError(error instanceof Error ? error.message : "Failed to start command");
+			await this.cleanup();
+			throw error;
+		}
+	}
+	_commandCommands;
+	_commandResolve;
+	_commandReject;
+	/**
+	 * Stop command recording and get the matched command
+	 * Call this after command() when user stops speaking
+	 */
+	async stopCommand() {
+		state.setRecordingState("processing");
+		try {
+			const backend = getBackend$1();
+			const commands = this._commandCommands || [];
+			const result = await backend.requestCommand(commands);
+			state.completeRecording();
+			if (this._commandResolve) {
+				this._commandResolve(result);
+				this._commandResolve = void 0;
+				this._commandReject = void 0;
+			}
+			return result;
+		} catch (error) {
+			const err = error instanceof Error ? error : new Error("Command request failed");
+			state.setError(err.message);
+			if (this._commandReject) {
+				this._commandReject(err);
+				this._commandResolve = void 0;
+				this._commandReject = void 0;
+			}
+			throw err;
+		} finally {
+			this._commandCommands = void 0;
+			await this.cleanup();
+		}
+	}
+	/**
 	 * Cancel the current operation
 	 */
 	async cancel() {
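The new command()/stopCommand() pair mirrors the dictate and edit flows. A hypothetical host-app usage, assuming the SDK has already been initialized (the CommandDefinition shape is presumably specified in dist/types.d.ts, which this excerpt does not show, so the objects below are guesses):

import { speechOS } from "@speechos/core";

async function runVoiceCommand() {
	const pending = speechOS.command([
		{ name: "open_settings", description: "Open the settings page" },
		{ name: "new_document", description: "Create a new document" }
	]);
	// ... the user speaks, then the UI's stop control calls:
	await speechOS.stopCommand();
	const match = await pending; // matched command result, or null if nothing matched
	if (match) console.log("matched:", match);
}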
@@ -1068,7 +2135,13 @@ var SpeechOSCore = class {
 			this._editResolve = void 0;
 			this._editReject = void 0;
 		}
+		if (this._commandReject) {
+			this._commandReject(err);
+			this._commandResolve = void 0;
+			this._commandReject = void 0;
+		}
 		this._editOriginalText = void 0;
+		this._commandCommands = void 0;
 		await this.cleanup();
 		state.cancelRecording();
 	}
@@ -1095,7 +2168,8 @@ var SpeechOSCore = class {
 	}
 	async cleanup() {
 		try {
-
+			const backend = getBackend$1();
+			await backend.disconnect();
 		} catch (error) {
 			const config = getConfig();
 			if (config.debug) console.warn("[SpeechOS] Cleanup disconnect error:", error);
@@ -1111,6 +2185,9 @@ var SpeechOSCore = class {
 		this._editResolve = void 0;
 		this._editReject = void 0;
 		this._editOriginalText = void 0;
+		this._commandResolve = void 0;
+		this._commandReject = void 0;
+		this._commandCommands = void 0;
 		resetConfig();
 		state.reset();
 		events.clear();
@@ -1118,10 +2195,38 @@
 };
 const speechOS = new SpeechOSCore();
 
+//#endregion
+//#region src/backend.ts
+/**
+ * WebSocket backend adapter - wraps the websocket module to match the VoiceBackend interface
+ */
+const websocketBackend = {
+	startVoiceSession: (options) => websocket.startVoiceSession(options),
+	stopVoiceSession: () => websocket.stopVoiceSession(),
+	requestEditText: (text) => websocket.requestEditText(text),
+	requestCommand: (commands) => websocket.requestCommand(commands),
+	disconnect: () => websocket.disconnect(),
+	isConnected: () => websocket.isConnected(),
+	getLastInputText: () => websocket.getLastInputText(),
+	prefetchToken: () => Promise.resolve({}),
+	startAutoRefresh: () => {},
+	stopAutoRefresh: () => {},
+	invalidateTokenCache: () => {}
+};
+/**
+ * Get the active voice backend.
+ * Always returns WebSocket backend (LiveKit is legacy).
+ *
+ * @returns The websocket backend
+ */
+function getBackend() {
+	return websocketBackend;
+}
+
 //#endregion
 //#region src/index.ts
 const VERSION = "0.1.0";
 
 //#endregion
-export { DEFAULT_HOST, Deferred, SpeechOSEventEmitter, VERSION, createStateManager,
+export { DEFAULT_HOST, Deferred, SpeechOSEventEmitter, VERSION, createStateManager, events, getBackend, getConfig, livekit, resetConfig, setConfig, speechOS, state, updateUserId, validateConfig, websocket };
 //# sourceMappingURL=index.js.map
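The adapter keeps the LiveKit-era token methods (prefetchToken, startAutoRefresh, stopAutoRefresh, invalidateTokenCache) as no-ops, so callers written against the old backend survive the switch. A minimal consumer of the new seam (a sketch only; the VoiceBackend type lives in the new dist/backend.d.ts, which this excerpt does not include, and treating onMicReady as optional is an assumption):

import { getBackend } from "@speechos/core";

async function quickDictation(): Promise<string> {
	const backend = getBackend(); // always the websocket adapter as of 0.2.3
	await backend.startVoiceSession({ action: "dictate" });
	// ... capture runs until the host app decides to stop:
	return backend.stopVoiceSession(); // waits for the final transcript
}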