@speechos/core 0.2.0 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/audio-capture.d.cts +130 -0
- package/dist/audio-capture.d.ts +130 -0
- package/dist/backend.d.cts +41 -0
- package/dist/backend.d.ts +41 -0
- package/dist/config.d.cts +23 -7
- package/dist/config.d.ts +23 -7
- package/dist/index.cjs +1263 -158
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +6 -5
- package/dist/index.d.ts +6 -5
- package/dist/index.js +1262 -157
- package/dist/index.js.map +1 -1
- package/dist/livekit.d.cts +81 -14
- package/dist/livekit.d.ts +81 -14
- package/dist/speechos.d.cts +19 -3
- package/dist/speechos.d.ts +19 -3
- package/dist/state.d.cts +4 -1
- package/dist/state.d.ts +4 -1
- package/dist/types.d.cts +105 -9
- package/dist/types.d.ts +105 -9
- package/dist/websocket.d.cts +133 -0
- package/dist/websocket.d.ts +133 -0
- package/package.json +5 -4
- package/dist/transcript-store.d.cts +0 -35
- package/dist/transcript-store.d.ts +0 -35
package/dist/index.cjs
CHANGED
@@ -35,8 +35,6 @@ const defaultConfig = {
 apiKey: "",
 userId: "",
 host: DEFAULT_HOST,
-position: "bottom-center",
-zIndex: 999999,
 debug: false
 };
 /**
@@ -44,31 +42,19 @@ const defaultConfig = {
 * @param userConfig - User-provided configuration
 * @returns Validated and merged configuration
 */
-function validateConfig(userConfig
+function validateConfig(userConfig) {
 if (!userConfig.apiKey) throw new Error("SpeechOS requires an apiKey. Get one from your team dashboard at /a/<team-slug>/.");
-
-
-
+return {
+apiKey: userConfig.apiKey,
+userId: userConfig.userId ?? defaultConfig.userId,
+host: userConfig.host ?? defaultConfig.host,
+debug: userConfig.debug ?? defaultConfig.debug
 };
-const validPositions = [
-"bottom-center",
-"bottom-right",
-"bottom-left"
-];
-if (!validPositions.includes(config.position)) {
-console.warn(`Invalid position "${config.position}". Using default "bottom-center".`);
-config.position = "bottom-center";
-}
-if (typeof config.zIndex !== "number" || config.zIndex < 0) {
-console.warn(`Invalid zIndex "${config.zIndex}". Using default ${defaultConfig.zIndex}.`);
-config.zIndex = defaultConfig.zIndex;
-}
-return config;
 }
 /**
 * Current active configuration (singleton)
 */
-let currentConfig = defaultConfig;
+let currentConfig = { ...defaultConfig };
 /**
 * Get the current configuration
 */
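The hunk above drops the widget-presentation options (position, zIndex) from core config validation. A minimal sketch of the resulting config shape, inferred from the merge logic in the hunk; the SpeechOSConfig name and mergeConfig wrapper are illustrative, not identifiers from the package:

```ts
// Inferred from the new validateConfig: apiKey is required, everything else
// falls back to the defaults shown in the first hunk.
interface SpeechOSConfig {
  apiKey: string;
  userId: string;
  host: string;
  debug: boolean;
  // position and zIndex are no longer part of core config as of this diff
}

function mergeConfig(user: Partial<SpeechOSConfig>): SpeechOSConfig {
  if (!user.apiKey) throw new Error("SpeechOS requires an apiKey.");
  return {
    apiKey: user.apiKey,
    userId: user.userId ?? "",
    // DEFAULT_HOST's value is not shown in this diff; this fallback is an
    // assumption based on the host fallback in the websocket region below.
    host: user.host ?? "https://app.speechos.ai",
    debug: user.debug ?? false,
  };
}
```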
@@ -98,6 +84,28 @@ function updateUserId(userId) {
 userId
 };
 }
+/**
+* LocalStorage key for anonymous ID persistence
+*/
+const ANONYMOUS_ID_KEY = "speechos_anonymous_id";
+/**
+* Get or generate a persistent anonymous ID for Mixpanel tracking.
+*
+* This ID is stored in localStorage to persist across sessions,
+* allowing consistent anonymous user tracking without identifying
+* the account owner's customers.
+*
+* @returns A UUID string for anonymous identification
+*/
+function getAnonymousId() {
+if (typeof localStorage === "undefined") return crypto.randomUUID();
+let anonymousId = localStorage.getItem(ANONYMOUS_ID_KEY);
+if (!anonymousId) {
+anonymousId = crypto.randomUUID();
+localStorage.setItem(ANONYMOUS_ID_KEY, anonymousId);
+}
+return anonymousId;
+}
 
 //#endregion
 //#region src/events.ts
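A quick illustration of the persistence behavior of getAnonymousId above: in a browser the ID survives reloads via localStorage, while outside a browser each call mints a fresh UUID. This is a usage sketch, not package code:

```ts
const first = getAnonymousId();
const second = getAnonymousId();
// With localStorage available, both calls return the same stored UUID;
// without it (e.g. server-side rendering), each call returns a new crypto.randomUUID().
console.assert(typeof localStorage === "undefined" || first === second);
```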
@@ -191,33 +199,38 @@ const initialState = {
 var StateManager = class {
 state;
 subscribers = /* @__PURE__ */ new Set();
+/** Cached immutable snapshot for useSyncExternalStore compatibility */
+snapshot;
 constructor(initialState$1) {
 this.state = { ...initialState$1 };
+this.snapshot = Object.freeze({ ...this.state });
 }
 /**
-* Get the current state (returns a
+* Get the current state snapshot (returns a stable reference for React)
+* This returns an immutable frozen object that only changes when setState is called.
 */
 getState() {
-return
+return this.snapshot;
 }
 /**
 * Update state with partial values
 * @param partial - Partial state to merge with current state
 */
 setState(partial) {
-const prevState =
+const prevState = this.snapshot;
 this.state = {
 ...this.state,
 ...partial
 };
+this.snapshot = Object.freeze({ ...this.state });
 this.subscribers.forEach((callback) => {
 try {
-callback(this.
+callback(this.snapshot, prevState);
 } catch (error) {
 console.error("Error in state change callback:", error);
 }
 });
-events.emit("state:change", { state: this.
+events.emit("state:change", { state: this.snapshot });
 }
 /**
 * Subscribe to state changes
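The frozen-snapshot change exists so getState() returns a referentially stable object, which is what React's useSyncExternalStore requires to detect changes cheaply. A minimal sketch of how a consumer could wire this up, assuming subscribe returns an unsubscribe function (its body is not shown in this diff); the hook wrapper is illustrative, not part of the package:

```ts
import { useSyncExternalStore } from "react";

// stateManager is assumed to match the StateManager class in the hunk above.
function useSpeechOSState(stateManager: {
  subscribe(cb: () => void): () => void;
  getState(): object;
}) {
  // Because getState() now returns a cached frozen snapshot, React compares
  // references and only re-renders when setState produced a new snapshot.
  return useSyncExternalStore(
    (onStoreChange) => stateManager.subscribe(onStoreChange),
    () => stateManager.getState()
  );
}
```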
@@ -234,7 +247,17 @@ var StateManager = class {
 * Reset state to initial values
 */
 reset() {
-this.
+const prevState = this.snapshot;
+this.state = { ...initialState };
+this.snapshot = Object.freeze({ ...this.state });
+this.subscribers.forEach((callback) => {
+try {
+callback(this.snapshot, prevState);
+} catch (error) {
+console.error("Error in state change callback:", error);
+}
+});
+events.emit("state:change", { state: this.snapshot });
 }
 /**
 * Show the widget
@@ -369,12 +392,15 @@ function createStateManager(initial) {
 
 //#endregion
 //#region src/livekit.ts
-const MESSAGE_TYPE_REQUEST_TRANSCRIPT = "request_transcript";
-const MESSAGE_TYPE_TRANSCRIPT = "transcript";
-const MESSAGE_TYPE_EDIT_TEXT = "edit_text";
-const MESSAGE_TYPE_EDITED_TEXT = "edited_text";
-const
+const MESSAGE_TYPE_REQUEST_TRANSCRIPT$1 = "request_transcript";
+const MESSAGE_TYPE_TRANSCRIPT$1 = "transcript";
+const MESSAGE_TYPE_EDIT_TEXT$1 = "edit_text";
+const MESSAGE_TYPE_EDITED_TEXT$1 = "edited_text";
+const MESSAGE_TYPE_EXECUTE_COMMAND$1 = "execute_command";
+const MESSAGE_TYPE_COMMAND_RESULT$1 = "command_result";
+const MESSAGE_TYPE_ERROR$1 = "error";
 const TOPIC_SPEECHOS = "speechos";
+const TOKEN_CACHE_TTL_MS = 4 * 60 * 1e3;
 /**
 * A deferred promise with timeout support.
 * Encapsulates resolve/reject/timeout in a single object for cleaner async handling.
@@ -438,53 +464,116 @@ var LiveKitManager = class {
 room = null;
 tokenData = null;
 micTrack = null;
+cachedTokenData = null;
+tokenCacheTimestamp = null;
+tokenPrefetchPromise = null;
+tokenRefreshTimer = null;
+autoRefreshEnabled = false;
 pendingTranscript = null;
 pendingEditText = null;
+pendingCommand = null;
 pendingTrackSubscribed = null;
-preWarmPromise = null;
 editOriginalText = null;
+sessionSettings = {};
 /**
-*
-* Call this when user shows intent (e.g., expands widget)
-* Only fetches token - mic permission is requested when user clicks Dictate
+* Check if the cached token is still valid (within TTL)
 */
-
-if (this.
-
-
-
-
+isCachedTokenValid() {
+if (!this.cachedTokenData || !this.tokenCacheTimestamp) return false;
+const age = Date.now() - this.tokenCacheTimestamp;
+return age < TOKEN_CACHE_TTL_MS;
+}
+/**
+* Pre-fetch a LiveKit token for later use
+* Call this early (e.g., when widget expands) to reduce latency when starting a voice session.
+* If a prefetch is already in progress, returns the existing promise.
+* If a valid cached token exists, returns it immediately.
+*/
+async prefetchToken() {
 const config = getConfig();
-if (
-
-
-
-
-
-
-
-
-
+if (this.isCachedTokenValid() && this.cachedTokenData) {
+if (config.debug) console.log("[SpeechOS] Using cached token (prefetch hit)");
+return this.cachedTokenData;
+}
+if (this.tokenPrefetchPromise) {
+if (config.debug) console.log("[SpeechOS] Prefetch already in progress, awaiting...");
+return this.tokenPrefetchPromise;
+}
+if (config.debug) console.log("[SpeechOS] Starting token prefetch...");
+this.tokenPrefetchPromise = this.fetchTokenFromServer().then((data) => {
+this.cachedTokenData = data;
+this.tokenCacheTimestamp = Date.now();
+this.tokenPrefetchPromise = null;
+return data;
+}).catch((error) => {
+this.tokenPrefetchPromise = null;
+throw error;
+});
+return this.tokenPrefetchPromise;
 }
 /**
 * Fetch a LiveKit token from the backend
+* Uses cached token if valid, otherwise fetches a fresh one.
+* Includes language settings and user vocabulary which are stored in the VoiceSession.
 */
 async fetchToken() {
+const config = getConfig();
+if (this.isCachedTokenValid() && this.cachedTokenData) {
+if (config.debug) console.log("[SpeechOS] Using cached token");
+this.tokenData = this.cachedTokenData;
+return this.cachedTokenData;
+}
+if (this.tokenPrefetchPromise) {
+if (config.debug) console.log("[SpeechOS] Waiting for prefetch to complete...");
+const data$1 = await this.tokenPrefetchPromise;
+this.tokenData = data$1;
+return data$1;
+}
+const data = await this.fetchTokenFromServer();
+this.cachedTokenData = data;
+this.tokenCacheTimestamp = Date.now();
+this.tokenData = data;
+return data;
+}
+/**
+* Internal method to fetch a fresh token from the server
+*/
+async fetchTokenFromServer() {
 const config = getConfig();
 const url = `${config.host}/livekit/api/token/`;
-
+const settings = this.sessionSettings;
+const inputLanguage = settings.inputLanguageCode ?? "en-US";
+const outputLanguage = settings.outputLanguageCode ?? "en-US";
+const smartFormat = settings.smartFormat ?? true;
+const vocabulary = settings.vocabulary ?? [];
+const snippets = settings.snippets ?? [];
+if (config.debug) {
+console.log("[SpeechOS] Fetching LiveKit token from:", url);
+console.log("[SpeechOS] Session settings:", {
+inputLanguage,
+outputLanguage,
+smartFormat,
+snippetsCount: snippets.length,
+vocabularyCount: vocabulary.length
+});
+}
 const response = await fetch(url, {
 method: "POST",
 headers: {
 "Content-Type": "application/json",
 ...config.apiKey ? { Authorization: `Api-Key ${config.apiKey}` } : {}
 },
-body: JSON.stringify({
+body: JSON.stringify({
+user_id: config.userId || null,
+input_language: inputLanguage,
+output_language: outputLanguage,
+smart_format: smartFormat,
+custom_vocabulary: vocabulary,
+custom_snippets: snippets
+})
 });
 if (!response.ok) throw new Error(`Failed to fetch LiveKit token: ${response.status} ${response.statusText}`);
 const data = await response.json();
-this.tokenData = data;
 if (config.debug) console.log("[SpeechOS] LiveKit token received:", {
 room: data.room,
 identity: data.identity,
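The prefetch path above is designed to be fired on user intent so the token round-trip overlaps with UI interaction. A sketch of the intended call pattern, inferred from the doc comments in the hunk; widgetEl and the event choice are illustrative, not from the package:

```ts
// livekit is the LiveKitManager singleton defined in this file.
widgetEl.addEventListener("pointerenter", () => {
  // Fire-and-forget: a valid cached token (under the 4-minute TTL) resolves
  // immediately, an in-flight prefetch is reused, otherwise one fetch starts.
  livekit.prefetchToken().catch(() => {
    // Errors here are non-fatal; fetchToken() retries on session start.
  });
});
// Later, startVoiceSession() -> fetchToken() hits the cache or awaits the
// in-flight prefetch instead of paying the full token round-trip.
```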
@@ -497,8 +586,7 @@ var LiveKitManager = class {
 */
 async connect() {
 const config = getConfig();
-
-else if (config.debug) console.log("[SpeechOS] Using pre-fetched token");
+await this.fetchToken();
 if (!this.tokenData) throw new Error("No token available for LiveKit connection");
 this.room = new livekit_client.Room({
 adaptiveStream: true,
@@ -562,7 +650,7 @@ var LiveKitManager = class {
 try {
 const message = JSON.parse(new TextDecoder().decode(data));
 if (config.debug) console.log("[SpeechOS] Data received:", message);
-if (message.type === MESSAGE_TYPE_TRANSCRIPT) {
+if (message.type === MESSAGE_TYPE_TRANSCRIPT$1) {
 const transcript = message.transcript || "";
 if (config.debug) console.log("[SpeechOS] Transcript received:", transcript);
 events.emit("transcription:complete", { text: transcript });
@@ -570,7 +658,7 @@ var LiveKitManager = class {
 this.pendingTranscript.resolve(transcript);
 this.pendingTranscript = null;
 }
-} else if (message.type === MESSAGE_TYPE_EDITED_TEXT) {
+} else if (message.type === MESSAGE_TYPE_EDITED_TEXT$1) {
 const editedText = message.text || "";
 if (config.debug) console.log("[SpeechOS] Edited text received:", editedText);
 events.emit("edit:complete", {
@@ -582,7 +670,15 @@ var LiveKitManager = class {
 this.pendingEditText = null;
 }
 this.editOriginalText = null;
-} else if (message.type ===
+} else if (message.type === MESSAGE_TYPE_COMMAND_RESULT$1) {
+const commandResult = message.command || null;
+if (config.debug) console.log("[SpeechOS] Command result received:", commandResult);
+events.emit("command:complete", { command: commandResult });
+if (this.pendingCommand) {
+this.pendingCommand.resolve(commandResult);
+this.pendingCommand = null;
+}
+} else if (message.type === MESSAGE_TYPE_ERROR$1) {
 const serverError = message;
 const errorCode = serverError.code || "server_error";
 const errorMessage = serverError.message || "A server error occurred";
@@ -602,6 +698,10 @@ var LiveKitManager = class {
 this.pendingEditText.reject(error);
 this.pendingEditText = null;
 }
+if (this.pendingCommand) {
+this.pendingCommand.reject(error);
+this.pendingCommand = null;
+}
 }
 } catch (error) {
 console.error("[SpeechOS] Failed to parse data message:", error);
@@ -609,16 +709,34 @@ var LiveKitManager = class {
 }
 /**
 * Publish microphone audio track
+* Uses the device ID from session settings if set
 */
 async enableMicrophone() {
 if (!this.room || this.room.state !== "connected") throw new Error("Not connected to room");
 const config = getConfig();
 if (!this.micTrack) {
 if (config.debug) console.log("[SpeechOS] Creating microphone track...");
-
+const deviceId = this.sessionSettings.audioDeviceId;
+const trackOptions = {
 echoCancellation: true,
 noiseSuppression: true
-}
+};
+if (deviceId) {
+trackOptions.deviceId = { exact: deviceId };
+if (config.debug) console.log("[SpeechOS] Using audio device:", deviceId);
+}
+try {
+this.micTrack = await (0, livekit_client.createLocalAudioTrack)(trackOptions);
+} catch (error) {
+if (deviceId && error instanceof Error) {
+console.warn("[SpeechOS] Selected audio device unavailable, falling back to default:", error.message);
+this.micTrack = await (0, livekit_client.createLocalAudioTrack)({
+echoCancellation: true,
+noiseSuppression: true
+});
+} else throw error;
+}
+this.logMicrophoneInfo();
 }
 const existingPub = this.room.localParticipant.getTrackPublication(livekit_client.Track.Source.Microphone);
 if (!existingPub) {
@@ -628,6 +746,24 @@ var LiveKitManager = class {
 }
 }
 /**
+* Log information about the current microphone track
+*/
+logMicrophoneInfo() {
+if (!this.micTrack) return;
+const config = getConfig();
+const mediaTrack = this.micTrack.mediaStreamTrack;
+const settings = mediaTrack.getSettings();
+console.log("[SpeechOS] Microphone active:", {
+deviceId: settings.deviceId || "unknown",
+label: mediaTrack.label || "Unknown device",
+sampleRate: settings.sampleRate,
+channelCount: settings.channelCount,
+echoCancellation: settings.echoCancellation,
+noiseSuppression: settings.noiseSuppression
+});
+if (config.debug) console.log("[SpeechOS] Full audio track settings:", settings);
+}
+/**
 * Disable microphone audio track
 */
 async disableMicrophone() {
@@ -659,30 +795,85 @@ var LiveKitManager = class {
 });
 }
 /**
-* Start a voice session
-*
+* Start a voice session with pre-connect audio buffering
+* Fetches a fresh token, then enables mic with preConnectBuffer to capture audio while connecting.
+* Agent subscription happens in the background - we don't block on it.
+*
+* @param options - Session options including action type and parameters
 */
-async startVoiceSession() {
+async startVoiceSession(options) {
 const config = getConfig();
 if (config.debug) console.log("[SpeechOS] Starting voice session...");
-
-
-
-}
-if (this.tokenData) {
-if (config.debug) console.log("[SpeechOS] Using cached token from init");
-} else {
-if (config.debug) console.log("[SpeechOS] Fetching fresh token for session...");
-await this.fetchToken();
-}
+this.sessionSettings = options?.settings || {};
+await this.fetchToken();
+if (!this.tokenData) throw new Error("No token available for LiveKit connection");
 this.pendingTrackSubscribed = new Deferred();
 this.pendingTrackSubscribed.setTimeout(15e3, "Connection timed out - agent not available", "connection_timeout", "connection");
-
-
-
-
-this.
-if (config.debug) console.log("[SpeechOS]
+this.room = new livekit_client.Room({
+adaptiveStream: true,
+dynacast: true
+});
+this.setupRoomEvents();
+if (config.debug) console.log("[SpeechOS] Connecting to LiveKit room:", this.tokenData.room, "at", this.tokenData.ws_url);
+await this.room.connect(this.tokenData.ws_url, this.tokenData.token);
+if (config.debug) console.log("[SpeechOS] Connected, enabling microphone with preConnectBuffer...");
+await this.enableMicrophoneWithPreConnectBuffer();
+if (options?.onMicReady) options.onMicReady();
+state.setConnected(true);
+if (config.debug) console.log("[SpeechOS] Voice session ready - microphone active");
+this.waitForAgentSubscription();
+}
+/**
+* Wait for the agent to subscribe to our audio track in the background
+* Handles timeout errors without blocking the main flow
+*/
+waitForAgentSubscription() {
+const config = getConfig();
+if (!this.pendingTrackSubscribed) return;
+this.pendingTrackSubscribed.promise.then(() => {
+if (config.debug) console.log("[SpeechOS] Agent subscribed to audio track - full duplex established");
+this.pendingTrackSubscribed = null;
+}).catch((error) => {
+console.warn("[SpeechOS] Agent subscription timeout:", error.message);
+this.pendingTrackSubscribed = null;
+});
+}
+/**
+* Enable microphone with pre-connect buffering
+* This starts capturing audio locally before the room is connected,
+* buffering it until the connection is established.
+*/
+async enableMicrophoneWithPreConnectBuffer() {
+if (!this.room) throw new Error("Room not initialized");
+const config = getConfig();
+const deviceId = this.sessionSettings.audioDeviceId;
+const constraints = {
+echoCancellation: true,
+noiseSuppression: true
+};
+if (deviceId) {
+constraints.deviceId = { exact: deviceId };
+if (config.debug) console.log("[SpeechOS] Using audio device:", deviceId);
+}
+try {
+await this.room.localParticipant.setMicrophoneEnabled(true, constraints, { preConnectBuffer: true });
+state.setMicEnabled(true);
+const micPub = this.room.localParticipant.getTrackPublication(livekit_client.Track.Source.Microphone);
+if (micPub?.track) {
+this.micTrack = micPub.track;
+this.logMicrophoneInfo();
+}
+if (config.debug) console.log("[SpeechOS] Microphone enabled with pre-connect buffer - audio is being captured");
+} catch (error) {
+if (deviceId && error instanceof Error) {
+console.warn("[SpeechOS] Selected audio device unavailable, falling back to default:", error.message);
+await this.room.localParticipant.setMicrophoneEnabled(true, {
+echoCancellation: true,
+noiseSuppression: true
+}, { preConnectBuffer: true });
+state.setMicEnabled(true);
+} else throw error;
+}
 }
 /**
 * Stop the voice session and request the transcript
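The rewritten startVoiceSession reorders the flow so the caller's UI can react as soon as capture begins rather than when the agent joins. An illustrative call, based on the options object read above; the settings field names match the sessionSettings reads elsewhere in this diff, while showRecordingIndicator is a hypothetical callback:

```ts
await livekit.startVoiceSession({
  settings: { inputLanguageCode: "en-US", audioDeviceId: undefined },
  onMicReady: () => showRecordingIndicator(), // fires once the mic is live
});
// At this point audio is already being captured (preConnectBuffer held it
// during connection), while the agent track subscription continues in the
// background via waitForAgentSubscription() without blocking this call.
```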
@@ -691,12 +882,19 @@ var LiveKitManager = class {
 */
 async stopVoiceSession() {
 const config = getConfig();
+const settings = this.sessionSettings;
+const inputLanguage = settings.inputLanguageCode ?? "en-US";
+const outputLanguage = settings.outputLanguageCode ?? "en-US";
+console.log("[SpeechOS] Dictate command:", {
+inputLanguage,
+outputLanguage
+});
 if (config.debug) console.log("[SpeechOS] Stopping voice session, requesting transcript...");
 await this.disableMicrophone();
 if (config.debug) console.log("[SpeechOS] Requesting transcript from agent...");
 this.pendingTranscript = new Deferred();
 this.pendingTranscript.setTimeout(1e4, "Transcription timed out. Please try again.", "transcription_timeout", "timeout");
-await this.sendDataMessage({ type: MESSAGE_TYPE_REQUEST_TRANSCRIPT });
+await this.sendDataMessage({ type: MESSAGE_TYPE_REQUEST_TRANSCRIPT$1 });
 const result = await this.pendingTranscript.promise;
 this.pendingTranscript = null;
 return result;
@@ -715,6 +913,14 @@ var LiveKitManager = class {
 */
 async requestEditText(originalText) {
 const config = getConfig();
+const settings = this.sessionSettings;
+const inputLanguage = settings.inputLanguageCode ?? "en-US";
+const outputLanguage = settings.outputLanguageCode ?? "en-US";
+console.log("[SpeechOS] Edit command:", {
+inputLanguage,
+outputLanguage,
+originalTextLength: originalText.length
+});
 if (config.debug) console.log("[SpeechOS] Requesting text edit...");
 this.editOriginalText = originalText;
 await this.disableMicrophone();
@@ -722,7 +928,7 @@ var LiveKitManager = class {
 this.pendingEditText = new Deferred();
 this.pendingEditText.setTimeout(15e3, "Edit request timed out. Please try again.", "edit_timeout", "timeout");
 await this.sendDataMessage({
-type: MESSAGE_TYPE_EDIT_TEXT,
+type: MESSAGE_TYPE_EDIT_TEXT$1,
 text: originalText
 });
 const result = await this.pendingEditText.promise;
@@ -736,6 +942,39 @@ var LiveKitManager = class {
 return this.requestEditText(originalText);
 }
 /**
+* Request command matching using the transcript as input
+* Sends command definitions to the backend, which matches the user's speech against them
+* Returns a promise that resolves with the matched command or null if no match
+* @throws Error if timeout occurs waiting for command result
+*/
+async requestCommand(commands) {
+const config = getConfig();
+const settings = this.sessionSettings;
+const inputLanguage = settings.inputLanguageCode ?? "en-US";
+console.log("[SpeechOS] Command request:", {
+inputLanguage,
+commandCount: commands.length
+});
+if (config.debug) console.log("[SpeechOS] Requesting command match...");
+await this.disableMicrophone();
+if (config.debug) console.log("[SpeechOS] Sending execute_command request to agent...");
+this.pendingCommand = new Deferred();
+this.pendingCommand.setTimeout(15e3, "Command request timed out. Please try again.", "command_timeout", "timeout");
+await this.sendDataMessage({
+type: MESSAGE_TYPE_EXECUTE_COMMAND$1,
+commands
+});
+const result = await this.pendingCommand.promise;
+this.pendingCommand = null;
+return result;
+}
+/**
+* Alias for requestCommand - granular API naming
+*/
+async stopAndCommand(commands) {
+return this.requestCommand(commands);
+}
+/**
 * Disconnect from the current room
 * Clears the token so a fresh one is fetched for the next session
 */
@@ -758,16 +997,110 @@ var LiveKitManager = class {
 this.pendingEditText.reject(new Error("Disconnected"));
 this.pendingEditText = null;
 }
+if (this.pendingCommand) {
+this.pendingCommand.reject(new Error("Disconnected"));
+this.pendingCommand = null;
+}
 if (this.pendingTrackSubscribed) {
 this.pendingTrackSubscribed.reject(new Error("Disconnected"));
 this.pendingTrackSubscribed = null;
 }
 this.tokenData = null;
-this.preWarmPromise = null;
 this.editOriginalText = null;
+this.sessionSettings = {};
 if (config.debug) console.log("[SpeechOS] Session state cleared");
 }
 /**
+* Invalidate the cached token
+* Call this when settings change that would affect the token (language, vocabulary)
+*/
+invalidateTokenCache() {
+const config = getConfig();
+if (config.debug) console.log("[SpeechOS] Token cache invalidated");
+this.cachedTokenData = null;
+this.tokenCacheTimestamp = null;
+}
+/**
+* Start auto-refreshing the token while the widget is expanded.
+* Call this after a voice session completes to immediately fetch a fresh token
+* (since each command requires its own token) and keep it fresh for subsequent commands.
+*/
+startAutoRefresh() {
+const config = getConfig();
+this.autoRefreshEnabled = true;
+if (config.debug) console.log("[SpeechOS] Token auto-refresh enabled");
+this.invalidateTokenCache();
+this.prefetchToken().then(() => {
+this.scheduleTokenRefresh();
+}).catch((error) => {
+if (config.debug) console.warn("[SpeechOS] Failed to prefetch token after command:", error);
+if (this.autoRefreshEnabled) this.tokenRefreshTimer = setTimeout(() => {
+this.performAutoRefresh();
+}, 5 * 1e3);
+});
+}
+/**
+* Stop auto-refreshing the token.
+* Call this when the widget collapses or user navigates away.
+*/
+stopAutoRefresh() {
+const config = getConfig();
+this.autoRefreshEnabled = false;
+if (this.tokenRefreshTimer) {
+clearTimeout(this.tokenRefreshTimer);
+this.tokenRefreshTimer = null;
+}
+if (config.debug) console.log("[SpeechOS] Token auto-refresh disabled");
+}
+/**
+* Schedule a token refresh before the current cache expires.
+* Handles computer sleep by checking elapsed time on each refresh attempt.
+*/
+scheduleTokenRefresh() {
+if (!this.autoRefreshEnabled) return;
+if (this.tokenRefreshTimer) {
+clearTimeout(this.tokenRefreshTimer);
+this.tokenRefreshTimer = null;
+}
+const config = getConfig();
+const refreshBuffer = 30 * 1e3;
+let timeUntilRefresh;
+if (this.tokenCacheTimestamp) {
+const age = Date.now() - this.tokenCacheTimestamp;
+const timeRemaining = TOKEN_CACHE_TTL_MS - age;
+timeUntilRefresh = Math.max(0, timeRemaining - refreshBuffer);
+} else timeUntilRefresh = 0;
+if (config.debug) console.log(`[SpeechOS] Scheduling token refresh in ${Math.round(timeUntilRefresh / 1e3)}s`);
+this.tokenRefreshTimer = setTimeout(() => {
+this.performAutoRefresh();
+}, timeUntilRefresh);
+}
+/**
+* Perform the auto-refresh, handling computer sleep scenarios.
+*/
+async performAutoRefresh() {
+if (!this.autoRefreshEnabled) return;
+const config = getConfig();
+if (this.isCachedTokenValid()) {
+if (config.debug) console.log("[SpeechOS] Token still valid on refresh check, rescheduling");
+this.scheduleTokenRefresh();
+return;
+}
+if (config.debug) console.log("[SpeechOS] Auto-refreshing token...");
+try {
+const data = await this.fetchTokenFromServer();
+this.cachedTokenData = data;
+this.tokenCacheTimestamp = Date.now();
+if (config.debug) console.log("[SpeechOS] Token auto-refreshed successfully");
+this.scheduleTokenRefresh();
+} catch (error) {
+console.warn("[SpeechOS] Token auto-refresh failed:", error);
+if (this.autoRefreshEnabled) this.tokenRefreshTimer = setTimeout(() => {
+this.performAutoRefresh();
+}, 30 * 1e3);
+}
+}
+/**
 * Get the current room instance
 */
 getRoom() {
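The scheduling math above keeps a token warm: with TOKEN_CACHE_TTL_MS = 4 minutes and a 30-second refreshBuffer, a cached token is replaced about 3.5 minutes after it was fetched. A worked sketch of the arithmetic as implemented in scheduleTokenRefresh, using the constants from this diff:

```ts
const TOKEN_CACHE_TTL_MS = 4 * 60 * 1000; // 240_000 ms, from the diff
const refreshBuffer = 30 * 1000;          // 30_000 ms, from the diff

// For a token cached 60 seconds ago:
const age = 60_000;
const timeRemaining = TOKEN_CACHE_TTL_MS - age;                      // 180_000 ms left
const timeUntilRefresh = Math.max(0, timeRemaining - refreshBuffer); // 150_000 ms
// => the refresh fires 2.5 minutes from now, 30 s before the cache expires.
// After a laptop sleep/wake, age may already exceed the TTL; Math.max(0, ...)
// then schedules an immediate refresh, which is the sleep handling the
// doc comment describes.
```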
@@ -791,88 +1124,749 @@ var LiveKitManager = class {
 isMicrophoneEnabled() {
 return this.micTrack !== null;
 }
-/**
-* Clear the cached token
-* Used when user identity changes to ensure next session gets a fresh token
-*/
-clearToken() {
-const config = getConfig();
-if (config.debug) console.log("[SpeechOS] Clearing cached token");
-this.tokenData = null;
-this.preWarmPromise = null;
-}
 };
 const livekit = new LiveKitManager();
+events.on("settings:changed", () => {
+livekit.invalidateTokenCache();
+});
 
 //#endregion
-//#region src/
-const STORAGE_KEY = "speechos_transcripts";
-const MAX_ENTRIES = 50;
+//#region src/audio-capture.ts
 /**
-*
+* Detect if running in Safari.
 */
-function
-
+function isSafari() {
+const ua = navigator.userAgent.toLowerCase();
+const vendor = navigator.vendor?.toLowerCase() || "";
+const hasSafariUA = ua.includes("safari") && !ua.includes("chrome") && !ua.includes("chromium");
+const isAppleVendor = vendor.includes("apple");
+return hasSafariUA && isAppleVendor;
 }
 /**
-*
+* Detect the best supported audio format for the current browser.
+*
+* IMPORTANT: Safari must use MP4/AAC. Its WebM/Opus implementation is buggy
+* and produces truncated/incomplete audio.
 */
-function
-
-
-
-
-
-
-return
+function getSupportedAudioFormat() {
+if (isSafari()) {
+if (MediaRecorder.isTypeSupported("audio/mp4")) return {
+mimeType: "audio/mp4",
+format: "mp4",
+needsEncodingParams: false
+};
+return {
+mimeType: "",
+format: "mp4",
+needsEncodingParams: true
+};
 }
+if (MediaRecorder.isTypeSupported("audio/webm;codecs=opus")) return {
+mimeType: "audio/webm;codecs=opus",
+format: "webm",
+needsEncodingParams: false
+};
+if (MediaRecorder.isTypeSupported("audio/webm")) return {
+mimeType: "audio/webm",
+format: "webm",
+needsEncodingParams: false
+};
+if (MediaRecorder.isTypeSupported("audio/mp4")) return {
+mimeType: "audio/mp4",
+format: "mp4",
+needsEncodingParams: false
+};
+return {
+mimeType: "",
+format: "webm",
+needsEncodingParams: true
+};
 }
 /**
-*
+* Audio capture manager with buffering support.
+*
+* Usage:
+* 1. Create instance with onChunk callback
+* 2. Call start() - immediately begins capturing
+* 3. Call setReady() when connection is established - flushes buffer
+* 4. Call stop() when done
 */
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+var AudioCapture = class AudioCapture {
+mediaStream = null;
+recorder = null;
+buffer = [];
+isReady = false;
+isRecording = false;
+onChunk;
+audioFormat;
+deviceId;
+/**
+* Time slice for MediaRecorder in milliseconds.
+*
+* Safari requires a larger timeslice (1000ms) to properly flush its internal
+* audio buffers. Smaller values cause Safari to drop or truncate audio data.
+* See: https://community.openai.com/t/whisper-problem-with-audio-mp4-blobs-from-safari/
+*
+* Other browsers (Chrome, Firefox, Edge) work well with smaller timeslices
+* which provide lower latency for real-time transcription.
+*/
+static TIME_SLICE_MS = 100;
+static SAFARI_TIME_SLICE_MS = 1e3;
+/**
+* @param onChunk - Callback for receiving audio chunks
+* @param deviceId - Optional audio device ID (empty string or undefined for system default)
+*/
+constructor(onChunk, deviceId) {
+this.onChunk = onChunk;
+this.audioFormat = getSupportedAudioFormat();
+this.deviceId = deviceId;
+}
+/**
+* Get the appropriate timeslice for the current browser.
+* Safari needs a larger timeslice to avoid dropping audio data.
+*/
+getTimeSlice() {
+return isSafari() ? AudioCapture.SAFARI_TIME_SLICE_MS : AudioCapture.TIME_SLICE_MS;
+}
+/**
+* Get the timeslice being used (in milliseconds).
+* Useful for callers that need to wait for audio processing.
+*/
+getTimeSliceMs() {
+return this.getTimeSlice();
+}
+/**
+* Get the audio format being used.
+*/
+getFormat() {
+return this.audioFormat;
+}
+/**
+* Start capturing audio immediately.
+*
+* Audio chunks will be buffered until setReady() is called.
+*/
+async start() {
+const config = getConfig();
+if (this.isRecording) {
+if (config.debug) console.log("[SpeechOS] AudioCapture already recording");
+return;
+}
+this.buffer = [];
+this.isReady = false;
+const constraints = { audio: {
+echoCancellation: true,
+noiseSuppression: true,
+...this.deviceId ? { deviceId: { exact: this.deviceId } } : {}
+} };
+if (config.debug) {
+console.log("[SpeechOS] AudioCapture starting with format:", this.audioFormat.mimeType);
+console.log("[SpeechOS] Detected Safari:", isSafari());
+if (this.deviceId) console.log("[SpeechOS] Using audio device:", this.deviceId);
+}
+try {
+this.mediaStream = await navigator.mediaDevices.getUserMedia(constraints);
+const recorderOptions = {};
+if (this.audioFormat.mimeType) recorderOptions.mimeType = this.audioFormat.mimeType;
+this.recorder = new MediaRecorder(this.mediaStream, recorderOptions);
+this.recorder.ondataavailable = (event) => {
+if (event.data && event.data.size > 0) this.handleChunk(event.data);
+};
+this.recorder.onerror = (event) => {
+console.error("[SpeechOS] MediaRecorder error:", event);
+};
+const timeSlice = this.getTimeSlice();
+this.recorder.start(timeSlice);
+this.isRecording = true;
+if (config.debug) console.log(`[SpeechOS] AudioCapture started with ${timeSlice}ms timeslice, buffering until ready`);
+} catch (error) {
+if (this.deviceId && error instanceof Error) {
+console.warn("[SpeechOS] Selected device unavailable, trying default:", error.message);
+this.mediaStream = await navigator.mediaDevices.getUserMedia({ audio: {
+echoCancellation: true,
+noiseSuppression: true
+} });
+const recorderOptions = {};
+if (this.audioFormat.mimeType) recorderOptions.mimeType = this.audioFormat.mimeType;
+this.recorder = new MediaRecorder(this.mediaStream, recorderOptions);
+this.recorder.ondataavailable = (event) => {
+if (event.data && event.data.size > 0) this.handleChunk(event.data);
+};
+this.recorder.start(this.getTimeSlice());
+this.isRecording = true;
+} else throw error;
+}
+}
+/**
+* Handle an audio chunk with atomic buffer swap pattern.
+*
+* If not ready: buffer the chunk.
+* If ready: send directly via callback.
+*/
+handleChunk(chunk) {
+if (this.isReady) this.onChunk(chunk);
+else this.buffer.push(chunk);
+}
+/**
+* Mark the capture as ready (connection established).
+*
+* This flushes any buffered chunks and switches to direct mode.
+* Uses atomic swap to prevent chunk reordering.
+*/
+setReady() {
+const config = getConfig();
+if (this.isReady) return;
+const toFlush = this.buffer;
+this.buffer = [];
+for (const chunk of toFlush) this.onChunk(chunk);
+this.isReady = true;
+if (config.debug) console.log(`[SpeechOS] AudioCapture ready, flushed ${toFlush.length} buffered chunks`);
+}
+/**
+* Stop capturing audio and wait for final chunk.
+*
+* Uses requestData() before stop() to force the MediaRecorder to flush
+* any buffered audio immediately. This is critical for Safari which
+* may hold audio data in internal buffers.
+*
+* Safari requires an additional delay after stopping to ensure all audio
+* from its internal encoding pipeline has been fully processed and emitted.
+*/
+async stop() {
+const config = getConfig();
+const safari = isSafari();
+if (this.recorder && this.recorder.state !== "inactive") {
+if (this.recorder.state === "recording") try {
+const dataPromise = new Promise((resolve) => {
+const handler = (event) => {
+this.recorder?.removeEventListener("dataavailable", handler);
+if (config.debug) console.log(`[SpeechOS] requestData flush received: ${event.data.size} bytes`);
+resolve();
+};
+this.recorder?.addEventListener("dataavailable", handler);
+});
+this.recorder.requestData();
+if (config.debug) console.log("[SpeechOS] Requested data flush before stop");
+await dataPromise;
+} catch (e) {
+if (config.debug) console.log("[SpeechOS] requestData() not supported or failed:", e);
+}
+const stopPromise = new Promise((resolve) => {
+if (!this.recorder) {
+resolve();
+return;
+}
+this.recorder.onstop = () => {
+if (config.debug) console.log("[SpeechOS] MediaRecorder onstop fired");
+resolve();
+};
+});
+this.recorder.stop();
+await stopPromise;
+if (safari) {
+if (config.debug) console.log("[SpeechOS] Safari: waiting 2s for encoding pipeline to flush");
+await new Promise((resolve) => setTimeout(resolve, 2e3));
+}
+}
+if (this.mediaStream) {
+for (const track of this.mediaStream.getTracks()) track.stop();
+this.mediaStream = null;
+}
+this.recorder = null;
+this.isRecording = false;
+this.isReady = false;
+this.buffer = [];
+if (config.debug) console.log("[SpeechOS] AudioCapture stopped");
+}
+/**
+* Check if currently recording.
+*/
+get recording() {
+return this.isRecording;
+}
+/**
+* Check if ready (connection established, direct mode active).
+*/
+get ready() {
+return this.isReady;
+}
+/**
+* Get the number of buffered chunks waiting to be sent.
+*/
+get bufferedChunks() {
+return this.buffer.length;
+}
+};
 /**
-*
+* Factory function to create an AudioCapture instance.
+* @param onChunk - Callback for receiving audio chunks
+* @param deviceId - Optional audio device ID (empty string or undefined for system default)
 */
-function
-
-localStorage.removeItem(STORAGE_KEY);
-} catch {}
+function createAudioCapture(onChunk, deviceId) {
+return new AudioCapture(onChunk, deviceId);
 }
+
+//#endregion
+//#region src/websocket.ts
+const MESSAGE_TYPE_AUTH = "auth";
+const MESSAGE_TYPE_READY = "ready";
+const MESSAGE_TYPE_TRANSCRIPTION = "transcription";
+const MESSAGE_TYPE_REQUEST_TRANSCRIPT = "request_transcript";
+const MESSAGE_TYPE_TRANSCRIPT = "transcript";
+const MESSAGE_TYPE_EDIT_TEXT = "edit_text";
+const MESSAGE_TYPE_EDITED_TEXT = "edited_text";
+const MESSAGE_TYPE_EXECUTE_COMMAND = "execute_command";
+const MESSAGE_TYPE_COMMAND_RESULT = "command_result";
+const MESSAGE_TYPE_ERROR = "error";
 /**
-*
+* Response timeout in milliseconds.
 */
-
-
-
-
-
-
-
-
-
-
-
+const RESPONSE_TIMEOUT_MS = 15e3;
+/**
+* A deferred promise with timeout support.
+*/
+var Deferred$1 = class {
+promise;
+_resolve;
+_reject;
+_timeoutId = null;
+_settled = false;
+constructor() {
+this.promise = new Promise((resolve, reject) => {
+this._resolve = resolve;
+this._reject = reject;
+});
+}
+setTimeout(ms, errorMessage, errorCode, errorSource) {
+this._timeoutId = setTimeout(() => {
+if (!this._settled) {
+console.error(`[SpeechOS] Error: ${errorMessage} (${errorCode})`);
+events.emit("error", {
+code: errorCode,
+message: errorMessage,
+source: errorSource
+});
+this.reject(new Error(errorMessage));
+}
+}, ms);
+}
+resolve(value) {
+if (!this._settled) {
+this._settled = true;
+this.clearTimeout();
+this._resolve(value);
+}
+}
+reject(error) {
+if (!this._settled) {
+this._settled = true;
+this.clearTimeout();
+this._reject(error);
+}
+}
+clearTimeout() {
+if (this._timeoutId !== null) {
+clearTimeout(this._timeoutId);
+this._timeoutId = null;
+}
+}
+get isSettled() {
+return this._settled;
+}
 };
+/**
+* Maximum time to wait for WebSocket buffer to drain.
+*/
+const BUFFER_DRAIN_TIMEOUT_MS = 5e3;
+/**
+* Polling interval for checking WebSocket buffer.
+*/
+const BUFFER_CHECK_INTERVAL_MS = 50;
+/**
+* WebSocket connection manager for voice sessions.
+*/
+var WebSocketManager = class {
+ws = null;
+audioCapture = null;
+sessionId = null;
+pendingAuth = null;
+pendingTranscript = null;
+pendingEditText = null;
+pendingCommand = null;
+pendingAudioSends = /* @__PURE__ */ new Set();
+editOriginalText = null;
+lastInputText = void 0;
+sessionAction = "dictate";
+sessionInputText = "";
+sessionCommands = [];
+sessionSettings = {};
+/**
+* Get the WebSocket URL for voice sessions.
+*/
+getWebSocketUrl() {
+const config = getConfig();
+const host = config.host || "https://app.speechos.ai";
+const wsUrl = host.replace(/^http/, "ws");
+return `${wsUrl}/ws/voice/`;
+}
+/**
+* Start a voice session with the WebSocket backend.
+*
+* This method:
+* 1. Starts audio capture immediately (buffering)
+* 2. Opens WebSocket connection
+* 3. Authenticates with API key and action parameters
+* 4. Flushes buffered audio and continues streaming
+*
+* @param options - Session options including action type and parameters
+*/
+async startVoiceSession(options) {
+const config = getConfig();
+this.sessionAction = options?.action || "dictate";
+this.sessionInputText = options?.inputText || "";
+this.sessionCommands = options?.commands || [];
+this.sessionSettings = options?.settings || {};
+if (this.sessionAction === "edit") this.editOriginalText = this.sessionInputText;
+if (config.debug) console.log("[SpeechOS] Starting WebSocket voice session...");
+this.audioCapture = createAudioCapture((chunk) => {
+this.sendAudioChunk(chunk);
+}, this.sessionSettings.audioDeviceId);
+await this.audioCapture.start();
+if (options?.onMicReady) options.onMicReady();
+state.setMicEnabled(true);
+const wsUrl = this.getWebSocketUrl();
+if (config.debug) console.log("[SpeechOS] Connecting to WebSocket:", wsUrl);
+this.ws = new WebSocket(wsUrl);
+this.ws.onopen = () => {
+if (config.debug) console.log("[SpeechOS] WebSocket connected, authenticating...");
+this.authenticate();
+};
+this.ws.onmessage = (event) => {
+this.handleMessage(event.data);
+};
+this.ws.onerror = (event) => {
+console.error("[SpeechOS] WebSocket error:", event);
+events.emit("error", {
+code: "websocket_error",
+message: "WebSocket connection error",
+source: "connection"
+});
+};
+this.ws.onclose = (event) => {
+if (config.debug) console.log("[SpeechOS] WebSocket closed:", event.code, event.reason);
+state.setConnected(false);
+};
+this.pendingAuth = new Deferred$1();
+this.pendingAuth.setTimeout(RESPONSE_TIMEOUT_MS, "Connection timed out", "connection_timeout", "connection");
+await this.pendingAuth.promise;
+this.pendingAuth = null;
+if (this.audioCapture) this.audioCapture.setReady();
+state.setConnected(true);
+if (config.debug) console.log("[SpeechOS] WebSocket voice session ready");
+}
+/**
+* Send authentication message with action parameters.
+* All session parameters are now sent upfront in the auth message.
+*/
+authenticate() {
+const config = getConfig();
+const audioFormat = getSupportedAudioFormat();
+const settings = this.sessionSettings;
+const anonymousId = getAnonymousId();
+const authMessage = {
+type: MESSAGE_TYPE_AUTH,
+api_key: config.apiKey,
+user_id: config.userId || null,
+anonymous_id: anonymousId,
+input_language: settings.inputLanguageCode ?? "en-US",
+output_language: settings.outputLanguageCode ?? "en-US",
+smart_format: settings.smartFormat ?? true,
+custom_vocabulary: settings.vocabulary ?? [],
+custom_snippets: settings.snippets ?? [],
+audio_format: audioFormat.format,
+action: this.sessionAction,
+input_text: this.sessionInputText,
+commands: this.sessionCommands
+};
+if (config.debug) console.log("[SpeechOS] Sending auth message with action:", this.sessionAction);
+this.ws?.send(JSON.stringify(authMessage));
+}
+/**
+* Send an audio chunk over the WebSocket.
+* Tracks the promise so we can wait for all sends to complete.
+*/
+sendAudioChunk(chunk) {
+const sendPromise = this.doSendAudioChunk(chunk);
+this.pendingAudioSends.add(sendPromise);
+sendPromise.finally(() => {
+this.pendingAudioSends.delete(sendPromise);
+});
+}
+/**
+* Actually send the audio chunk (async operation).
+*/
+async doSendAudioChunk(chunk) {
+if (this.ws && this.ws.readyState === WebSocket.OPEN) {
+const arrayBuffer = await chunk.arrayBuffer();
+this.ws.send(arrayBuffer);
+}
+}
+/**
+* Handle incoming WebSocket messages.
+*/
+handleMessage(data) {
+const config = getConfig();
+try {
+const message = JSON.parse(data);
+if (config.debug) console.log("[SpeechOS] WebSocket message:", message);
+switch (message.type) {
+case MESSAGE_TYPE_READY:
+this.handleReady(message);
+break;
+case MESSAGE_TYPE_TRANSCRIPTION:
+this.handleIntermediateTranscription(message);
+break;
+case MESSAGE_TYPE_TRANSCRIPT:
+this.handleFinalTranscript(message);
+break;
+case MESSAGE_TYPE_EDITED_TEXT:
+this.handleEditedText(message);
+break;
+case MESSAGE_TYPE_COMMAND_RESULT:
+this.handleCommandResult(message);
+break;
+case MESSAGE_TYPE_ERROR:
+this.handleError(message);
+break;
+default: if (config.debug) console.log("[SpeechOS] Unknown message type:", message.type);
+}
+} catch (error) {
+console.error("[SpeechOS] Failed to parse message:", error);
+}
+}
+handleReady(message) {
+const config = getConfig();
+this.sessionId = message.session_id;
+if (config.debug) console.log("[SpeechOS] Session ready:", this.sessionId);
+if (this.pendingAuth) this.pendingAuth.resolve();
+}
+handleIntermediateTranscription(message) {
+const config = getConfig();
+if (config.debug) console.log("[SpeechOS] Intermediate transcription:", message.transcript, "final:", message.is_final);
+}
+handleFinalTranscript(message) {
+const transcript = message.transcript || "";
+events.emit("transcription:complete", { text: transcript });
+if (this.pendingTranscript) {
+this.pendingTranscript.resolve(transcript);
+this.pendingTranscript = null;
+}
+}
+handleEditedText(message) {
+const editedText = message.text || "";
+events.emit("edit:complete", {
+text: editedText,
+originalText: this.editOriginalText || ""
+});
+if (this.pendingEditText) {
+this.pendingEditText.resolve(editedText);
+this.pendingEditText = null;
+}
+this.editOriginalText = null;
+}
+handleCommandResult(message) {
+const commandResult = message.command || null;
+this.lastInputText = message.transcript;
+events.emit("command:complete", { command: commandResult });
+if (this.pendingCommand) {
+this.pendingCommand.resolve(commandResult);
+this.pendingCommand = null;
+}
+}
+handleError(message) {
+const errorCode = message.code || "server_error";
+const errorMessage = message.message || "A server error occurred";
+console.error(`[SpeechOS] Error: ${errorMessage} (${errorCode})`);
+events.emit("error", {
+code: errorCode,
+message: errorMessage,
+source: "server"
+});
+const error = new Error(errorMessage);
+if (this.pendingAuth) {
+this.pendingAuth.reject(error);
+this.pendingAuth = null;
+}
+if (this.pendingTranscript) {
+this.pendingTranscript.reject(error);
+this.pendingTranscript = null;
+}
+if (this.pendingEditText) {
+this.pendingEditText.reject(error);
+this.pendingEditText = null;
+}
+if (this.pendingCommand) {
+this.pendingCommand.reject(error);
+this.pendingCommand = null;
+}
+}
+/**
+* Stop the voice session and request the transcript.
+*/
+async stopVoiceSession() {
+const config = getConfig();
+if (config.debug) console.log("[SpeechOS] Stopping voice session, requesting transcript...");
+await this.stopAudioCapture();
+this.pendingTranscript = new Deferred$1();
+this.pendingTranscript.setTimeout(RESPONSE_TIMEOUT_MS, "Transcription timed out. Please try again.", "transcription_timeout", "timeout");
+this.sendMessage({ type: MESSAGE_TYPE_REQUEST_TRANSCRIPT });
+const result = await this.pendingTranscript.promise;
+this.pendingTranscript = null;
+return result;
+}
+/**
+* Request text editing using the transcript as instructions.
+* Note: The input text was already sent in the auth message via startVoiceSession.
+*/
|
+
async requestEditText(_originalText) {
|
|
1732
|
+
const config = getConfig();
|
|
1733
|
+
if (config.debug) console.log("[SpeechOS] Requesting text edit...");
|
|
1734
|
+
await this.stopAudioCapture();
|
|
1735
|
+
this.pendingEditText = new Deferred$1();
|
|
1736
|
+
this.pendingEditText.setTimeout(RESPONSE_TIMEOUT_MS, "Edit request timed out. Please try again.", "edit_timeout", "timeout");
|
|
1737
|
+
this.sendMessage({ type: MESSAGE_TYPE_EDIT_TEXT });
|
|
1738
|
+
const result = await this.pendingEditText.promise;
|
|
1739
|
+
this.pendingEditText = null;
|
|
1740
|
+
return result;
|
|
1741
|
+
}
|
|
1742
|
+
/**
|
|
1743
|
+
* Request command matching using the transcript as input.
|
|
1744
|
+
* Note: The command definitions were already sent in the auth message via startVoiceSession.
|
|
1745
|
+
*/
|
|
1746
|
+
async requestCommand(_commands) {
|
|
1747
|
+
const config = getConfig();
|
|
1748
|
+
if (config.debug) console.log("[SpeechOS] Requesting command match...");
|
|
1749
|
+
await this.stopAudioCapture();
|
|
1750
|
+
this.pendingCommand = new Deferred$1();
|
|
1751
|
+
this.pendingCommand.setTimeout(RESPONSE_TIMEOUT_MS, "Command request timed out. Please try again.", "command_timeout", "timeout");
|
|
1752
|
+
this.sendMessage({ type: MESSAGE_TYPE_EXECUTE_COMMAND });
|
|
1753
|
+
const result = await this.pendingCommand.promise;
|
|
1754
|
+
this.pendingCommand = null;
|
|
1755
|
+
return result;
|
|
1756
|
+
}
|
|
1757
|
+
/**
|
|
1758
|
+
* Stop audio capture and wait for all data to be sent.
|
|
1759
|
+
*
|
|
1760
|
+
* Waits for:
|
|
1761
|
+
* 1. All pending sendAudioChunk calls to complete (arrayBuffer conversion)
|
|
1762
|
+
* 2. WebSocket buffer to drain (all data transmitted)
|
|
1763
|
+
*
|
|
1764
|
+
* WebSocket message ordering ensures server receives all audio before transcript request.
|
|
1765
|
+
*/
|
|
1766
|
+
async stopAudioCapture() {
|
|
1767
|
+
const config = getConfig();
|
|
1768
|
+
const startTime = Date.now();
|
|
1769
|
+
if (config.debug) console.log("[SpeechOS] stopAudioCapture: starting...");
|
|
1770
|
+
if (this.audioCapture) {
|
|
1771
|
+
await this.audioCapture.stop();
|
|
1772
|
+
this.audioCapture = null;
|
|
1773
|
+
if (config.debug) console.log(`[SpeechOS] stopAudioCapture: recorder stopped after ${Date.now() - startTime}ms`);
|
|
1774
|
+
}
|
|
1775
|
+
state.setMicEnabled(false);
|
|
1776
|
+
if (this.pendingAudioSends.size > 0) {
|
|
1777
|
+
if (config.debug) console.log(`[SpeechOS] stopAudioCapture: waiting for ${this.pendingAudioSends.size} pending audio sends...`);
|
|
1778
|
+
await Promise.all(this.pendingAudioSends);
|
|
1779
|
+
if (config.debug) console.log(`[SpeechOS] stopAudioCapture: all sends complete after ${Date.now() - startTime}ms`);
|
|
1780
|
+
} else if (config.debug) console.log("[SpeechOS] stopAudioCapture: no pending sends");
|
|
1781
|
+
await this.waitForBufferDrain();
|
|
1782
|
+
if (config.debug) console.log(`[SpeechOS] stopAudioCapture: complete after ${Date.now() - startTime}ms`);
|
|
1783
|
+
}
|
|
1784
|
+
/**
|
|
1785
|
+
* Wait for the WebSocket send buffer to drain.
|
|
1786
|
+
*
|
|
1787
|
+
* This ensures all audio data has been transmitted before we request
|
|
1788
|
+
* the transcript. Uses the same pattern as LiveKit's ReadableStream approach.
|
|
1789
|
+
*/
|
|
1790
|
+
async waitForBufferDrain() {
|
|
1791
|
+
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
|
|
1792
|
+
const config = getConfig();
|
|
1793
|
+
const startTime = Date.now();
|
|
1794
|
+
while (this.ws.bufferedAmount > 0) {
|
|
1795
|
+
if (Date.now() - startTime > BUFFER_DRAIN_TIMEOUT_MS) {
|
|
1796
|
+
console.warn(`[SpeechOS] Buffer drain timeout, ${this.ws.bufferedAmount} bytes still pending`);
|
|
1797
|
+
break;
|
|
1798
|
+
}
|
|
1799
|
+
await new Promise((resolve) => setTimeout(resolve, BUFFER_CHECK_INTERVAL_MS));
|
|
1800
|
+
}
|
|
1801
|
+
if (config.debug) console.log(`[SpeechOS] Buffer drained in ${Date.now() - startTime}ms`);
|
|
1802
|
+
}
|
|
1803
|
+
/**
|
|
1804
|
+
* Send a JSON message over the WebSocket.
|
|
1805
|
+
*/
|
|
1806
|
+
sendMessage(message) {
|
|
1807
|
+
if (this.ws && this.ws.readyState === WebSocket.OPEN) this.ws.send(JSON.stringify(message));
|
|
1808
|
+
}
|
|
1809
|
+
/**
|
|
1810
|
+
* Disconnect from the WebSocket.
|
|
1811
|
+
*/
|
|
1812
|
+
async disconnect() {
|
|
1813
|
+
const config = getConfig();
|
|
1814
|
+
if (config.debug) console.log("[SpeechOS] Disconnecting WebSocket...");
|
|
1815
|
+
await this.stopAudioCapture();
|
|
1816
|
+
if (this.ws) {
|
|
1817
|
+
this.ws.close();
|
|
1818
|
+
this.ws = null;
|
|
1819
|
+
}
|
|
1820
|
+
const error = new Error("Disconnected");
|
|
1821
|
+
if (this.pendingAuth) {
|
|
1822
|
+
this.pendingAuth.reject(error);
|
|
1823
|
+
this.pendingAuth = null;
|
|
1824
|
+
}
|
|
1825
|
+
if (this.pendingTranscript) {
|
|
1826
|
+
this.pendingTranscript.reject(error);
|
|
1827
|
+
this.pendingTranscript = null;
|
|
1828
|
+
}
|
|
1829
|
+
if (this.pendingEditText) {
|
|
1830
|
+
this.pendingEditText.reject(error);
|
|
1831
|
+
this.pendingEditText = null;
|
|
1832
|
+
}
|
|
1833
|
+
if (this.pendingCommand) {
|
|
1834
|
+
this.pendingCommand.reject(error);
|
|
1835
|
+
this.pendingCommand = null;
|
|
1836
|
+
}
|
|
1837
|
+
this.sessionId = null;
|
|
1838
|
+
this.editOriginalText = null;
|
|
1839
|
+
this.lastInputText = void 0;
|
|
1840
|
+
this.sessionSettings = {};
|
|
1841
|
+
state.setConnected(false);
|
|
1842
|
+
state.setMicEnabled(false);
|
|
1843
|
+
if (config.debug) console.log("[SpeechOS] WebSocket disconnected");
|
|
1844
|
+
}
|
|
1845
|
+
/**
|
|
1846
|
+
* Check if connected to WebSocket.
|
|
1847
|
+
*/
|
|
1848
|
+
isConnected() {
|
|
1849
|
+
return this.ws !== null && this.ws.readyState === WebSocket.OPEN;
|
|
1850
|
+
}
|
|
1851
|
+
/**
|
|
1852
|
+
* Get the last input text from a command result.
|
|
1853
|
+
* This is the raw transcript of what the user said.
|
|
1854
|
+
*/
|
|
1855
|
+
getLastInputText() {
|
|
1856
|
+
return this.lastInputText;
|
|
1857
|
+
}
|
|
1858
|
+
};
|
|
1859
|
+
const websocket = new WebSocketManager();
|
|
872
1860
|
|
|
873
1861
|
//#endregion
|
|
874
1862
|
//#region src/speechos.ts
|
|
875
1863
|
/**
|
|
1864
|
+
* Get the active voice backend (always websocket now)
|
|
1865
|
+
*/
|
|
1866
|
+
function getBackend$1() {
|
|
1867
|
+
return websocket;
|
|
1868
|
+
}
|
|
1869
|
+
/**
|
|
876
1870
|
* SpeechOS Core SDK
|
|
877
1871
|
*
|
|
878
1872
|
* Provides two API layers:
|
|
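The handleMessage switch above implies the shape of the server-to-client protocol. The following is a sketch of the inbound message types inferred from the handlers; the field names come straight from the code, but the string literals stand in for the MESSAGE_TYPE_* constants, whose values are not visible in this section, and the server may send additional fields.

```ts
// Inferred from handleReady/handleFinalTranscript/etc. above: a sketch,
// not the package's published types. The literal `type` strings are
// assumptions for the MESSAGE_TYPE_* constants.
type ServerMessage =
	| { type: "ready"; session_id: string }
	| { type: "transcription"; transcript: string; is_final: boolean }
	| { type: "transcript"; transcript: string }
	| { type: "edited_text"; text: string }
	| { type: "command_result"; command: unknown; transcript?: string }
	| { type: "error"; code?: string; message?: string };
```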
@@ -891,7 +1885,6 @@ var SpeechOSCore = class {
 		const currentConfig$1 = getConfig();
 		if (currentConfig$1.debug) console.log("[SpeechOS] Initialized with config:", {
 			host: currentConfig$1.host,
-			position: currentConfig$1.position,
 			debug: currentConfig$1.debug
 		});
 	}
@@ -931,7 +1924,6 @@ var SpeechOSCore = class {
 		state.setRecordingState("processing");
 		try {
 			const transcript = await livekit.stopAndGetTranscript();
-			transcriptStore.saveTranscript(transcript, "dictate");
 			state.completeRecording();
 			return transcript;
 		} catch (error) {
@@ -948,7 +1940,6 @@ var SpeechOSCore = class {
 		state.setRecordingState("processing");
 		try {
 			const editedText = await livekit.stopAndEdit(originalText);
-			transcriptStore.saveTranscript(editedText, "edit", originalText);
 			state.completeRecording();
 			return editedText;
 		} catch (error) {
@@ -974,8 +1965,13 @@ var SpeechOSCore = class {
 		state.setActiveAction("dictate");
 		state.startRecording();
 		try {
-
-
+			const backend = getBackend$1();
+			await backend.startVoiceSession({
+				action: "dictate",
+				onMicReady: () => {
+					state.setRecordingState("recording");
+				}
+			});
 			return new Promise((resolve, reject) => {
 				this._dictateResolve = resolve;
 				this._dictateReject = reject;
@@ -995,8 +1991,8 @@ var SpeechOSCore = class {
 	async stopDictation() {
 		state.setRecordingState("processing");
 		try {
-			const
-
+			const backend = getBackend$1();
+			const transcript = await backend.stopVoiceSession();
 			state.completeRecording();
 			if (this._dictateResolve) {
 				this._dictateResolve(transcript);
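Taken together, these two hunks make dictation a start-session / stop-session pair built on the deferred-promise pattern. A minimal usage sketch, under the assumption that the start method paired with stopDictation() is exposed as dictate() (its name is elided in the hunk above; the command()/stopCommand() pair added later follows the same shape):

```ts
import { speechOS } from "@speechos/core";

// Sketch only: `dictate` is an assumed name for the start method whose
// body appears in the hunk above; stopDictation() is taken from the diff.
const pending = speechOS.dictate(); // opens the socket and starts the mic
// ... user speaks, then signals they are done (e.g. releases push-to-talk) ...
const transcript = await speechOS.stopDictation(); // `pending` resolves with the same value
console.log(transcript);
```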
@@ -1030,8 +2026,14 @@ var SpeechOSCore = class {
 		state.startRecording();
 		this._editOriginalText = originalText;
 		try {
-
-
+			const backend = getBackend$1();
+			await backend.startVoiceSession({
+				action: "edit",
+				inputText: originalText,
+				onMicReady: () => {
+					state.setRecordingState("recording");
+				}
+			});
 			return new Promise((resolve, reject) => {
 				this._editResolve = resolve;
 				this._editReject = reject;
|
|
|
1052
2054
|
async stopEdit() {
|
|
1053
2055
|
state.setRecordingState("processing");
|
|
1054
2056
|
try {
|
|
2057
|
+
const backend = getBackend$1();
|
|
1055
2058
|
const originalText = this._editOriginalText || "";
|
|
1056
|
-
const editedText = await
|
|
1057
|
-
transcriptStore.saveTranscript(editedText, "edit", originalText);
|
|
2059
|
+
const editedText = await backend.requestEditText(originalText);
|
|
1058
2060
|
state.completeRecording();
|
|
1059
2061
|
if (this._editResolve) {
|
|
1060
2062
|
this._editResolve(editedText);
|
|
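The edit flow mirrors dictation, except the text to edit now rides along in the auth message (per the requestEditText note in the WebSocketManager above), so only the spoken instruction remains to be processed at stop time. A sketch, assuming the start method shown in the previous hunk is exposed as edit(originalText):

```ts
import { speechOS } from "@speechos/core";

// Sketch: `edit` is an assumed name for the start method in the @@ -1030
// hunk; stopEdit() is taken directly from the diff.
speechOS.edit("Their going to the park tomorow.");
// ... user says something like "fix the spelling" ...
const editedText = await speechOS.stopEdit(); // resolves with the rewritten text
console.log(editedText);
```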
@@ -1077,6 +2079,71 @@ var SpeechOSCore = class {
 		}
 	}
 	/**
+	 * One-shot command: connect, wait for agent, record voice, match against commands
+	 * Automatically handles the full voice session lifecycle
+	 *
+	 * @param commands - Array of command definitions to match against
+	 * @returns The matched command result or null if no match
+	 */
+	async command(commands) {
+		this.ensureInitialized();
+		state.setActiveAction("command");
+		state.startRecording();
+		this._commandCommands = commands;
+		try {
+			const backend = getBackend$1();
+			await backend.startVoiceSession({
+				action: "command",
+				commands,
+				onMicReady: () => {
+					state.setRecordingState("recording");
+				}
+			});
+			return new Promise((resolve, reject) => {
+				this._commandResolve = resolve;
+				this._commandReject = reject;
+			});
+		} catch (error) {
+			state.setError(error instanceof Error ? error.message : "Failed to start command");
+			await this.cleanup();
+			throw error;
+		}
+	}
+	_commandCommands;
+	_commandResolve;
+	_commandReject;
+	/**
+	 * Stop command recording and get the matched command
+	 * Call this after command() when user stops speaking
+	 */
+	async stopCommand() {
+		state.setRecordingState("processing");
+		try {
+			const backend = getBackend$1();
+			const commands = this._commandCommands || [];
+			const result = await backend.requestCommand(commands);
+			state.completeRecording();
+			if (this._commandResolve) {
+				this._commandResolve(result);
+				this._commandResolve = void 0;
+				this._commandReject = void 0;
+			}
+			return result;
+		} catch (error) {
+			const err = error instanceof Error ? error : new Error("Command request failed");
+			state.setError(err.message);
+			if (this._commandReject) {
+				this._commandReject(err);
+				this._commandResolve = void 0;
+				this._commandReject = void 0;
+			}
+			throw err;
+		} finally {
+			this._commandCommands = void 0;
+			await this.cleanup();
+		}
+	}
+	/**
 	 * Cancel the current operation
 	 */
 	async cancel() {
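The new command()/stopCommand() pair uses the same deferred-promise pattern as dictation: command() starts the session and returns a promise that stopCommand() later resolves. A usage sketch using only names visible in this hunk; the shape of a command definition is not shown in this diff, so the array stays abstract:

```ts
import { speechOS } from "@speechos/core";

// Command definitions are package-specific; their shape is not visible in
// this diff, so the array is left abstract here.
const commands: unknown[] = [];

speechOS.command(commands); // connects, starts the mic; resolves via stopCommand()
// ... user speaks; the app decides the utterance is finished ...
const match = await speechOS.stopCommand(); // the matched command, or null
if (match === null) console.log("No command matched");
```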
@@ -1091,7 +2158,13 @@ var SpeechOSCore = class {
 			this._editResolve = void 0;
 			this._editReject = void 0;
 		}
+		if (this._commandReject) {
+			this._commandReject(err);
+			this._commandResolve = void 0;
+			this._commandReject = void 0;
+		}
 		this._editOriginalText = void 0;
+		this._commandCommands = void 0;
 		await this.cleanup();
 		state.cancelRecording();
 	}
@@ -1118,7 +2191,8 @@ var SpeechOSCore = class {
 	}
 	async cleanup() {
 		try {
-
+			const backend = getBackend$1();
+			await backend.disconnect();
 		} catch (error) {
 			const config = getConfig();
 			if (config.debug) console.warn("[SpeechOS] Cleanup disconnect error:", error);
@@ -1134,6 +2208,9 @@ var SpeechOSCore = class {
 		this._editResolve = void 0;
 		this._editReject = void 0;
 		this._editOriginalText = void 0;
+		this._commandResolve = void 0;
+		this._commandReject = void 0;
+		this._commandCommands = void 0;
 		resetConfig();
 		state.reset();
 		events.clear();
@@ -1141,6 +2218,34 @@ var SpeechOSCore = class {
 };
 const speechOS = new SpeechOSCore();
 
+//#endregion
+//#region src/backend.ts
+/**
+ * WebSocket backend adapter - wraps the websocket module to match the VoiceBackend interface
+ */
+const websocketBackend = {
+	startVoiceSession: (options) => websocket.startVoiceSession(options),
+	stopVoiceSession: () => websocket.stopVoiceSession(),
+	requestEditText: (text) => websocket.requestEditText(text),
+	requestCommand: (commands) => websocket.requestCommand(commands),
+	disconnect: () => websocket.disconnect(),
+	isConnected: () => websocket.isConnected(),
+	getLastInputText: () => websocket.getLastInputText(),
+	prefetchToken: () => Promise.resolve({}),
+	startAutoRefresh: () => {},
+	stopAutoRefresh: () => {},
+	invalidateTokenCache: () => {}
+};
+/**
+ * Get the active voice backend.
+ * Always returns WebSocket backend (LiveKit is legacy).
+ *
+ * @returns The websocket backend
+ */
+function getBackend() {
+	return websocketBackend;
+}
+
 //#endregion
 //#region src/index.ts
 const VERSION = "0.1.0";
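The adapter above spells out the surface of the VoiceBackend interface it conforms to. The following is a reconstruction from the adapter's members; the parameter and return types are assumptions, and the shipped dist/backend.d.ts is the authoritative contract:

```ts
// Reconstructed from websocketBackend above. Types are guesses; consult
// the package's dist/backend.d.ts for the real signatures.
interface VoiceBackend {
	startVoiceSession(options: unknown): Promise<void>;
	stopVoiceSession(): Promise<string>;
	requestEditText(text: string): Promise<string>;
	requestCommand(commands: unknown[]): Promise<unknown>;
	disconnect(): Promise<void>;
	isConnected(): boolean;
	getLastInputText(): string | undefined;
	// Token-related members are no-ops in the websocket backend; they appear
	// to exist so the legacy LiveKit backend can share the same interface.
	prefetchToken(): Promise<object>;
	startAutoRefresh(): void;
	stopAutoRefresh(): void;
	invalidateTokenCache(): void;
}
```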
@@ -1151,15 +2256,15 @@ exports.Deferred = Deferred;
 exports.SpeechOSEventEmitter = SpeechOSEventEmitter;
 exports.VERSION = VERSION;
 exports.createStateManager = createStateManager;
-exports.defaultConfig = defaultConfig;
 exports.events = events;
+exports.getBackend = getBackend;
 exports.getConfig = getConfig;
 exports.livekit = livekit;
 exports.resetConfig = resetConfig;
 exports.setConfig = setConfig;
 exports.speechOS = speechOS;
 exports.state = state;
-exports.transcriptStore = transcriptStore;
 exports.updateUserId = updateUserId;
 exports.validateConfig = validateConfig;
+exports.websocket = websocket;
 //# sourceMappingURL=index.cjs.map
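For consumers, the export changes in this final hunk are breaking: defaultConfig and transcriptStore are removed, while getBackend and websocket are added. A hedged illustration, assuming the ESM build (dist/index.js) mirrors the CJS export list shown here:

```ts
// New in this range, per the export list above:
import { getBackend, websocket } from "@speechos/core";

// These no longer resolve after upgrading from 0.2.0:
// import { defaultConfig, transcriptStore } from "@speechos/core";
```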