@omote/core 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +462 -207
- package/dist/index.d.ts +462 -207
- package/dist/index.js +560 -211
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +552 -202
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
@@ -220,6 +220,19 @@ var AudioScheduler = class {
 async initialize() {
 console.log("[AudioScheduler] Ready for lazy initialization");
 }
+ /**
+ * Eagerly create and warm up the AudioContext
+ *
+ * Call this when a playback session starts (e.g., when AI response begins).
+ * The AudioContext needs time to initialize the audio hardware — on Windows
+ * this can take 50-100ms. By warming up early (before audio data arrives),
+ * the context is fully ready when schedule() is first called.
+ *
+ * Must be called after a user gesture (click/tap) for autoplay policy.
+ */
+ async warmup() {
+ await this.ensureContext();
+ }
 /**
 * Ensure AudioContext is created and ready
 * Called lazily on first schedule() - requires user gesture
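The comment on the new warmup() method explains why the context is created eagerly. The sketch below illustrates the same warm-up pattern using only the standard Web Audio API; it does not use the package's private ensureContext(), and the "#start" button is a hypothetical element.

```ts
// Create/resume the AudioContext inside a user-gesture handler so it is
// already "running" before the first buffer has to be scheduled.
let ctx: AudioContext | null = null;

async function warmupAudio(): Promise<AudioContext> {
  ctx ??= new AudioContext();
  if (ctx.state !== "running") {
    await ctx.resume(); // permitted here because this runs from a click handler
  }
  return ctx;
}

document.querySelector("#start")?.addEventListener("click", () => {
  void warmupAudio(); // later scheduling then starts with near-zero extra latency
});
```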
@@ -250,7 +263,7 @@ var AudioScheduler = class {
 const ctx = await this.ensureContext();
 const channels = this.options.channels ?? 1;
 if (!this.isPlaying) {
- this.nextPlayTime = ctx.currentTime;
+ this.nextPlayTime = ctx.currentTime + 0.05;
 this.isPlaying = true;
 }
 const audioBuffer = ctx.createBuffer(channels, audioData.length, ctx.sampleRate);
@@ -324,8 +337,19 @@ var AudioScheduler = class {
 }
 /**
 * Reset scheduler state for new playback session
+ * Stops any orphaned sources that weren't cleaned up by cancelAll()
 */
 reset() {
+ if (this.context) {
+ const now = this.context.currentTime;
+ for (const { source, gainNode } of this.scheduledSources) {
+ try {
+ gainNode.gain.setValueAtTime(0, now);
+ source.stop(now);
+ } catch {
+ }
+ }
+ }
 this.nextPlayTime = 0;
 this.isPlaying = false;
 this.scheduledSources = [];
@@ -453,7 +477,7 @@ var LAMPipeline = class {
 newBuffer.set(this.buffer, 0);
 newBuffer.set(samples, this.buffer.length);
 this.buffer = newBuffer;
-
+ while (this.buffer.length >= this.REQUIRED_SAMPLES) {
 await this.processBuffer(lam);
 }
 }
@@ -606,12 +630,20 @@ var LAMPipeline = class {
 };
 
 // src/audio/SyncedAudioPipeline.ts
+ function pcm16ToFloat32(buffer) {
+ const byteLen = buffer.byteLength & ~1;
+ const int16 = byteLen === buffer.byteLength ? new Int16Array(buffer) : new Int16Array(buffer, 0, byteLen / 2);
+ const float32 = new Float32Array(int16.length);
+ for (let i = 0; i < int16.length; i++) {
+ float32[i] = int16[i] / 32768;
+ }
+ return float32;
+ }
 var SyncedAudioPipeline = class extends EventEmitter {
 constructor(options) {
 super();
 this.options = options;
- this.
- this.bufferedChunks = [];
+ this.playbackStarted = false;
 this.monitorInterval = null;
 this.frameAnimationId = null;
 const sampleRate = options.sampleRate ?? 16e3;
@@ -622,11 +654,6 @@ var SyncedAudioPipeline = class extends EventEmitter {
 });
 this.lamPipeline = new LAMPipeline({
 sampleRate,
- onInference: (frameCount) => {
- if (this.waitingForFirstLAM) {
- this.onFirstLAMComplete();
- }
- },
 onError: (error) => {
 this.emit("error", error);
 }
@@ -642,25 +669,24 @@ var SyncedAudioPipeline = class extends EventEmitter {
 * Start a new playback session
 *
 * Resets all state and prepares for incoming audio chunks.
- *
+ * Audio will be scheduled immediately as chunks arrive (no buffering).
 */
 start() {
+ this.stopMonitoring();
 this.scheduler.reset();
 this.coalescer.reset();
 this.lamPipeline.reset();
- this.
- this.
+ this.playbackStarted = false;
+ this.scheduler.warmup();
 this.startFrameLoop();
 this.startMonitoring();
 }
 /**
 * Receive audio chunk from network
 *
- *
- *
- *
- * - Audio scheduling waits until first LAM completes
- * - Then all buffered audio is scheduled together with LAM frames
+ * Audio-first design: schedules audio immediately, LAM runs in background.
+ * This prevents LAM inference (50-300ms) from blocking audio scheduling,
+ * which caused audible stuttering with continuous audio streams.
 *
 * @param chunk - Uint8Array containing Int16 PCM audio
 */
@@ -669,51 +695,15 @@ var SyncedAudioPipeline = class extends EventEmitter {
 if (!combined) {
 return;
 }
- const
- const
-
-
-
- if (this.waitingForFirstLAM) {
- this.bufferedChunks.push(combined);
- const estimatedTime = this.scheduler.getCurrentTime();
- await this.lamPipeline.push(float32, estimatedTime, this.options.lam);
- } else {
- const scheduleTime = await this.scheduler.schedule(float32);
- await this.lamPipeline.push(float32, scheduleTime, this.options.lam);
+ const float32 = pcm16ToFloat32(combined);
+ const scheduleTime = await this.scheduler.schedule(float32);
+ if (!this.playbackStarted) {
+ this.playbackStarted = true;
+ this.emit("playback_start", scheduleTime);
 }
-
-
-
- *
- * This is the critical synchronization point:
- * - LAM frames are now ready in the queue
- * - Schedule all buffered audio chunks
- * - Adjust LAM frame timestamps to match actual schedule time
- * - Audio and LAM start playing together, perfectly synchronized
- */
- async onFirstLAMComplete() {
- this.waitingForFirstLAM = false;
- const beforeSchedule = this.scheduler.getCurrentTime();
- let actualStartTime = beforeSchedule;
- for (let i = 0; i < this.bufferedChunks.length; i++) {
- const buffer = this.bufferedChunks[i];
- const int16 = new Int16Array(buffer);
- const float32 = new Float32Array(int16.length);
- for (let j = 0; j < int16.length; j++) {
- float32[j] = int16[j] / 32768;
- }
- const scheduleTime = await this.scheduler.schedule(float32);
- if (i === 0) {
- actualStartTime = scheduleTime;
- }
- }
- const timeOffset = actualStartTime - beforeSchedule;
- if (timeOffset !== 0) {
- this.lamPipeline.adjustTimestamps(timeOffset);
- }
- this.bufferedChunks = [];
- this.emit("playback_start", actualStartTime);
+ this.lamPipeline.push(float32, scheduleTime, this.options.lam).catch((err) => {
+ this.emit("error", err);
+ });
 }
 /**
 * End of audio stream
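Taken together, these hunks switch SyncedAudioPipeline from "buffer until the first LAM inference finishes" to "schedule audio as soon as chunks arrive". A consumer-side sketch of that flow follows; it assumes the EventEmitter base exposes a standard on(), omits constructor options beyond sampleRate (the lam settings referenced as this.options.lam are not shown here), and uses a hypothetical name for the chunk-ingest method, whose body but not signature appears in the diff above.

```ts
import { SyncedAudioPipeline } from "@omote/core";

const pipeline = new SyncedAudioPipeline({ sampleRate: 16000 });

pipeline.on("playback_start", (t: number) => console.log("audio scheduled from", t));
pipeline.on("playback_complete", () => console.log("playback finished"));
pipeline.on("error", (err: Error) => console.error("pipeline error", err));

// start() should run in a user-gesture context so scheduler.warmup() can
// create the AudioContext before any audio data arrives.
function onUserStartedSession() {
  pipeline.start();
}

// Feed Int16 PCM chunks as they arrive; handleChunk is a hypothetical method
// name for whatever wraps the "Receive audio chunk from network" body above.
function onNetworkAudio(chunk: Uint8Array) {
  (pipeline as any).handleChunk(chunk);
}
```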
@@ -745,10 +735,9 @@ var SyncedAudioPipeline = class extends EventEmitter {
 async stop(fadeOutMs = 50) {
 this.stopMonitoring();
 await this.scheduler.cancelAll(fadeOutMs);
- this.bufferedChunks = [];
 this.coalescer.reset();
 this.lamPipeline.reset();
- this.
+ this.playbackStarted = false;
 this.emit("playback_complete", void 0);
 }
 /**
@@ -805,8 +794,7 @@ var SyncedAudioPipeline = class extends EventEmitter {
 */
 getState() {
 return {
-
- bufferedChunks: this.bufferedChunks.length,
+ playbackStarted: this.playbackStarted,
 coalescerFill: this.coalescer.fillLevel,
 lamFill: this.lamPipeline.fillLevel,
 queuedFrames: this.lamPipeline.queuedFrameCount,
@@ -822,7 +810,6 @@ var SyncedAudioPipeline = class extends EventEmitter {
 this.scheduler.dispose();
 this.coalescer.reset();
 this.lamPipeline.reset();
- this.bufferedChunks = [];
 }
 };
 
@@ -2049,7 +2036,7 @@ function hasWebGPUApi() {
 return "gpu" in navigator && navigator.gpu !== void 0;
 }
 function getRecommendedBackend() {
- if (isIOS()) {
+ if (isSafari() || isIOS()) {
 return "wasm";
 }
 return "webgpu";
@@ -2093,6 +2080,14 @@ function shouldEnableWasmProxy() {
 }
 return true;
 }
+ function isSafari() {
+ if (typeof navigator === "undefined") return false;
+ const ua = navigator.userAgent.toLowerCase();
+ return /safari/.test(ua) && !/chrome|crios|fxios|chromium|edg/.test(ua);
+ }
+ function shouldUseCpuLipSync() {
+ return isSafari();
+ }
 function isSpeechRecognitionAvailable() {
 if (typeof window === "undefined") return false;
 return "SpeechRecognition" in window || "webkitSpeechRecognition" in window;
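Both new helpers appear in the updated export list at the end of this diff, so a consumer can use them directly. A small hedged sketch:

```ts
import { isSafari, shouldUseCpuLipSync } from "@omote/core";

// Both helpers are pure user-agent checks, so they can drive an early
// asset-preloading decision before any model is downloaded.
if (shouldUseCpuLipSync()) {
  console.log("Safari detected:", isSafari(), "- preferring the small CPU lip-sync model");
} else {
  console.log("Non-Safari browser - the WebGPU/Wav2Vec2 path will be recommended");
}
```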
@@ -2137,13 +2132,13 @@ async function isWebGPUAvailable() {
 return false;
 }
 }
- function configureWasm(
-
+ function configureWasm(ort) {
+ ort.env.wasm.wasmPaths = WASM_CDN_PATH;
 const numThreads = getOptimalWasmThreads();
 const enableProxy = shouldEnableWasmProxy();
-
-
-
+ ort.env.wasm.numThreads = numThreads;
+ ort.env.wasm.simd = true;
+ ort.env.wasm.proxy = enableProxy;
 logger.info("WASM configured", {
 numThreads,
 simd: true,
@@ -2191,8 +2186,8 @@ async function getOnnxRuntimeForPreference(preference = "auto") {
 webgpuAvailable,
 resolvedBackend: backend
 });
- const
- return { ort
+ const ort = await getOnnxRuntime(backend);
+ return { ort, backend };
 }
 function getSessionOptions(backend) {
 if (backend === "webgpu") {
@@ -2213,12 +2208,12 @@ function getSessionOptions(backend) {
 };
 }
 async function createSessionWithFallback(modelBuffer, preferredBackend) {
- const
+ const ort = await getOnnxRuntime(preferredBackend);
 const modelData = new Uint8Array(modelBuffer);
 if (preferredBackend === "webgpu") {
 try {
 const options2 = getSessionOptions("webgpu");
- const session2 = await
+ const session2 = await ort.InferenceSession.create(modelData, options2);
 logger.info("Session created with WebGPU backend");
 return { session: session2, backend: "webgpu" };
 } catch (err) {
@@ -2228,7 +2223,7 @@ async function createSessionWithFallback(modelBuffer, preferredBackend) {
 }
 }
 const options = getSessionOptions("wasm");
- const session = await
+ const session = await ort.InferenceSession.create(modelData, options);
 logger.info("Session created with WASM backend");
 return { session, backend: "wasm" };
 }
@@ -2239,8 +2234,7 @@ function isOnnxRuntimeLoaded() {
 return ortInstance !== null;
 }
 
- // src/inference/
- var logger2 = createLogger("Wav2Vec2");
+ // src/inference/blendshapeUtils.ts
 var LAM_BLENDSHAPES = [
 "browDownLeft",
 "browDownRight",
@@ -2295,40 +2289,7 @@ var LAM_BLENDSHAPES = [
 "noseSneerRight",
 "tongueOut"
 ];
- var
- "<pad>",
- "<s>",
- "</s>",
- "<unk>",
- "|",
- "E",
- "T",
- "A",
- "O",
- "N",
- "I",
- "H",
- "S",
- "R",
- "D",
- "L",
- "U",
- "M",
- "W",
- "C",
- "F",
- "G",
- "Y",
- "P",
- "B",
- "V",
- "K",
- "'",
- "X",
- "J",
- "Q",
- "Z"
- ];
+ var ARKIT_BLENDSHAPES = LAM_BLENDSHAPES;
 var ARKIT_SYMMETRIC_PAIRS = [
 ["jawLeft", "jawRight"],
 ["mouthLeft", "mouthRight"],
@@ -2364,6 +2325,107 @@ function symmetrizeBlendshapes(frame) {
 }
 return result;
 }
+ var WAV2ARKIT_BLENDSHAPES = [
+ "browDownLeft",
+ "browDownRight",
+ "browInnerUp",
+ "browOuterUpLeft",
+ "browOuterUpRight",
+ "cheekPuff",
+ "cheekSquintLeft",
+ "cheekSquintRight",
+ "eyeBlinkLeft",
+ "eyeBlinkRight",
+ "eyeLookDownLeft",
+ "eyeLookDownRight",
+ "eyeLookInLeft",
+ "eyeLookInRight",
+ "eyeLookOutLeft",
+ "eyeLookOutRight",
+ "eyeLookUpLeft",
+ "eyeLookUpRight",
+ "eyeSquintLeft",
+ "eyeSquintRight",
+ "eyeWideLeft",
+ "eyeWideRight",
+ "jawForward",
+ "jawLeft",
+ "jawOpen",
+ "mouthFrownLeft",
+ "mouthFrownRight",
+ "mouthFunnel",
+ "mouthLeft",
+ "mouthLowerDownLeft",
+ "mouthLowerDownRight",
+ "mouthPressLeft",
+ "mouthPressRight",
+ "mouthPucker",
+ "mouthRight",
+ "mouthRollLower",
+ "mouthRollUpper",
+ "mouthShrugLower",
+ "mouthShrugUpper",
+ "mouthSmileLeft",
+ "mouthSmileRight",
+ "mouthStretchLeft",
+ "mouthStretchRight",
+ "mouthUpperUpLeft",
+ "mouthUpperUpRight",
+ "noseSneerLeft",
+ "noseSneerRight",
+ "tongueOut",
+ "mouthClose",
+ "mouthDimpleLeft",
+ "mouthDimpleRight",
+ "jawRight"
+ ];
+ var REMAP_WAV2ARKIT_TO_LAM = WAV2ARKIT_BLENDSHAPES.map(
+ (name) => LAM_BLENDSHAPES.indexOf(name)
+ );
+ function remapWav2ArkitToLam(frame) {
+ const result = new Float32Array(52);
+ for (let i = 0; i < 52; i++) {
+ result[REMAP_WAV2ARKIT_TO_LAM[i]] = frame[i];
+ }
+ return result;
+ }
+
+ // src/inference/Wav2Vec2Inference.ts
+ var logger2 = createLogger("Wav2Vec2");
+ var CTC_VOCAB = [
+ "<pad>",
+ "<s>",
+ "</s>",
+ "<unk>",
+ "|",
+ "E",
+ "T",
+ "A",
+ "O",
+ "N",
+ "I",
+ "H",
+ "S",
+ "R",
+ "D",
+ "L",
+ "U",
+ "M",
+ "W",
+ "C",
+ "F",
+ "G",
+ "Y",
+ "P",
+ "B",
+ "V",
+ "K",
+ "'",
+ "X",
+ "J",
+ "Q",
+ "Z"
+ ];
 var Wav2Vec2Inference = class {
 constructor(config) {
 this.session = null;
@@ -2400,8 +2462,8 @@ var Wav2Vec2Inference = class {
 });
 try {
 logger2.info("Loading ONNX Runtime...", { preference: this.config.backend || "auto" });
- const { ort
- this.ort =
+ const { ort, backend } = await getOnnxRuntimeForPreference(this.config.backend || "auto");
+ this.ort = ort;
 this._backend = backend;
 logger2.info("ONNX Runtime loaded", { backend: this._backend });
 const cache = getModelCache();
@@ -2602,6 +2664,7 @@ var Wav2Vec2Inference = class {
 blendshapes,
 asrLogits,
 text,
+ numFrames: numA2EFrames,
 numA2EFrames,
 numASRFrames,
 inferenceTimeMs
@@ -2968,8 +3031,293 @@ var WhisperInference = class _WhisperInference {
 }
 };
 
+ // src/inference/Wav2ArkitCpuInference.ts
+ var logger5 = createLogger("Wav2ArkitCpu");
+ var Wav2ArkitCpuInference = class {
+ constructor(config) {
+ this.session = null;
+ this.ort = null;
+ this._backend = "wasm";
+ this.isLoading = false;
+ // Inference queue for handling concurrent calls
+ this.inferenceQueue = Promise.resolve();
+ this.config = config;
+ }
+ get backend() {
+ return this.session ? this._backend : null;
+ }
+ get isLoaded() {
+ return this.session !== null;
+ }
+ /**
+ * Load the ONNX model
+ */
+ async load() {
+ if (this.isLoading) {
+ throw new Error("Model is already loading");
+ }
+ if (this.session) {
+ throw new Error("Model already loaded. Call dispose() first.");
+ }
+ this.isLoading = true;
+ const startTime = performance.now();
+ const telemetry = getTelemetry();
+ const span = telemetry?.startSpan("Wav2ArkitCpu.load", {
+ "model.url": this.config.modelUrl,
+ "model.backend_requested": this.config.backend || "wasm"
+ });
+ try {
+ const preference = this.config.backend || "wasm";
+ logger5.info("Loading ONNX Runtime...", { preference });
+ const { ort, backend } = await getOnnxRuntimeForPreference(preference);
+ this.ort = ort;
+ this._backend = backend;
+ logger5.info("ONNX Runtime loaded", { backend: this._backend });
+ const cache = getModelCache();
+ const modelUrl = this.config.modelUrl;
+ const isCached = await cache.has(modelUrl);
+ let modelBuffer;
+ if (isCached) {
+ logger5.debug("Loading model from cache", { modelUrl });
+ modelBuffer = await cache.get(modelUrl);
+ if (!modelBuffer) {
+ logger5.warn("Cache corruption detected, clearing and retrying", { modelUrl });
+ await cache.delete(modelUrl);
+ modelBuffer = await fetchWithCache(modelUrl);
+ }
+ } else {
+ logger5.debug("Fetching and caching model", { modelUrl });
+ modelBuffer = await fetchWithCache(modelUrl);
+ }
+ if (!modelBuffer) {
+ throw new Error(`Failed to load model: ${modelUrl}`);
+ }
+ logger5.debug("Creating ONNX session", {
+ size: formatBytes(modelBuffer.byteLength),
+ backend: this._backend
+ });
+ const sessionOptions = getSessionOptions(this._backend);
+ const modelData = new Uint8Array(modelBuffer);
+ this.session = await this.ort.InferenceSession.create(modelData, sessionOptions);
+ const loadTimeMs = performance.now() - startTime;
+ logger5.info("Model loaded successfully", {
+ backend: this._backend,
+ loadTimeMs: Math.round(loadTimeMs),
+ inputs: this.session.inputNames,
+ outputs: this.session.outputNames
+ });
+ span?.setAttributes({
+ "model.backend": this._backend,
+ "model.load_time_ms": loadTimeMs,
+ "model.cached": isCached
+ });
+ span?.end();
+ telemetry?.recordHistogram("omote.model.load_time", loadTimeMs, {
+ model: "wav2arkit_cpu",
+ backend: this._backend
+ });
+ logger5.debug("Running warmup inference");
+ const warmupStart = performance.now();
+ const silentAudio = new Float32Array(16e3);
+ await this.infer(silentAudio);
+ const warmupTimeMs = performance.now() - warmupStart;
+ logger5.info("Warmup inference complete", {
+ warmupTimeMs: Math.round(warmupTimeMs),
+ backend: this._backend
+ });
+ telemetry?.recordHistogram("omote.model.warmup_time", warmupTimeMs, {
+ model: "wav2arkit_cpu",
+ backend: this._backend
+ });
+ return {
+ backend: this._backend,
+ loadTimeMs,
+ inputNames: [...this.session.inputNames],
+ outputNames: [...this.session.outputNames]
+ };
+ } catch (error) {
+ span?.endWithError(error instanceof Error ? error : new Error(String(error)));
+ telemetry?.incrementCounter("omote.errors.total", 1, {
+ model: "wav2arkit_cpu",
+ error_type: "load_failed"
+ });
+ throw error;
+ } finally {
+ this.isLoading = false;
+ }
+ }
+ /**
+ * Run inference on raw audio
+ *
+ * Accepts variable-length audio (not fixed to 16000 samples).
+ * Output frames = ceil(30 * numSamples / 16000).
+ *
+ * @param audioSamples - Float32Array of raw audio at 16kHz
+ * @param _identityIndex - Ignored (identity 11 is baked into the model)
+ */
+ async infer(audioSamples, _identityIndex) {
+ if (!this.session) {
+ throw new Error("Model not loaded. Call load() first.");
+ }
+ const audioCopy = new Float32Array(audioSamples);
+ const feeds = {
+ "audio_waveform": new this.ort.Tensor("float32", audioCopy, [1, audioCopy.length])
+ };
+ return this.queueInference(feeds, audioCopy.length);
+ }
+ /**
+ * Queue inference to serialize ONNX session calls
+ */
+ queueInference(feeds, inputSamples) {
+ return new Promise((resolve, reject) => {
+ this.inferenceQueue = this.inferenceQueue.then(async () => {
+ const telemetry = getTelemetry();
+ const span = telemetry?.startSpan("Wav2ArkitCpu.infer", {
+ "inference.backend": this._backend,
+ "inference.input_samples": inputSamples
+ });
+ try {
+ const startTime = performance.now();
+ const results = await this.session.run(feeds);
+ const inferenceTimeMs = performance.now() - startTime;
+ const blendshapeOutput = results["blendshapes"];
+ if (!blendshapeOutput) {
+ throw new Error("Missing blendshapes output from model");
+ }
+ const blendshapeData = blendshapeOutput.data;
+ const numFrames = blendshapeOutput.dims[1];
+ const numBlendshapes = blendshapeOutput.dims[2];
+ const blendshapes = [];
+ for (let f = 0; f < numFrames; f++) {
+ const rawFrame = blendshapeData.slice(f * numBlendshapes, (f + 1) * numBlendshapes);
+ const remapped = remapWav2ArkitToLam(rawFrame);
+ blendshapes.push(symmetrizeBlendshapes(remapped));
+ }
+ logger5.trace("Inference completed", {
+ inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
+ numFrames,
+ inputSamples
+ });
+ span?.setAttributes({
+ "inference.duration_ms": inferenceTimeMs,
+ "inference.frames": numFrames
+ });
+ span?.end();
+ telemetry?.recordHistogram("omote.inference.latency", inferenceTimeMs, {
+ model: "wav2arkit_cpu",
+ backend: this._backend
+ });
+ telemetry?.incrementCounter("omote.inference.total", 1, {
+ model: "wav2arkit_cpu",
+ backend: this._backend,
+ status: "success"
+ });
+ resolve({
+ blendshapes,
+ numFrames,
+ inferenceTimeMs
+ });
+ } catch (err) {
+ span?.endWithError(err instanceof Error ? err : new Error(String(err)));
+ telemetry?.incrementCounter("omote.inference.total", 1, {
+ model: "wav2arkit_cpu",
+ backend: this._backend,
+ status: "error"
+ });
+ reject(err);
+ }
+ });
+ });
+ }
+ /**
+ * Dispose of the model and free resources
+ */
+ async dispose() {
+ if (this.session) {
+ await this.session.release();
+ this.session = null;
+ }
+ }
+ };
+
+ // src/inference/createLipSync.ts
+ var logger6 = createLogger("createLipSync");
+ function createLipSync(config) {
+ const mode = config.mode ?? "auto";
+ const fallbackOnError = config.fallbackOnError ?? true;
+ let useCpu;
+ if (mode === "cpu") {
+ useCpu = true;
+ logger6.info("Forcing CPU lip sync model (wav2arkit_cpu)");
+ } else if (mode === "gpu") {
+ useCpu = false;
+ logger6.info("Forcing GPU lip sync model (Wav2Vec2)");
+ } else {
+ useCpu = isSafari();
+ logger6.info("Auto-detected lip sync model", {
+ useCpu,
+ isSafari: isSafari()
+ });
+ }
+ if (useCpu) {
+ logger6.info("Creating Wav2ArkitCpuInference (1.8MB, WASM)");
+ return new Wav2ArkitCpuInference({
+ modelUrl: config.cpuModelUrl
+ });
+ }
+ const gpuInstance = new Wav2Vec2Inference({
+ modelUrl: config.gpuModelUrl,
+ backend: config.gpuBackend ?? "auto",
+ numIdentityClasses: config.numIdentityClasses
+ });
+ if (fallbackOnError) {
+ logger6.info("Creating Wav2Vec2Inference with CPU fallback");
+ return new LipSyncWithFallback(gpuInstance, config);
+ }
+ logger6.info("Creating Wav2Vec2Inference (no fallback)");
+ return gpuInstance;
+ }
+ var LipSyncWithFallback = class {
+ constructor(gpuInstance, config) {
+ this.hasFallenBack = false;
+ this.implementation = gpuInstance;
+ this.config = config;
+ }
+ get backend() {
+ return this.implementation.backend;
+ }
+ get isLoaded() {
+ return this.implementation.isLoaded;
+ }
+ async load() {
+ try {
+ return await this.implementation.load();
+ } catch (error) {
+ logger6.warn("GPU model load failed, falling back to CPU model", {
+ error: error instanceof Error ? error.message : String(error)
+ });
+ try {
+ await this.implementation.dispose();
+ } catch {
+ }
+ this.implementation = new Wav2ArkitCpuInference({
+ modelUrl: this.config.cpuModelUrl
+ });
+ this.hasFallenBack = true;
+ logger6.info("Fallback to Wav2ArkitCpuInference successful");
+ return await this.implementation.load();
+ }
+ }
+ async infer(audioSamples, identityIndex) {
+ return this.implementation.infer(audioSamples, identityIndex);
+ }
+ async dispose() {
+ return this.implementation.dispose();
+ }
+ };
+
 // src/inference/SileroVADInference.ts
- var
+ var logger7 = createLogger("SileroVAD");
 var SileroVADInference = class {
 constructor(config) {
 this.session = null;
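createLipSync() picks between the new 1.8 MB CPU model and the Wav2Vec2 GPU model, wrapping the GPU path in LipSyncWithFallback by default. A usage sketch follows; the config field names are taken from the code above, while the model URLs are placeholders and must be supplied by the consumer.

```ts
import { createLipSync } from "@omote/core";

const lipSync = createLipSync({
  mode: "auto",                                 // "cpu" | "gpu" | "auto" (auto prefers the CPU model on Safari)
  cpuModelUrl: "/models/wav2arkit_cpu.onnx",    // placeholder URL
  gpuModelUrl: "/models/wav2vec2_arkit.onnx",   // placeholder URL
  gpuBackend: "auto",
  fallbackOnError: true,                        // a failed GPU load retries with the CPU model
});

await lipSync.load();

// Per the infer() doc above, one second of 16 kHz audio (16000 samples)
// yields ceil(30 * 16000 / 16000) = 30 blendshape frames of 52 values each.
const { blendshapes, numFrames } = await lipSync.infer(new Float32Array(16000));
console.log(numFrames, "frames,", blendshapes[0]?.length, "blendshapes per frame");
```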
@@ -3041,32 +3389,32 @@ var SileroVADInference = class {
 "model.sample_rate": this.config.sampleRate
 });
 try {
-
- const { ort
- this.ort =
+ logger7.info("Loading ONNX Runtime...", { preference: this.config.backend });
+ const { ort, backend } = await getOnnxRuntimeForPreference(this.config.backend);
+ this.ort = ort;
 this._backend = backend;
-
+ logger7.info("ONNX Runtime loaded", { backend: this._backend });
 const cache = getModelCache();
 const modelUrl = this.config.modelUrl;
 const isCached = await cache.has(modelUrl);
 let modelBuffer;
 if (isCached) {
-
+ logger7.debug("Loading model from cache", { modelUrl });
 modelBuffer = await cache.get(modelUrl);
 } else {
-
+ logger7.debug("Fetching and caching model", { modelUrl });
 modelBuffer = await fetchWithCache(modelUrl);
 }
-
+ logger7.debug("Creating ONNX session", {
 size: formatBytes(modelBuffer.byteLength),
 backend: this._backend
 });
 const sessionOptions = getSessionOptions(this._backend);
 const modelData = new Uint8Array(modelBuffer);
- this.session = await
+ this.session = await ort.InferenceSession.create(modelData, sessionOptions);
 this.reset();
 const loadTimeMs = performance.now() - startTime;
-
+ logger7.info("Model loaded successfully", {
 backend: this._backend,
 loadTimeMs: Math.round(loadTimeMs),
 sampleRate: this.config.sampleRate,
@@ -3219,7 +3567,7 @@ var SileroVADInference = class {
 this.preSpeechBuffer.shift();
 }
 }
-
+ logger7.trace("Skipping VAD inference - audio too quiet", {
 rms: Math.round(rms * 1e4) / 1e4,
 threshold: MIN_ENERGY_THRESHOLD
 });
@@ -3273,7 +3621,7 @@ var SileroVADInference = class {
 if (isSpeech && !this.wasSpeaking) {
 preSpeechChunks = [...this.preSpeechBuffer];
 this.preSpeechBuffer = [];
-
+ logger7.debug("Speech started with pre-speech buffer", {
 preSpeechChunks: preSpeechChunks.length,
 durationMs: Math.round(preSpeechChunks.length * this.getChunkDurationMs())
 });
@@ -3286,7 +3634,7 @@ var SileroVADInference = class {
 this.preSpeechBuffer = [];
 }
 this.wasSpeaking = isSpeech;
-
+ logger7.trace("VAD inference completed", {
 probability: Math.round(probability * 1e3) / 1e3,
 isSpeech,
 inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100
@@ -3342,7 +3690,7 @@ var SileroVADInference = class {
 SileroVADInference.isWebGPUAvailable = isWebGPUAvailable;
 
 // src/inference/SileroVADWorker.ts
- var
+ var logger8 = createLogger("SileroVADWorker");
 var WASM_CDN_PATH2 = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
 var LOAD_TIMEOUT_MS = 1e4;
 var INFERENCE_TIMEOUT_MS = 1e3;
@@ -3605,7 +3953,7 @@ var SileroVADWorker = class {
 this.handleWorkerMessage(event.data);
 };
 worker.onerror = (error) => {
-
+ logger8.error("Worker error", { error: error.message });
 for (const [, resolver] of this.pendingResolvers) {
 resolver.reject(new Error(`Worker error: ${error.message}`));
 }
@@ -3681,9 +4029,9 @@ var SileroVADWorker = class {
 "model.sample_rate": this.config.sampleRate
 });
 try {
-
+ logger8.info("Creating VAD worker...");
 this.worker = this.createWorker();
-
+ logger8.info("Loading model in worker...", {
 modelUrl: this.config.modelUrl,
 sampleRate: this.config.sampleRate
 });
@@ -3699,7 +4047,7 @@ var SileroVADWorker = class {
 );
 this._isLoaded = true;
 const loadTimeMs = performance.now() - startTime;
-
+ logger8.info("VAD worker loaded successfully", {
 backend: "wasm",
 loadTimeMs: Math.round(loadTimeMs),
 workerLoadTimeMs: Math.round(result.loadTimeMs),
@@ -3806,7 +4154,7 @@ var SileroVADWorker = class {
 if (isSpeech && !this.wasSpeaking) {
 preSpeechChunks = [...this.preSpeechBuffer];
 this.preSpeechBuffer = [];
-
+ logger8.debug("Speech started with pre-speech buffer", {
 preSpeechChunks: preSpeechChunks.length,
 durationMs: Math.round(preSpeechChunks.length * this.getChunkDurationMs())
 });
@@ -3819,7 +4167,7 @@ var SileroVADWorker = class {
 this.preSpeechBuffer = [];
 }
 this.wasSpeaking = isSpeech;
-
+ logger8.trace("VAD worker inference completed", {
 probability: Math.round(result.probability * 1e3) / 1e3,
 isSpeech,
 inferenceTimeMs: Math.round(inferenceTimeMs * 100) / 100,
@@ -3887,18 +4235,18 @@ var SileroVADWorker = class {
 };
 
 // src/inference/createSileroVAD.ts
- var
+ var logger9 = createLogger("createSileroVAD");
 function supportsVADWorker() {
 if (typeof Worker === "undefined") {
-
+ logger9.debug("Worker not supported: Worker constructor undefined");
 return false;
 }
 if (typeof URL === "undefined" || typeof URL.createObjectURL === "undefined") {
-
+ logger9.debug("Worker not supported: URL.createObjectURL unavailable");
 return false;
 }
 if (typeof Blob === "undefined") {
-
+ logger9.debug("Worker not supported: Blob constructor unavailable");
 return false;
 }
 return true;
@@ -3908,19 +4256,19 @@ function createSileroVAD(config) {
 let useWorker;
 if (config.useWorker !== void 0) {
 useWorker = config.useWorker;
-
+ logger9.debug("Worker preference explicitly set", { useWorker });
 } else {
 const workerSupported = supportsVADWorker();
 const onMobile = isMobile();
 useWorker = workerSupported && !onMobile;
-
+ logger9.debug("Auto-detected Worker preference", {
 useWorker,
 workerSupported,
 onMobile
 });
 }
 if (useWorker) {
-
+ logger9.info("Creating SileroVADWorker (off-main-thread)");
 const worker = new SileroVADWorker({
 modelUrl: config.modelUrl,
 sampleRate: config.sampleRate,
@@ -3932,7 +4280,7 @@ function createSileroVAD(config) {
 }
 return worker;
 }
-
+ logger9.info("Creating SileroVADInference (main thread)");
 return new SileroVADInference(config);
 }
 var VADWorkerWithFallback = class {
@@ -3958,7 +4306,7 @@ var VADWorkerWithFallback = class {
 try {
 return await this.implementation.load();
 } catch (error) {
-
+ logger9.warn("Worker load failed, falling back to main thread", {
 error: error instanceof Error ? error.message : String(error)
 });
 try {
@@ -3967,7 +4315,7 @@ var VADWorkerWithFallback = class {
 }
 this.implementation = new SileroVADInference(this.config);
 this.hasFallenBack = true;
-
+ logger9.info("Fallback to SileroVADInference successful");
 return await this.implementation.load();
 }
 }
@@ -3989,7 +4337,7 @@ var VADWorkerWithFallback = class {
 };
 
 // src/inference/Emotion2VecInference.ts
- var
+ var logger10 = createLogger("Emotion2Vec");
 var EMOTION2VEC_LABELS = ["neutral", "happy", "angry", "sad"];
 var Emotion2VecInference = class {
 constructor(config) {
@@ -4031,36 +4379,36 @@ var Emotion2VecInference = class {
 "model.backend_requested": this.config.backend
 });
 try {
-
- const { ort
- this.ort =
+ logger10.info("Loading ONNX Runtime...", { preference: this.config.backend });
+ const { ort, backend } = await getOnnxRuntimeForPreference(this.config.backend);
+ this.ort = ort;
 this._backend = backend;
-
-
+ logger10.info("ONNX Runtime loaded", { backend: this._backend });
+ logger10.info("Checking model cache...");
 const cache = getModelCache();
 const modelUrl = this.config.modelUrl;
 const isCached = await cache.has(modelUrl);
-
+ logger10.info("Cache check complete", { modelUrl, isCached });
 let modelBuffer;
 if (isCached) {
-
+ logger10.info("Loading model from cache...", { modelUrl });
 modelBuffer = await cache.get(modelUrl);
-
+ logger10.info("Model loaded from cache", { size: formatBytes(modelBuffer.byteLength) });
 } else {
-
+ logger10.info("Fetching model (not cached)...", { modelUrl });
 modelBuffer = await fetchWithCache(modelUrl);
-
+ logger10.info("Model fetched and cached", { size: formatBytes(modelBuffer.byteLength) });
 }
-
-
+ logger10.info("Creating ONNX session (this may take a while for large models)...");
+ logger10.debug("Creating ONNX session", {
 size: formatBytes(modelBuffer.byteLength),
 backend: this._backend
 });
 const sessionOptions = getSessionOptions(this._backend);
 const modelData = new Uint8Array(modelBuffer);
- this.session = await
+ this.session = await ort.InferenceSession.create(modelData, sessionOptions);
 const loadTimeMs = performance.now() - startTime;
-
+ logger10.info("Model loaded successfully", {
 backend: this._backend,
 loadTimeMs: Math.round(loadTimeMs),
 sampleRate: this.config.sampleRate,
@@ -4172,7 +4520,7 @@ var Emotion2VecInference = class {
 });
 }
 const inferenceTimeMs = performance.now() - startTime;
-
+ logger10.debug("Emotion inference completed", {
 numFrames,
 dominant: dominant.emotion,
 confidence: Math.round(dominant.confidence * 100),
@@ -4248,14 +4596,8 @@ var Emotion2VecInference = class {
 */
 Emotion2VecInference.isWebGPUAvailable = isWebGPUAvailable;
 
- // src/inference/ChatterboxTurboInference.ts
- import ort from "onnxruntime-web/webgpu";
- var logger9 = createLogger("ChatterboxTurbo");
- env.allowLocalModels = true;
- ort.env.wasm.wasmPaths = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.2/dist/";
-
 // src/inference/SafariSpeechRecognition.ts
- var
+ var logger11 = createLogger("SafariSpeech");
 var SafariSpeechRecognition = class _SafariSpeechRecognition {
 constructor(config = {}) {
 this.recognition = null;
@@ -4274,7 +4616,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
 interimResults: config.interimResults ?? true,
 maxAlternatives: config.maxAlternatives ?? 1
 };
-
+ logger11.debug("SafariSpeechRecognition created", {
 language: this.config.language,
 continuous: this.config.continuous
 });
@@ -4335,7 +4677,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
 */
 async start() {
 if (this.isListening) {
-
+ logger11.warn("Already listening");
 return;
 }
 if (!_SafariSpeechRecognition.isAvailable()) {
@@ -4365,7 +4707,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
 this.isListening = true;
 this.startTime = performance.now();
 this.accumulatedText = "";
-
+ logger11.info("Speech recognition started", {
 language: this.config.language
 });
 span?.end();
@@ -4380,7 +4722,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
 */
 async stop() {
 if (!this.isListening || !this.recognition) {
-
+ logger11.warn("Not currently listening");
 return {
 text: this.accumulatedText,
 language: this.config.language,
@@ -4409,7 +4751,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
 if (this.recognition && this.isListening) {
 this.recognition.abort();
 this.isListening = false;
-
+ logger11.info("Speech recognition aborted");
 }
 }
 /**
@@ -4440,7 +4782,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
 this.isListening = false;
 this.resultCallbacks = [];
 this.errorCallbacks = [];
-
+ logger11.debug("SafariSpeechRecognition disposed");
 }
 /**
 * Set up event handlers for the recognition instance
@@ -4468,7 +4810,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
 confidence: alternative.confidence
 };
 this.emitResult(speechResult);
-
+ logger11.trace("Speech result", {
 text: text.substring(0, 50),
 isFinal,
 confidence: alternative.confidence
@@ -4478,12 +4820,12 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
 span?.end();
 } catch (error) {
 span?.endWithError(error instanceof Error ? error : new Error(String(error)));
-
+ logger11.error("Error processing speech result", { error });
 }
 };
 this.recognition.onerror = (event) => {
 const error = new Error(`Speech recognition error: ${event.error} - ${event.message}`);
-
+ logger11.error("Speech recognition error", { error: event.error, message: event.message });
 this.emitError(error);
 if (this.stopRejecter) {
 this.stopRejecter(error);
@@ -4493,7 +4835,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
 };
 this.recognition.onend = () => {
 this.isListening = false;
-
+ logger11.info("Speech recognition ended", {
 totalText: this.accumulatedText.length,
 durationMs: performance.now() - this.startTime
 });
@@ -4510,13 +4852,13 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
 }
 };
 this.recognition.onstart = () => {
-
+ logger11.debug("Speech recognition started by browser");
 };
 this.recognition.onspeechstart = () => {
-
+ logger11.debug("Speech detected");
 };
 this.recognition.onspeechend = () => {
-
+ logger11.debug("Speech ended");
 };
 }
 /**
@@ -4527,7 +4869,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
 try {
 callback(result);
 } catch (error) {
-
+ logger11.error("Error in result callback", { error });
 }
 }
 }
@@ -4539,7 +4881,7 @@ var SafariSpeechRecognition = class _SafariSpeechRecognition {
 try {
 callback(error);
 } catch (callbackError) {
-
+ logger11.error("Error in error callback", { error: callbackError });
 }
 }
 }
@@ -5962,12 +6304,12 @@ async function isHuggingFaceCDNReachable(testUrl = HF_CDN_TEST_URL) {
 }
 
 // src/utils/transformersCacheClear.ts
- var
+ var logger12 = createLogger("TransformersCache");
 async function clearTransformersCache(options) {
 const verbose = options?.verbose ?? true;
 const additionalPatterns = options?.additionalPatterns ?? [];
 if (!("caches" in window)) {
-
+ logger12.warn("Cache API not available in this environment");
 return [];
 }
 try {
@@ -5985,18 +6327,18 @@ async function clearTransformersCache(options) {
 );
 if (shouldDelete) {
 if (verbose) {
-
+ logger12.info("Deleting cache", { cacheName });
 }
 const deleted = await caches.delete(cacheName);
 if (deleted) {
 deletedCaches.push(cacheName);
 } else if (verbose) {
-
+ logger12.warn("Failed to delete cache", { cacheName });
 }
 }
 }
 if (verbose) {
-
+ logger12.info("Cache clearing complete", {
 totalCaches: cacheNames.length,
 deletedCount: deletedCaches.length,
 deletedCaches
@@ -6004,35 +6346,35 @@ async function clearTransformersCache(options) {
 }
 return deletedCaches;
 } catch (error) {
-
+ logger12.error("Error clearing caches", { error });
 throw error;
 }
 }
 async function clearSpecificCache(cacheName) {
 if (!("caches" in window)) {
-
+ logger12.warn("Cache API not available in this environment");
 return false;
 }
 try {
 const deleted = await caches.delete(cacheName);
-
+ logger12.info("Cache deletion attempt", { cacheName, deleted });
 return deleted;
 } catch (error) {
-
+ logger12.error("Error deleting cache", { cacheName, error });
 return false;
 }
 }
 async function listCaches() {
 if (!("caches" in window)) {
-
+ logger12.warn("Cache API not available in this environment");
 return [];
 }
 try {
 const cacheNames = await caches.keys();
-
+ logger12.debug("Available caches", { cacheNames });
 return cacheNames;
 } catch (error) {
-
+ logger12.error("Error listing caches", { error });
 return [];
 }
 }
@@ -6074,7 +6416,7 @@ async function validateCachedResponse(cacheName, requestUrl) {
 reason: valid ? "Valid response" : `Invalid: status=${response.status}, contentType=${contentType}, isHtml=${isHtml || looksLikeHtml}`
 };
 } catch (error) {
-
+ logger12.error("Error validating cached response", { cacheName, requestUrl, error });
 return {
 exists: false,
 valid: false,
@@ -6111,7 +6453,7 @@ async function scanForInvalidCaches() {
 }
 }
 }
-
+ logger12.info("Cache scan complete", {
 totalCaches: cacheNames.length,
 scannedEntries,
 invalidCount: invalidEntries.length
@@ -6122,13 +6464,13 @@ async function scanForInvalidCaches() {
 invalidEntries
 };
 } catch (error) {
-
+ logger12.error("Error scanning caches", { error });
 throw error;
 }
 }
 async function nukeBrowserCaches(preventRecreation = false) {
 if (!("caches" in window)) {
-
+ logger12.warn("Cache API not available in this environment");
 return 0;
 }
 try {
@@ -6140,17 +6482,17 @@ async function nukeBrowserCaches(preventRecreation = false) {
 deletedCount++;
 }
 }
-
+ logger12.info("All browser caches cleared", {
 totalDeleted: deletedCount
 });
 if (preventRecreation) {
 const { env: env2 } = await import("./transformers.web-ALDLCPHT.mjs");
 env2.useBrowserCache = false;
-
+ logger12.warn("Browser cache creation disabled (env.useBrowserCache = false)");
 }
 return deletedCount;
 } catch (error) {
-
+ logger12.error("Error nuking caches", { error });
 throw error;
 }
 }
@@ -6676,6 +7018,7 @@ var EmphasisDetector = class {
 }
 };
 export {
+ ARKIT_BLENDSHAPES,
 AgentCoreAdapter,
 AnimationGraph,
 AudioChunkCoalescer,
@@ -6711,6 +7054,8 @@ export {
 SileroVADWorker,
 SyncedAudioPipeline,
 TenantManager,
+ WAV2ARKIT_BLENDSHAPES,
+ Wav2ArkitCpuInference,
 Wav2Vec2Inference,
 WhisperInference,
 blendEmotions,
@@ -6722,6 +7067,7 @@ export {
 configureLogging,
 configureTelemetry,
 createEmotionVector,
+ createLipSync,
 createLogger,
 createSessionWithFallback,
 createSileroVAD,
@@ -6746,6 +7092,7 @@ export {
 isIOSSafari,
 isMobile,
 isOnnxRuntimeLoaded,
+ isSafari,
 isSpeechRecognitionAvailable,
 isWebGPUAvailable,
 lerpEmotion,
@@ -6754,15 +7101,18 @@ export {
 nukeBrowserCaches,
 parseHuggingFaceUrl,
 preloadModels,
+ remapWav2ArkitToLam,
 resetLoggingConfig,
 resolveBackend,
 scanForInvalidCaches,
 setLogLevel,
 setLoggingEnabled,
 shouldEnableWasmProxy,
+ shouldUseCpuLipSync,
 shouldUseNativeASR,
 shouldUseServerLipSync,
 supportsVADWorker,
+ symmetrizeBlendshapes,
 validateCachedResponse
 };
 //# sourceMappingURL=index.mjs.map