@goodganglabs/lipsync-wasm-v1 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,606 @@
1
+ # @goodganglabs/lipsync-wasm-v1
2
+
3
+ WebAssembly-based real-time audio-to-blendshape lip sync engine.
4
+ Converts 16kHz PCM audio into 111-dimensional ARKit-compatible blendshape frames at 30fps using a phoneme classification model.
5
+
6
+ ## Which Version?
7
+
8
+ | | V1 (this package) | V2 |
9
+ |---|---|---|
10
+ | **Dimensions** | 111-dim ARKit | 52-dim ARKit |
11
+ | **Model** | Phoneme classification | Student distillation |
12
+ | **Idle expression** | Built-in `IdleExpressionGenerator` | Not included |
13
+ | **VAD** | Built-in `VoiceActivityDetector` | Not included |
14
+ | **ONNX fallback** | Heuristic fallback | None (ONNX required) |
15
+ | **Post-processing** | Manual | Built-in (crisp mouth, fade, blinks) |
16
+ | **Recommendation** | Full expression control needed | Most use cases |
17
+
18
+ ## Features
19
+
20
+ - 111-dim ARKit blendshape output (phoneme-based model)
21
+ - Batch and real-time streaming processing
22
+ - Built-in expression preset blending
23
+ - Embedded VRMA bone animation data
24
+ - Built-in idle expression generator (eye blinks + micro expressions)
25
+ - Built-in voice activity detection (VAD) with auto-calibration
26
+ - ONNX Runtime inference with automatic heuristic fallback
27
+ - Runs entirely in the browser via WebAssembly
28
+
29
+ ## Requirements
30
+
31
+ - **onnxruntime-web** `>=1.17.0` (peer dependency)
32
+
33
+ ```html
34
+ <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.17.0/dist/ort.min.js"></script>
35
+ ```
36
+
37
+ ## Installation
38
+
39
+ ```bash
40
+ npm install @goodganglabs/lipsync-wasm-v1
41
+ ```
42
+
43
+ ## Quick Start
44
+
45
+ ### Batch Processing
46
+
47
+ ```js
48
+ import { LipSyncWasmWrapper } from '@goodganglabs/lipsync-wasm-v1';
49
+
50
+ const lipsync = new LipSyncWasmWrapper();
51
+ await lipsync.init();
52
+
53
+ const result = await lipsync.processFile(audioFile);
54
+ for (let i = 0; i < result.frame_count; i++) {
55
+ const frame = lipsync.getFrame(result, i); // number[111]
56
+ applyToAvatar(frame);
57
+ }
58
+
59
+ lipsync.dispose();
60
+ ```
61
+
62
+ ### Three.js VRM Complete Example
63
+
64
+ Full integration with a VRM avatar: init, load VRM, apply blendshapes, render loop.
65
+
66
+ ```html
67
+ <script type="importmap">
68
+ { "imports": {
69
+ "three": "https://cdn.jsdelivr.net/npm/three@0.179.1/build/three.module.js",
70
+ "three/addons/": "https://cdn.jsdelivr.net/npm/three@0.179.1/examples/jsm/",
71
+ "@pixiv/three-vrm": "https://cdn.jsdelivr.net/npm/@pixiv/three-vrm@3.4.5/lib/three-vrm.module.min.js",
72
+ "@pixiv/three-vrm-animation": "https://cdn.jsdelivr.net/npm/@pixiv/three-vrm-animation@3.4.5/lib/three-vrm-animation.module.min.js"
73
+ }}
74
+ </script>
75
+ <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.17.0/dist/ort.min.js"></script>
76
+
77
+ <canvas id="avatar-canvas" style="width:100%; height:500px;"></canvas>
78
+
79
+ <script type="module">
80
+ import * as THREE from 'three';
81
+ import { GLTFLoader } from 'three/addons/loaders/GLTFLoader.js';
82
+ import { OrbitControls } from 'three/addons/controls/OrbitControls.js';
83
+ import { VRMLoaderPlugin, VRMUtils } from '@pixiv/three-vrm';
84
+ import { LipSyncWasmWrapper } from '@goodganglabs/lipsync-wasm-v1';
85
+
86
+ // --- Index-to-name mapping (first 52 of 111-dim ARKit) ---
87
+ // V1 outputs 111 dimensions. The first 52 match the standard ARKit set.
88
+ const SYSTEM_INDEX_TO_BLENDSHAPE = {
89
+ 0: ['browDownLeft'], 1: ['browDownRight'], 2: ['browInnerUp'],
90
+ 3: ['browOuterUpLeft'], 4: ['browOuterUpRight'],
91
+ 5: ['cheekPuff'], 6: ['cheekSquintLeft'], 7: ['cheekSquintRight'],
92
+ 8: ['eyeBlinkLeft'], 9: ['eyeBlinkRight'],
93
+ 10: ['eyeLookDownLeft'], 11: ['eyeLookDownRight'],
94
+ 12: ['eyeLookInLeft'], 13: ['eyeLookInRight'],
95
+ 14: ['eyeLookOutLeft'], 15: ['eyeLookOutRight'],
96
+ 16: ['eyeLookUpLeft'], 17: ['eyeLookUpRight'],
97
+ 18: ['eyeSquintLeft'], 19: ['eyeSquintRight'],
98
+ 20: ['eyeWideLeft'], 21: ['eyeWideRight'],
99
+ 22: ['jawForward'], 23: ['jawLeft'], 24: ['jawOpen'], 25: ['jawRight'],
100
+ 26: ['mouthClose'], 27: ['mouthDimpleLeft'], 28: ['mouthDimpleRight'],
101
+ 29: ['mouthFrownLeft'], 30: ['mouthFrownRight'], 31: ['mouthFunnel'],
102
+ 32: ['mouthLeft'], 33: ['mouthLowerDownLeft'], 34: ['mouthLowerDownRight'],
103
+ 35: ['mouthPressLeft'], 36: ['mouthPressRight'], 37: ['mouthPucker'],
104
+ 38: ['mouthRight'], 39: ['mouthRollLower'], 40: ['mouthRollUpper'],
105
+ 41: ['mouthShrugLower'], 42: ['mouthShrugUpper'],
106
+ 43: ['mouthSmileLeft'], 44: ['mouthSmileRight'],
107
+ 45: ['mouthStretchLeft'], 46: ['mouthStretchRight'],
108
+ 47: ['mouthUpperUpLeft'], 48: ['mouthUpperUpRight'],
109
+ 49: ['noseSneerLeft'], 50: ['noseSneerRight'],
110
+ 51: ['tongueOut']
111
+ };
112
+
113
+ // --- Apply blendshape frame to VRM ---
114
+ function applyBlendshapes(vrm, frame) {
115
+ if (!vrm) return;
116
+
117
+ // VRM 1.0 (expressionManager)
118
+ if (vrm.expressionManager) {
119
+ for (const [idx, names] of Object.entries(SYSTEM_INDEX_TO_BLENDSHAPE)) {
120
+ const value = frame[idx] || 0;
121
+ for (const name of names) {
122
+ vrm.expressionManager.setValue(name, value);
123
+ }
124
+ }
125
+ return;
126
+ }
127
+
128
+ // VRM 0.x (blendShapeProxy)
129
+ if (vrm.blendShapeProxy) {
130
+ for (const [idx, names] of Object.entries(SYSTEM_INDEX_TO_BLENDSHAPE)) {
131
+ const value = frame[idx] || 0;
132
+ for (const name of names) {
133
+ vrm.blendShapeProxy.setValue(name, value);
134
+ }
135
+ }
136
+ vrm.blendShapeProxy.update();
137
+ return;
138
+ }
139
+
140
+ // Fallback: direct morph target manipulation
141
+ vrm.scene.traverse((child) => {
142
+ if (!child.isMesh || !child.morphTargetDictionary || !child.morphTargetInfluences) return;
143
+ for (const [idx, names] of Object.entries(SYSTEM_INDEX_TO_BLENDSHAPE)) {
144
+ const value = frame[idx] || 0;
145
+ for (const name of names) {
146
+ const morphIdx = child.morphTargetDictionary[name];
147
+ if (morphIdx !== undefined) {
148
+ child.morphTargetInfluences[morphIdx] = value;
149
+ }
150
+ }
151
+ }
152
+ });
153
+ }
154
+
155
+ // --- Setup ---
156
+ const canvas = document.getElementById('avatar-canvas');
157
+ const scene = new THREE.Scene();
158
+ scene.background = new THREE.Color(0x1a1a2e);
159
+
160
+ const camera = new THREE.PerspectiveCamera(30, canvas.clientWidth / canvas.clientHeight, 0.1, 100);
161
+ camera.position.set(0, 1.25, 0.5);
162
+
163
+ const renderer = new THREE.WebGLRenderer({ canvas, antialias: true });
164
+ renderer.setSize(canvas.clientWidth, canvas.clientHeight);
165
+ renderer.setPixelRatio(Math.min(window.devicePixelRatio, 2));
166
+
167
+ const controls = new OrbitControls(camera, canvas);
168
+ controls.target.set(0, 1.25, 0);
169
+ controls.enableDamping = true;
170
+
171
+ scene.add(new THREE.AmbientLight(0xffffff, 2.0));
172
+ const dirLight = new THREE.DirectionalLight(0xffffff, 1.1);
173
+ dirLight.position.set(1, 3, 2);
174
+ scene.add(dirLight);
175
+
176
+ // --- Load VRM ---
177
+ const loader = new GLTFLoader();
178
+ loader.register((parser) => new VRMLoaderPlugin(parser));
179
+
180
+ const gltf = await new Promise((resolve, reject) =>
181
+ loader.load('your-avatar.vrm', resolve, undefined, reject)
182
+ );
183
+ const vrm = gltf.userData.vrm;
184
+ VRMUtils.removeUnnecessaryVertices(gltf.scene);
185
+ VRMUtils.removeUnnecessaryJoints(gltf.scene);
186
+ scene.add(vrm.scene);
187
+
188
+ // --- Init LipSync ---
189
+ const lipsync = new LipSyncWasmWrapper();
190
+ await lipsync.init();
191
+
192
+ // --- Process audio & animate ---
193
+ const result = await lipsync.processFile(audioFile);
194
+ let frameIndex = 0;
195
+ const clock = new THREE.Clock();
196
+
197
+ function animate() {
198
+ requestAnimationFrame(animate);
199
+ const delta = clock.getDelta();
200
+ controls.update();
201
+
202
+ if (frameIndex < result.frame_count) {
203
+ const frame = lipsync.getFrame(result, frameIndex);
204
+ applyBlendshapes(vrm, frame);
205
+ frameIndex++;
206
+ }
207
+
208
+ vrm.update(delta);
209
+ renderer.render(scene, camera);
210
+ }
211
+ animate();
212
+ </script>
213
+ ```
214
+
215
+ ## IdleExpressionGenerator
216
+
217
+ V1 includes a procedural idle expression generator that produces natural eye blinks (random interval 2.5-4.5s, 15% double-blink chance) and micro facial expressions (sinusoidal).
218
+
219
+ ```js
220
+ const lipsync = new LipSyncWasmWrapper();
221
+ await lipsync.init();
222
+
223
+ // Access via the underlying WASM module
224
+ const idle = new lipsync.wasmModule.IdleExpressionGenerator();
225
+ let elapsedSeconds = 0;
226
+
227
+ function renderLoop() {
228
+ requestAnimationFrame(renderLoop);
229
+ const delta = clock.getDelta();
230
+ elapsedSeconds += delta;
231
+
232
+ // Generate procedural idle frame (number[111])
233
+ const frame = idle.get_frame(elapsedSeconds);
234
+ applyBlendshapes(vrm, frame);
235
+
236
+ vrm.update(delta);
237
+ renderer.render(scene, camera);
238
+ }
239
+ ```
240
+
241
+ Use idle expressions when no audio is playing. Transition smoothly from lip sync to idle:
242
+
243
+ ```js
244
+ // In render loop: lerp from last lip sync frame to idle
245
+ if (!isPlaying && prevFrame) {
246
+ const idleFrame = idle.get_frame(elapsedSeconds);
247
+ const blended = prevFrame.map((v, i) =>
248
+ v + 0.15 * ((idleFrame[i] || 0) - v) // alpha=0.15 for smooth transition
249
+ );
250
+ applyBlendshapes(vrm, blended);
251
+ prevFrame = blended;
252
+ }
253
+ ```
254
+
255
+ ## Voice Activity Detection (VAD)
256
+
257
+ V1 includes a built-in VAD that auto-calibrates from ambient noise. Use it to transition between idle and speaking bone animations.
258
+
259
+ ```js
260
+ const lipsync = new LipSyncWasmWrapper();
261
+ await lipsync.init();
262
+
263
+ // --- Step 1: Calibrate from 1 second of ambient noise ---
264
+ const calibrationSamples = []; // collect RMS values
265
+ const calibrationStart = performance.now();
266
+
267
+ function collectCalibration(audioChunk) {
268
+ let sumSq = 0;
269
+ for (let i = 0; i < audioChunk.length; i++) {
270
+ sumSq += audioChunk[i] * audioChunk[i];
271
+ }
272
+ const rms = Math.sqrt(sumSq / audioChunk.length);
273
+ calibrationSamples.push(rms);
274
+ }
275
+
276
+ // After 1 second of collecting samples:
277
+ function finalizeCalibration() {
278
+ const mean = calibrationSamples.reduce((a, b) => a + b, 0) / calibrationSamples.length;
279
+ const variance = calibrationSamples.reduce((a, b) => a + (b - mean) ** 2, 0) / calibrationSamples.length;
280
+ const stdDev = Math.sqrt(variance);
281
+
282
+ const threshold = Math.max(mean + 2 * stdDev, 0.005);
283
+ const holdTime = 0.5; // seconds to hold "speaking" state after voice drops
284
+
285
+ // Create VAD via WASM module
286
+ const vad = new lipsync.wasmModule.VoiceActivityDetector(threshold, holdTime);
287
+ return vad;
288
+ }
289
+
290
+ // --- Step 2: Use VAD in streaming loop ---
291
+ let vad = null; // set after calibration
292
+
293
+ function processMicChunk(audio) {
294
+ if (vad) {
295
+ const isSpeaking = vad.feed_audio(audio);
296
+ // Use isSpeaking to transition bone animations:
297
+ // isSpeaking=true → crossfade to speaking pose
298
+ // isSpeaking=false → crossfade to idle pose
299
+ }
300
+ }
301
+ ```
302
+
303
+ ## VRMA Bone Animation
304
+
305
+ The package includes embedded VRMA bone animation data for idle and speaking poses. Use these with Three.js `AnimationMixer` for natural body motion during lip sync.
306
+
307
+ ```js
308
+ import { GLTFLoader } from 'three/addons/loaders/GLTFLoader.js';
309
+ import { VRMAnimationLoaderPlugin, createVRMAnimationClip } from '@pixiv/three-vrm-animation';
310
+
311
+ // 1. Get embedded VRMA bytes from the wrapper
312
+ const vrmaData = lipsync.getVrmaBytes();
313
+
314
+ // 2. Load VRMA from bytes
315
+ async function loadVRMAFromBytes(bytes) {
316
+ const blob = new Blob([bytes], { type: 'application/octet-stream' });
317
+ const url = URL.createObjectURL(blob);
318
+ const loader = new GLTFLoader();
319
+ loader.register((parser) => new VRMAnimationLoaderPlugin(parser));
320
+ const gltf = await new Promise((resolve, reject) =>
321
+ loader.load(url, resolve, undefined, reject)
322
+ );
323
+ URL.revokeObjectURL(url);
324
+ return gltf.userData.vrmAnimations[0];
325
+ }
326
+
327
+ const idleAnim = await loadVRMAFromBytes(vrmaData.idle);
328
+ const speakingAnim = await loadVRMAFromBytes(vrmaData.speaking);
329
+
330
+ // 3. Setup AnimationMixer with crossfade
331
+ const mixer = new THREE.AnimationMixer(vrm.scene);
332
+
333
+ const idleClip = createVRMAnimationClip(idleAnim, vrm);
334
+ const speakingClip = createVRMAnimationClip(speakingAnim, vrm);
335
+
336
+ const idleAction = mixer.clipAction(idleClip);
337
+ const speakingAction = mixer.clipAction(speakingClip);
338
+
339
+ idleAction.setLoop(THREE.LoopRepeat);
340
+ speakingAction.setLoop(THREE.LoopRepeat);
341
+
342
+ idleAction.setEffectiveWeight(1);
343
+ idleAction.play();
344
+ speakingAction.setEffectiveWeight(0);
345
+ speakingAction.play();
346
+
347
+ // 4. Smoothstep crossfade between idle and speaking
348
+ let crossFadeProgress = 0;
349
+ let isSpeaking = false;
350
+
351
+ function updateBoneWeights(delta) {
352
+ const target = isSpeaking ? 1 : 0;
353
+ const speed = 1.0 / 0.4; // 0.4s transition duration
354
+ if (target > crossFadeProgress) {
355
+ crossFadeProgress = Math.min(crossFadeProgress + delta * speed, 1);
356
+ } else {
357
+ crossFadeProgress = Math.max(crossFadeProgress - delta * speed, 0);
358
+ }
359
+ // Smoothstep interpolation
360
+ const t = crossFadeProgress;
361
+ const w = t * t * (3 - 2 * t);
362
+ speakingAction.setEffectiveWeight(w);
363
+ idleAction.setEffectiveWeight(1 - w);
364
+ }
365
+
366
+ // In your render loop:
367
+ // updateBoneWeights(delta);
368
+ // mixer.update(delta);
369
+ ```
370
+
371
+ ## Real-time Streaming
372
+
373
+ ### Microphone Input with AudioWorklet
374
+
375
+ ```js
376
+ // 1. Get microphone stream
377
+ const stream = await navigator.mediaDevices.getUserMedia({
378
+ audio: { sampleRate: 16000, channelCount: 1, echoCancellation: true }
379
+ });
380
+ const audioCtx = new AudioContext({ sampleRate: 16000 });
381
+ const source = audioCtx.createMediaStreamSource(stream);
382
+
383
+ // 2. AudioWorklet: batch 1600 samples (100ms @ 16kHz)
384
+ const workletCode = `
385
+ class MicProcessor extends AudioWorkletProcessor {
386
+ constructor() {
387
+ super();
388
+ this.buffer = [];
389
+ this.bufferLen = 0;
390
+ this.TARGET = 1600; // 100ms @ 16kHz
391
+ }
392
+ process(inputs) {
393
+ const input = inputs[0];
394
+ if (input.length > 0 && input[0].length > 0) {
395
+ this.buffer.push(new Float32Array(input[0]));
396
+ this.bufferLen += input[0].length;
397
+ if (this.bufferLen >= this.TARGET) {
398
+ const merged = new Float32Array(this.bufferLen);
399
+ let off = 0;
400
+ for (const buf of this.buffer) { merged.set(buf, off); off += buf.length; }
401
+ this.port.postMessage(merged);
402
+ this.buffer = [];
403
+ this.bufferLen = 0;
404
+ }
405
+ }
406
+ return true;
407
+ }
408
+ }
409
+ registerProcessor('mic-processor', MicProcessor);
410
+ `;
411
+ const blob = new Blob([workletCode], { type: 'application/javascript' });
412
+ const url = URL.createObjectURL(blob);
413
+ await audioCtx.audioWorklet.addModule(url);
414
+ URL.revokeObjectURL(url);
415
+
416
+ const workletNode = new AudioWorkletNode(audioCtx, 'mic-processor');
417
+ source.connect(workletNode);
418
+ workletNode.connect(audioCtx.destination);
419
+
420
+ // 3. Frame queue + processing with VAD
421
+ const streamQueue = [];
422
+ let micProcessing = false;
423
+ const micBuffer = [];
424
+
425
+ workletNode.port.onmessage = (e) => {
426
+ micBuffer.push(e.data);
427
+ if (!micProcessing) processMicBuffer();
428
+ };
429
+
430
+ async function processMicBuffer() {
431
+ if (micBuffer.length === 0) return;
432
+ micProcessing = true;
433
+ try {
434
+ const chunks = micBuffer.splice(0);
435
+ let totalLen = 0;
436
+ for (const c of chunks) totalLen += c.length;
437
+ const audio = new Float32Array(totalLen);
438
+ let offset = 0;
439
+ for (const c of chunks) { audio.set(c, offset); offset += c.length; }
440
+
441
+ // VAD check (if calibrated)
442
+ if (vad) {
443
+ const speaking = vad.feed_audio(audio);
444
+ // Toggle bone animation transitions based on speaking state
445
+ }
446
+
447
+ const result = await lipsync.processAudioChunk(audio);
448
+ if (result && result.frame_count > 0) {
449
+ for (let i = 0; i < result.frame_count; i++) {
450
+ streamQueue.push(lipsync.getFrame(result, i));
451
+ }
452
+ }
453
+ } finally {
454
+ micProcessing = false;
455
+ if (micBuffer.length > 0) processMicBuffer();
456
+ }
457
+ }
458
+
459
+ // 4. Consume at 30fps in render loop
460
+ let streamTimeAccum = 0;
461
+ const frameInterval = 1.0 / 30.0;
462
+
463
+ function renderLoop() {
464
+ requestAnimationFrame(renderLoop);
465
+ const delta = clock.getDelta();
466
+
467
+ streamTimeAccum += delta;
468
+ while (streamTimeAccum >= frameInterval) {
469
+ streamTimeAccum -= frameInterval;
470
+ if (streamQueue.length > 0) {
471
+ const frame = streamQueue.shift();
472
+ applyBlendshapes(vrm, frame);
473
+ }
474
+ }
475
+
476
+ vrm.update(delta);
477
+ renderer.render(scene, camera);
478
+ }
479
+ ```
480
+
481
+ ## API Reference
482
+
483
+ ### Constructor
484
+
485
+ ```ts
486
+ new LipSyncWasmWrapper(options?: { wasmPath?: string })
487
+ ```
488
+
489
+ | Parameter | Type | Default | Description |
490
+ |-----------|------|---------|-------------|
491
+ | `wasmPath` | `string` | `'./lipsync_wasm_v1.js'` | Path to the WASM glue module |
492
+
493
+ ### Properties
494
+
495
+ | Property | Type | Description |
496
+ |----------|------|-------------|
497
+ | `ready` | `boolean` | `true` after `init()` completes |
498
+ | `modelVersion` | `string` | `'v1'` |
499
+ | `blendshapeDim` | `number` | `111` |
500
+ | `mode` | `'onnx' \| 'heuristic' \| null` | Active inference mode after `init()`; `null` before initialization |
+ | `wasmModule` | `object` | Direct access to WASM exports (for `IdleExpressionGenerator`, `VoiceActivityDetector`) |
501
+
502
+ ### `init(options?): Promise<{ mode: string }>`
503
+
504
+ Initializes the WASM runtime, loads the ONNX model, and applies the expression preset.
505
+
506
+ | Option | Type | Default | Description |
507
+ |--------|------|---------|-------------|
508
+ | `licenseKey` | `string` | — | GoodGangLabs license key (e.g. `"ggl_xxx"`). Omit for the 30-day free trial. |
+ | `onProgress` | `(stage: string, percent: number) => void` | — | Progress callback. Stages: `'wasm'`, `'decrypt'`, `'onnx'`, `'onnx-fallback'` |
509
+ | `preset` | `boolean \| string` | `true` | `true` loads the built-in preset. Pass a URL string to load a custom preset JSON. `false` disables preset loading. |
510
+
511
+ Returns `{ mode: 'onnx' }` or `{ mode: 'heuristic' }` if ONNX is unavailable.
512
+
513
+ ### `processAudio(audio: Float32Array): Promise<ProcessResult>`
514
+
515
+ Processes a complete 16kHz mono PCM audio buffer.
516
+
517
+ ### `processAudioBuffer(audioBuffer: AudioBuffer): Promise<ProcessResult>`
518
+
519
+ Processes a Web Audio API `AudioBuffer` (automatically resampled to 16kHz).
520
+
521
+ ### `processFile(file: File): Promise<ProcessResult>`
522
+
523
+ Decodes and processes an audio `File` object.
524
+
525
+ ### `processAudioChunk(chunk: Float32Array, isLast?: boolean): Promise<ProcessResult | null>`
526
+
527
+ Feeds an audio chunk for real-time streaming. Streaming sessions are managed internally — the first call starts a session, passing `isLast = true` ends it. Returns `null` if the internal buffer has not accumulated enough data.
528
+
529
+ ### `getFrame(result: ProcessResult, frameIndex: number): number[]`
530
+
531
+ Extracts a single blendshape frame from a `ProcessResult`. Returns `number[111]`.
532
+
533
+ ### `getVrmaBytes(): { idle: Uint8Array, speaking: Uint8Array }`
534
+
535
+ Returns embedded VRMA bone animation data for idle and speaking states.
536
+
537
+ ### `reset(): void`
538
+
539
+ Resets internal state and ends any active streaming session.
540
+
541
+ ### `dispose(): void`
542
+
543
+ Releases all WASM and ONNX resources.
544
+
545
+ ### ProcessResult
546
+
547
+ ```ts
548
+ {
549
+ blendshapes: number[]; // Flat array: frame_count * 111 values
550
+ frame_count: number; // Number of output frames (30fps)
551
+ fps: number; // Output frame rate (30)
+ mode?: string; // 'onnx' | 'heuristic' | 'streaming-onnx'
552
+ }
553
+ ```
554
+
555
+ ## Bundler Setup
556
+
557
+ ### Vite
558
+
559
+ Works out of the box. No additional configuration needed.
560
+
561
+ ### Webpack
562
+
563
+ Enable async WebAssembly support:
564
+
565
+ ```js
566
+ // webpack.config.js
567
+ module.exports = {
568
+ experiments: {
569
+ asyncWebAssembly: true,
570
+ },
571
+ };
572
+ ```
573
+
574
+ ### CDN (no bundler)
575
+
576
+ Use `<script type="module">` with an import map:
577
+
578
+ ```html
579
+ <script type="importmap">
580
+ { "imports": {
581
+ "@goodganglabs/lipsync-wasm-v1": "https://your-cdn.com/lipsync-wasm-v1/lipsync-wasm-wrapper.js"
582
+ }}
583
+ </script>
584
+ <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.17.0/dist/ort.min.js"></script>
585
+ <script type="module">
586
+ import { LipSyncWasmWrapper } from '@goodganglabs/lipsync-wasm-v1';
587
+ // ... your code
588
+ </script>
589
+ ```
590
+
591
+ When hosting WASM files on a different path than the wrapper JS, use the `wasmPath` option:
592
+
593
+ ```js
594
+ const lipsync = new LipSyncWasmWrapper({
595
+ wasmPath: '/static/wasm/lipsync_wasm_v1.js'
596
+ });
597
+ ```
598
+
599
+ ## Deployment
600
+
601
+ `.wasm` files must be served with the `application/wasm` MIME type.
602
+ CORS headers are required for cross-origin usage.
603
+
604
+ ## License
605
+
606
+ Proprietary — GoodGang Labs
@@ -0,0 +1,55 @@
1
+ /**
2
+ * @goodganglabs/lipsync-wasm-v1
3
+ * Audio-to-blendshape lip sync engine (111-dim ARKit, phoneme model)
4
+ */
5
+
6
/**
 * Result of a batch or streaming lip sync run.
 *
 * `blendshapes` is a flat array of `frame_count * 111` values: frame `i`
 * occupies indices `[i * 111, (i + 1) * 111)`. Use
 * `LipSyncWasmWrapper.getFrame` to slice out a single frame.
 */
export interface ProcessResult {
  /** Flat blendshape weights, `frame_count * 111` values in frame order. */
  blendshapes: number[];
  /** Number of output frames in `blendshapes`. */
  frame_count: number;
  /** Output frame rate in frames per second (30 per the README). */
  fps: number;
  /** Inference mode that produced this result, e.g. 'onnx' | 'heuristic' | 'streaming-onnx'. */
  mode?: string;
}
12
+
13
/** Result resolved by `LipSyncWasmWrapper.init()`. */
export interface InitResult {
  /** 'onnx' when ONNX Runtime inference is active; 'heuristic' when the fallback is used. */
  mode: 'onnx' | 'heuristic';
}
16
+
17
/** Embedded VRMA bone animation payloads returned by `getVrmaBytes()`. */
export interface VrmaBytes {
  /** VRMA file bytes for the idle pose animation. */
  idle: Uint8Array;
  /** VRMA file bytes for the speaking pose animation. */
  speaking: Uint8Array;
}
21
+
22
/** Options accepted by `LipSyncWasmWrapper.init()`. */
export interface InitOptions {
  /** GoodGangLabs license key (e.g. "ggl_xxx"). Omit for 30-day free trial. */
  licenseKey?: string;
  /** Progress callback; stages include 'wasm', 'decrypt', 'onnx', 'onnx-fallback' (per the README). */
  onProgress?: (stage: string, percent: number) => void;
  /**
   * `true` (default) loads the built-in expression preset, a URL string loads
   * a custom preset JSON, and `false` disables preset loading.
   */
  preset?: boolean | string;
}
28
+
29
/** Options accepted by the `LipSyncWasmWrapper` constructor. */
export interface ConstructorOptions {
  /** Path to the WASM glue module; defaults to './lipsync_wasm_v1.js' per the README. */
  wasmPath?: string;
}
32
+
33
/**
 * WebAssembly-based audio-to-blendshape lip sync engine
 * (111-dim ARKit output, phoneme classification model).
 *
 * Call `init()` before any processing, and `dispose()` when finished to
 * release WASM and ONNX resources.
 */
export class LipSyncWasmWrapper {
  constructor(options?: ConstructorOptions);

  /** `true` once `init()` has completed. */
  readonly ready: boolean;
  /** Engine version tag. */
  readonly modelVersion: 'v1';
  /** Number of blendshape values per output frame. */
  readonly blendshapeDim: 111;
  /** Active inference mode after `init()`; `null` before initialization. */
  readonly mode: 'onnx' | 'heuristic' | null;
  /** Raw WASM exports (e.g. `IdleExpressionGenerator`, `VoiceActivityDetector`). */
  readonly wasmModule: any;

  /** Initializes the WASM runtime and ONNX model; resolves with the selected inference mode. */
  init(options?: InitOptions): Promise<InitResult>;

  /** Processes a complete 16 kHz mono PCM buffer in one batch. */
  processAudio(audio: Float32Array): Promise<ProcessResult>;
  /** Processes a Web Audio `AudioBuffer` (resampled to 16 kHz internally, per the README). */
  processAudioBuffer(audioBuffer: AudioBuffer): Promise<ProcessResult>;
  /** Decodes and processes an audio `File` object. */
  processFile(file: File): Promise<ProcessResult>;

  /**
   * Feeds an audio chunk for real-time streaming. The first call starts a
   * session and `isLast = true` ends it (per the README). Returns `null`
   * while too little audio has accumulated to emit frames.
   */
  processAudioChunk(audioChunk: Float32Array, isLast?: boolean): Promise<ProcessResult | null>;

  /** Extracts frame `frameIndex` from `result` as a `number[111]` array. */
  getFrame(result: ProcessResult, frameIndex: number): number[];

  /** Returns the embedded idle/speaking VRMA bone animation bytes. */
  getVrmaBytes(): VrmaBytes;
  /** Resets internal state and ends any active streaming session. */
  reset(): void;
  /** Releases all WASM and ONNX resources. */
  dispose(): void;
}