@goodganglabs/lipsync-wasm-v2 0.3.0

package/README.md ADDED
@@ -0,0 +1,565 @@
# @goodganglabs/lipsync-wasm-v2

WebAssembly-based real-time audio-to-blendshape lip sync engine.
Converts 16kHz PCM audio into 52-dimensional ARKit-compatible blendshape frames at 30fps using a student distillation model.

## Which Version?

| | V2 (this package) | V1 |
|---|---|---|
| **Dimensions** | 52-dim ARKit | 111-dim ARKit |
| **Model** | Student distillation | Phoneme classification |
| **Post-processing** | Built-in (crisp mouth, fade, blinks) | Manual |
| **Idle expression** | Not included | Built-in `IdleExpressionGenerator` |
| **VAD** | Not included | Built-in `VoiceActivityDetector` |
| **ONNX fallback** | None (ONNX required) | Heuristic fallback |
| **Recommendation** | Most use cases | Full expression control needed |

## Features

- 52-dim ARKit blendshape output (direct prediction, no intermediate phoneme step)
- Batch and real-time streaming processing
- Built-in post-processing: mouth articulation enhancement, fade in/out, automatic blink injection
- Built-in expression preset blending
- Embedded VRMA bone animation data
- Runs entirely in the browser via WebAssembly

## Requirements

- **onnxruntime-web** `>=1.17.0` (peer dependency, **required** — V2 has no heuristic fallback)

```html
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.17.0/dist/ort.min.js"></script>
```

## Installation

```bash
npm install @goodganglabs/lipsync-wasm-v2
```

## Quick Start

### Batch Processing

```js
import { LipSyncWasmWrapper } from '@goodganglabs/lipsync-wasm-v2';

const lipsync = new LipSyncWasmWrapper();
await lipsync.init();

// audioFile: a File object, e.g. from an <input type="file"> element
const result = await lipsync.processFile(audioFile);
for (let i = 0; i < result.frame_count; i++) {
  const frame = lipsync.getFrame(result, i); // number[52]
  applyToAvatar(frame);
}

lipsync.dispose();
```
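
The loop above applies all frames at once; during playback you normally pace them at the engine's 30fps output rate. A minimal pacing sketch, assuming the `result`, `lipsync`, and `applyToAvatar` from above, plus a hypothetical `<audio>` element (`audioEl`) playing the same file:

```js
// Drive frames from the audio clock so the mouth stays in sync with playback
function playWithAudio(audioEl) {
  const fps = 30; // engine output rate
  function tick() {
    // Pick the frame matching the current audio position
    const i = Math.min(Math.floor(audioEl.currentTime * fps), result.frame_count - 1);
    applyToAvatar(lipsync.getFrame(result, i));
    if (!audioEl.ended) requestAnimationFrame(tick);
  }
  audioEl.play();
  requestAnimationFrame(tick);
}
```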

### Three.js VRM Complete Example

Full integration with a VRM avatar: init, load VRM, apply blendshapes, render loop.

```html
<script type="importmap">
{ "imports": {
  "three": "https://cdn.jsdelivr.net/npm/three@0.179.1/build/three.module.js",
  "three/addons/": "https://cdn.jsdelivr.net/npm/three@0.179.1/examples/jsm/",
  "@pixiv/three-vrm": "https://cdn.jsdelivr.net/npm/@pixiv/three-vrm@3.4.5/lib/three-vrm.module.min.js",
  "@pixiv/three-vrm-animation": "https://cdn.jsdelivr.net/npm/@pixiv/three-vrm-animation@3.4.5/lib/three-vrm-animation.module.min.js"
}}
</script>
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.17.0/dist/ort.min.js"></script>

<canvas id="avatar-canvas" style="width:100%; height:500px;"></canvas>

<script type="module">
import * as THREE from 'three';
import { GLTFLoader } from 'three/addons/loaders/GLTFLoader.js';
import { OrbitControls } from 'three/addons/controls/OrbitControls.js';
import { VRMLoaderPlugin, VRMUtils } from '@pixiv/three-vrm';
import { LipSyncWasmWrapper } from '@goodganglabs/lipsync-wasm-v2';

// --- Index-to-name mapping (52-dim ARKit) ---
const SYSTEM_INDEX_TO_BLENDSHAPE = {
  0: ['browDownLeft'], 1: ['browDownRight'], 2: ['browInnerUp'],
  3: ['browOuterUpLeft'], 4: ['browOuterUpRight'],
  5: ['cheekPuff'], 6: ['cheekSquintLeft'], 7: ['cheekSquintRight'],
  8: ['eyeBlinkLeft'], 9: ['eyeBlinkRight'],
  10: ['eyeLookDownLeft'], 11: ['eyeLookDownRight'],
  12: ['eyeLookInLeft'], 13: ['eyeLookInRight'],
  14: ['eyeLookOutLeft'], 15: ['eyeLookOutRight'],
  16: ['eyeLookUpLeft'], 17: ['eyeLookUpRight'],
  18: ['eyeSquintLeft'], 19: ['eyeSquintRight'],
  20: ['eyeWideLeft'], 21: ['eyeWideRight'],
  22: ['jawForward'], 23: ['jawLeft'], 24: ['jawOpen'], 25: ['jawRight'],
  26: ['mouthClose'], 27: ['mouthDimpleLeft'], 28: ['mouthDimpleRight'],
  29: ['mouthFrownLeft'], 30: ['mouthFrownRight'], 31: ['mouthFunnel'],
  32: ['mouthLeft'], 33: ['mouthLowerDownLeft'], 34: ['mouthLowerDownRight'],
  35: ['mouthPressLeft'], 36: ['mouthPressRight'], 37: ['mouthPucker'],
  38: ['mouthRight'], 39: ['mouthRollLower'], 40: ['mouthRollUpper'],
  41: ['mouthShrugLower'], 42: ['mouthShrugUpper'],
  43: ['mouthSmileLeft'], 44: ['mouthSmileRight'],
  45: ['mouthStretchLeft'], 46: ['mouthStretchRight'],
  47: ['mouthUpperUpLeft'], 48: ['mouthUpperUpRight'],
  49: ['noseSneerLeft'], 50: ['noseSneerRight'],
  51: ['tongueOut']
};

// --- Apply blendshape frame to VRM ---
function applyBlendshapes(vrm, frame) {
  if (!vrm) return;

  // VRM 1.0 (expressionManager)
  if (vrm.expressionManager) {
    for (const [idx, names] of Object.entries(SYSTEM_INDEX_TO_BLENDSHAPE)) {
      const value = frame[idx] || 0;
      for (const name of names) {
        vrm.expressionManager.setValue(name, value);
      }
    }
    return;
  }

  // VRM 0.x (blendShapeProxy)
  if (vrm.blendShapeProxy) {
    for (const [idx, names] of Object.entries(SYSTEM_INDEX_TO_BLENDSHAPE)) {
      const value = frame[idx] || 0;
      for (const name of names) {
        vrm.blendShapeProxy.setValue(name, value);
      }
    }
    vrm.blendShapeProxy.update();
    return;
  }

  // Fallback: direct morph target manipulation
  vrm.scene.traverse((child) => {
    if (!child.isMesh || !child.morphTargetDictionary || !child.morphTargetInfluences) return;
    for (const [idx, names] of Object.entries(SYSTEM_INDEX_TO_BLENDSHAPE)) {
      const value = frame[idx] || 0;
      for (const name of names) {
        const morphIdx = child.morphTargetDictionary[name];
        if (morphIdx !== undefined) {
          child.morphTargetInfluences[morphIdx] = value;
        }
      }
    }
  });
}

// --- Setup ---
const canvas = document.getElementById('avatar-canvas');
const scene = new THREE.Scene();
scene.background = new THREE.Color(0x1a1a2e);

const camera = new THREE.PerspectiveCamera(30, canvas.clientWidth / canvas.clientHeight, 0.1, 100);
camera.position.set(0, 1.25, 0.5);

const renderer = new THREE.WebGLRenderer({ canvas, antialias: true });
renderer.setSize(canvas.clientWidth, canvas.clientHeight);
renderer.setPixelRatio(Math.min(window.devicePixelRatio, 2));

const controls = new OrbitControls(camera, canvas);
controls.target.set(0, 1.25, 0);
controls.enableDamping = true;

scene.add(new THREE.AmbientLight(0xffffff, 2.0));
const dirLight = new THREE.DirectionalLight(0xffffff, 1.1);
dirLight.position.set(1, 3, 2);
scene.add(dirLight);

// --- Load VRM ---
const loader = new GLTFLoader();
loader.register((parser) => new VRMLoaderPlugin(parser));

const gltf = await new Promise((resolve, reject) =>
  loader.load('your-avatar.vrm', resolve, undefined, reject)
);
const vrm = gltf.userData.vrm;
VRMUtils.removeUnnecessaryVertices(gltf.scene);
VRMUtils.removeUnnecessaryJoints(gltf.scene);
scene.add(vrm.scene);

// --- Init LipSync ---
const lipsync = new LipSyncWasmWrapper();
await lipsync.init();

// --- Process audio & animate ---
// audioFile: a File object, e.g. from an <input type="file"> element
const result = await lipsync.processFile(audioFile);
let frameIndex = 0;
const clock = new THREE.Clock();

function animate() {
  requestAnimationFrame(animate);
  const delta = clock.getDelta();
  controls.update();

  if (frameIndex < result.frame_count) {
    const frame = lipsync.getFrame(result, frameIndex);
    applyBlendshapes(vrm, frame);
    frameIndex++;
  }

  vrm.update(delta);
  renderer.render(scene, camera);
}
animate();
</script>
```

## ARKit Blendshape Index

Full 52-element index mapping:

| Index | Name | Index | Name |
|-------|------|-------|------|
| 0 | `browDownLeft` | 26 | `mouthClose` |
| 1 | `browDownRight` | 27 | `mouthDimpleLeft` |
| 2 | `browInnerUp` | 28 | `mouthDimpleRight` |
| 3 | `browOuterUpLeft` | 29 | `mouthFrownLeft` |
| 4 | `browOuterUpRight` | 30 | `mouthFrownRight` |
| 5 | `cheekPuff` | 31 | `mouthFunnel` |
| 6 | `cheekSquintLeft` | 32 | `mouthLeft` |
| 7 | `cheekSquintRight` | 33 | `mouthLowerDownLeft` |
| 8 | `eyeBlinkLeft` | 34 | `mouthLowerDownRight` |
| 9 | `eyeBlinkRight` | 35 | `mouthPressLeft` |
| 10 | `eyeLookDownLeft` | 36 | `mouthPressRight` |
| 11 | `eyeLookDownRight` | 37 | `mouthPucker` |
| 12 | `eyeLookInLeft` | 38 | `mouthRight` |
| 13 | `eyeLookInRight` | 39 | `mouthRollLower` |
| 14 | `eyeLookOutLeft` | 40 | `mouthRollUpper` |
| 15 | `eyeLookOutRight` | 41 | `mouthShrugLower` |
| 16 | `eyeLookUpLeft` | 42 | `mouthShrugUpper` |
| 17 | `eyeLookUpRight` | 43 | `mouthSmileLeft` |
| 18 | `eyeSquintLeft` | 44 | `mouthSmileRight` |
| 19 | `eyeSquintRight` | 45 | `mouthStretchLeft` |
| 20 | `eyeWideLeft` | 46 | `mouthStretchRight` |
| 21 | `eyeWideRight` | 47 | `mouthUpperUpLeft` |
| 22 | `jawForward` | 48 | `mouthUpperUpRight` |
| 23 | `jawLeft` | 49 | `noseSneerLeft` |
| 24 | `jawOpen` | 50 | `noseSneerRight` |
| 25 | `jawRight` | 51 | `tongueOut` |

## VRMA Bone Animation

The package includes embedded VRMA bone animation data for idle and speaking poses. Use these with the Three.js `AnimationMixer` for natural body motion during lip sync.

```js
import * as THREE from 'three';
import { GLTFLoader } from 'three/addons/loaders/GLTFLoader.js';
import { VRMAnimationLoaderPlugin, createVRMAnimationClip } from '@pixiv/three-vrm-animation';

// 1. Get embedded VRMA bytes from the wrapper
const vrmaData = lipsync.getVrmaBytes();

// 2. Load VRMA from bytes
async function loadVRMAFromBytes(bytes) {
  const blob = new Blob([bytes], { type: 'application/octet-stream' });
  const url = URL.createObjectURL(blob);
  const loader = new GLTFLoader();
  loader.register((parser) => new VRMAnimationLoaderPlugin(parser));
  const gltf = await new Promise((resolve, reject) =>
    loader.load(url, resolve, undefined, reject)
  );
  URL.revokeObjectURL(url);
  return gltf.userData.vrmAnimations[0];
}

const idleAnim = await loadVRMAFromBytes(vrmaData.idle);
const speakingAnim = await loadVRMAFromBytes(vrmaData.speaking);

// 3. Set up AnimationMixer with crossfade
const mixer = new THREE.AnimationMixer(vrm.scene);

const idleClip = createVRMAnimationClip(idleAnim, vrm);
const speakingClip = createVRMAnimationClip(speakingAnim, vrm);

const idleAction = mixer.clipAction(idleClip);
const speakingAction = mixer.clipAction(speakingClip);

idleAction.setLoop(THREE.LoopRepeat, Infinity);
speakingAction.setLoop(THREE.LoopRepeat, Infinity);

idleAction.setEffectiveWeight(1);
idleAction.play();
speakingAction.setEffectiveWeight(0);
speakingAction.play();

// 4. Smoothstep crossfade between idle and speaking
let crossFadeProgress = 0;
let isSpeaking = false;

function updateBoneWeights(delta) {
  const target = isSpeaking ? 1 : 0;
  const speed = 1.0 / 0.4; // 0.4s transition duration
  if (target > crossFadeProgress) {
    crossFadeProgress = Math.min(crossFadeProgress + delta * speed, 1);
  } else {
    crossFadeProgress = Math.max(crossFadeProgress - delta * speed, 0);
  }
  // Smoothstep interpolation
  const t = crossFadeProgress;
  const w = t * t * (3 - 2 * t);
  speakingAction.setEffectiveWeight(w);
  idleAction.setEffectiveWeight(1 - w);
}

// In your render loop:
// updateBoneWeights(delta);
// mixer.update(delta);
```

## Real-time Streaming

### Microphone Input with AudioWorklet

```js
// Continues the Three.js example above: THREE, lipsync, vrm, renderer,
// scene, camera, and applyBlendshapes are assumed to be in scope.

// 1. Get microphone stream
const stream = await navigator.mediaDevices.getUserMedia({
  audio: { sampleRate: 16000, channelCount: 1, echoCancellation: true }
});
const audioCtx = new AudioContext({ sampleRate: 16000 });
const source = audioCtx.createMediaStreamSource(stream);

// 2. AudioWorklet: batch 1600 samples (100ms @ 16kHz)
const workletCode = `
class MicProcessor extends AudioWorkletProcessor {
  constructor() {
    super();
    this.buffer = [];
    this.bufferLen = 0;
    this.TARGET = 1600; // 100ms @ 16kHz
  }
  process(inputs) {
    const input = inputs[0];
    if (input.length > 0 && input[0].length > 0) {
      this.buffer.push(new Float32Array(input[0]));
      this.bufferLen += input[0].length;
      if (this.bufferLen >= this.TARGET) {
        const merged = new Float32Array(this.bufferLen);
        let off = 0;
        for (const buf of this.buffer) { merged.set(buf, off); off += buf.length; }
        this.port.postMessage(merged);
        this.buffer = [];
        this.bufferLen = 0;
      }
    }
    return true;
  }
}
registerProcessor('mic-processor', MicProcessor);
`;
const blob = new Blob([workletCode], { type: 'application/javascript' });
const url = URL.createObjectURL(blob);
await audioCtx.audioWorklet.addModule(url);
URL.revokeObjectURL(url);

const workletNode = new AudioWorkletNode(audioCtx, 'mic-processor');
source.connect(workletNode);
// MicProcessor writes no output, so this connection is silent; it just keeps
// the node pulling audio in browsers that require a connected graph.
workletNode.connect(audioCtx.destination);

// 3. Frame queue + processing
const streamQueue = [];
let micProcessing = false;
const micBuffer = [];

workletNode.port.onmessage = (e) => {
  micBuffer.push(e.data);
  if (!micProcessing) processMicBuffer();
};

async function processMicBuffer() {
  if (micBuffer.length === 0) return;
  micProcessing = true;
  try {
    const chunks = micBuffer.splice(0);
    let totalLen = 0;
    for (const c of chunks) totalLen += c.length;
    const audio = new Float32Array(totalLen);
    let offset = 0;
    for (const c of chunks) { audio.set(c, offset); offset += c.length; }

    const result = await lipsync.processAudioChunk(audio);
    if (result && result.frame_count > 0) {
      for (let i = 0; i < result.frame_count; i++) {
        streamQueue.push(lipsync.getFrame(result, i));
      }
    }
  } finally {
    micProcessing = false;
    if (micBuffer.length > 0) processMicBuffer();
  }
}

// 4. Consume at 30fps in render loop
const clock = new THREE.Clock();
let streamTimeAccum = 0;
const frameInterval = 1.0 / 30.0;

function renderLoop() {
  requestAnimationFrame(renderLoop);
  const delta = clock.getDelta();

  streamTimeAccum += delta;
  while (streamTimeAccum >= frameInterval) {
    streamTimeAccum -= frameInterval;
    if (streamQueue.length > 0) {
      const frame = streamQueue.shift();
      applyBlendshapes(vrm, frame);
    }
  }

  vrm.update(delta);
  renderer.render(scene, camera);
}
renderLoop();
```
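
If you also run the VRMA idle/speaking crossfade from the earlier section, one simple heuristic is to flip `isSpeaking` based on whether lip sync frames are still queued (a sketch; `streamQueue`, `updateBoneWeights`, and `mixer` come from the snippets above):

```js
// Add inside renderLoop(), before vrm.update(delta):
isSpeaking = streamQueue.length > 0; // speaking pose while frames are pending
updateBoneWeights(delta);
mixer.update(delta);
```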

### TTS Streaming Integration

When processing TTS audio chunks, yield to the main thread periodically to prevent render freezes:

```js
async function processTTSChunks(chunks) {
  for (let i = 0; i < chunks.length; i++) {
    const result = await lipsync.processAudioChunk(
      chunks[i],
      i === chunks.length - 1 // isLast on final chunk
    );
    if (result && result.frame_count > 0) {
      for (let j = 0; j < result.frame_count; j++) {
        streamQueue.push(lipsync.getFrame(result, j));
      }
    }
    // Yield to main thread every 3 chunks (~300ms) to keep rAF rendering smooth
    if ((i + 1) % 3 === 0) {
      await new Promise((resolve) => setTimeout(resolve, 0));
    }
  }
}
```

## API Reference

### Constructor

```ts
new LipSyncWasmWrapper(options?: { wasmPath?: string })
```

| Parameter | Type | Default | Description |
|-----------|------|---------|-------------|
| `wasmPath` | `string` | `'./lipsync_wasm_v2.js'` | Path to the WASM glue module |

### Properties

| Property | Type | Description |
|----------|------|-------------|
| `ready` | `boolean` | `true` after `init()` completes |
| `modelVersion` | `string` | `'v2'` |
| `blendshapeDim` | `number` | `52` |

### `init(options?): Promise<{ mode: string }>`

Initializes the WASM runtime, loads the ONNX model, and applies the expression preset.

| Option | Type | Default | Description |
|--------|------|---------|-------------|
| `licenseKey` | `string` | — | GoodGangLabs license key (e.g. `"ggl_xxx"`). Omit for a 30-day free trial. |
| `onProgress` | `(stage: string, percent: number) => void` | — | Progress callback. Stages: `'wasm'`, `'decrypt'`, `'onnx'` |
| `preset` | `boolean \| string` | `true` | `true` loads the built-in preset. Pass a URL string to load a custom preset JSON. `false` disables preset loading. |

Returns `{ mode: 'v2-onnx' }`. Throws if ONNX Runtime is not available.
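
A minimal initialization sketch combining these options (the preset URL is a placeholder):

```js
const lipsync = new LipSyncWasmWrapper();

const { mode } = await lipsync.init({
  onProgress: (stage, percent) => {
    // stage: 'wasm' | 'decrypt' | 'onnx'
    console.log(`[lipsync] ${stage}: ${percent}%`);
  },
  preset: 'https://example.com/my-preset.json' // or true (built-in) / false (none)
});
console.log(mode); // 'v2-onnx'
```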

### `processAudio(audio: Float32Array): Promise<ProcessResult>`

Processes a complete 16kHz mono PCM audio buffer.

### `processAudioBuffer(audioBuffer: AudioBuffer): Promise<ProcessResult>`

Processes a Web Audio API `AudioBuffer` (automatically resampled to 16kHz).
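
For example, to feed audio fetched from a URL, assuming an initialized `lipsync` (a sketch; the URL is a placeholder, decoding uses the standard Web Audio API):

```js
// Decode any browser-supported format; the wrapper resamples to 16kHz itself
const response = await fetch('/audio/greeting.mp3');
const audioCtx = new AudioContext();
const audioBuffer = await audioCtx.decodeAudioData(await response.arrayBuffer());
const result = await lipsync.processAudioBuffer(audioBuffer);
console.log(result.frame_count, 'frames');
```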

### `processFile(file: File): Promise<ProcessResult>`

Decodes and processes an audio `File` object.

### `processAudioChunk(chunk: Float32Array, isLast?: boolean): Promise<ProcessResult | null>`

Feeds an audio chunk for real-time streaming. Streaming sessions are managed internally — the first call starts a session, and passing `isLast = true` ends it. Returns `null` if the internal buffer has not accumulated enough data.
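
A compact session-lifecycle sketch (`firstChunk` and `finalChunk` are illustrative; a `null` result simply means more audio is needed):

```js
// The first chunk implicitly opens a streaming session
let res = await lipsync.processAudioChunk(firstChunk);   // may be null
// ... feed more chunks as they arrive ...
res = await lipsync.processAudioChunk(finalChunk, true); // isLast = true closes the session

// To abandon a session early (e.g. the user interrupts playback):
lipsync.reset();
```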

### `getFrame(result: ProcessResult, frameIndex: number): number[]`

Extracts a single blendshape frame from a `ProcessResult`. Returns `number[52]`.

### `getVrmaBytes(): { idle: Uint8Array, speaking: Uint8Array }`

Returns embedded VRMA bone animation data for idle and speaking states.

### `reset(): void`

Resets internal state and ends any active streaming session.

### `dispose(): void`

Releases all WASM and ONNX resources.

### ProcessResult

```ts
{
  blendshapes: number[]; // Flat array: frame_count * 52 values
  frame_count: number;   // Number of output frames (30fps)
  fps: number;           // Output frame rate (30)
  mode: string;          // 'v2-onnx' | 'v2-streaming-onnx'
}
```
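
Since `blendshapes` is a flat array of `frame_count * 52` values, `getFrame(result, i)` should be equivalent to slicing it directly, which can be useful when copying frames in bulk (a sketch; `sliceFrame` is a hypothetical helper):

```js
// Manual frame extraction from the flat layout
function sliceFrame(result, i) {
  const dim = 52; // lipsync.blendshapeDim
  return result.blendshapes.slice(i * dim, (i + 1) * dim);
}
```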

## Bundler Setup

### Vite

Works out of the box. No additional configuration needed.

### Webpack

Enable async WebAssembly support:

```js
// webpack.config.js
module.exports = {
  experiments: {
    asyncWebAssembly: true,
  },
};
```

### CDN (no bundler)

Use `<script type="module">` with an import map:

```html
<script type="importmap">
{ "imports": {
  "@goodganglabs/lipsync-wasm-v2": "https://your-cdn.com/lipsync-wasm-v2/lipsync-wasm-wrapper.js"
}}
</script>
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.17.0/dist/ort.min.js"></script>
<script type="module">
import { LipSyncWasmWrapper } from '@goodganglabs/lipsync-wasm-v2';
// ... your code
</script>
```

When hosting WASM files on a different path than the wrapper JS, use the `wasmPath` option:

```js
const lipsync = new LipSyncWasmWrapper({
  wasmPath: '/static/wasm/lipsync_wasm_v2.js'
});
```

## Deployment

`.wasm` files must be served with the `application/wasm` MIME type.
CORS headers are required for cross-origin usage.
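
As one option, an Express static server can set both headers explicitly (a sketch; recent Express versions already map `.wasm` to `application/wasm`, and the permissive CORS header is illustrative):

```js
import express from 'express';

const app = express();
app.use(express.static('public', {
  setHeaders: (res, filePath) => {
    if (filePath.endsWith('.wasm')) {
      res.setHeader('Content-Type', 'application/wasm');
    }
    // Adjust the allowed origin to your deployment
    res.setHeader('Access-Control-Allow-Origin', '*');
  }
}));
app.listen(8080);
```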

## License

Proprietary — GoodGang Labs

@@ -0,0 +1,54 @@
/**
 * @goodganglabs/lipsync-wasm-v2
 * Audio-to-blendshape lip sync engine (52-dim ARKit, student model)
 */

export interface ProcessResult {
  blendshapes: number[];
  frame_count: number;
  fps: number;
  mode?: string;
}

export interface InitResult {
  mode: 'v2-onnx';
}

export interface VrmaBytes {
  idle: Uint8Array;
  speaking: Uint8Array;
}

export interface InitOptions {
  /** GoodGangLabs license key (e.g. "ggl_xxx"). Omit for 30-day free trial. */
  licenseKey?: string;
  onProgress?: (stage: string, percent: number) => void;
  preset?: boolean | string;
}

export interface ConstructorOptions {
  wasmPath?: string;
}

export class LipSyncWasmWrapper {
  constructor(options?: ConstructorOptions);

  readonly ready: boolean;
  readonly modelVersion: 'v2';
  readonly blendshapeDim: 52;
  readonly wasmModule: any;

  init(options?: InitOptions): Promise<InitResult>;

  processAudio(audio: Float32Array): Promise<ProcessResult>;
  processAudioBuffer(audioBuffer: AudioBuffer): Promise<ProcessResult>;
  processFile(file: File): Promise<ProcessResult>;

  processAudioChunk(audioChunk: Float32Array, isLast?: boolean): Promise<ProcessResult | null>;

  getFrame(result: ProcessResult, frameIndex: number): number[];

  getVrmaBytes(): VrmaBytes;
  reset(): void;
  dispose(): void;
}