@goodganglabs/lipsync-wasm-v1 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of @goodganglabs/lipsync-wasm-v1 might be problematic. Click here for more details.
- package/README.md +606 -0
- package/lipsync-wasm-wrapper.d.ts +55 -0
- package/lipsync-wasm-wrapper.js +377 -0
- package/lipsync_wasm_v1.d.ts +302 -0
- package/lipsync_wasm_v1.js +1083 -0
- package/lipsync_wasm_v1_bg.wasm +0 -0
- package/package.json +27 -0
|
@@ -0,0 +1,377 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @goodganglabs/lipsync-wasm-v1
|
|
3
|
+
* Audio-to-blendshape lip sync engine (111-dim ARKit, phoneme model)
|
|
4
|
+
* @module lipsync-wasm-v1
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
// Default module specifier of the WASM glue code; used when the constructor gets no `wasmPath` option.
const WASM_MODULE_PATH = './lipsync_wasm_v1.js';
// Sample rate (Hz) that audio is decoded/resampled to before it is handed to the engine.
const TARGET_SAMPLE_RATE = 16000;
// Number of values in one blendshape frame; used to slice flat frame buffers.
const BLENDSHAPE_DIM = 111;
// Name of the tensor fed to the ONNX session.
const ONNX_INPUT_NAME = 'input1';
// Name of the tensor read back from the ONNX session results.
const ONNX_OUTPUT_NAME = 'dense_8';

// ── License Constants ──
// Base URL of the license service; `/validate`, `/trial/start` and `/trial/validate` are appended to it.
const LICENSE_API_BASE = 'https://api.goodganglabs.com/v1/license';
// localStorage key under which the trial record ({ start, fingerprint }) is stored.
const TRIAL_STORAGE_KEY = '__ggl_lipsync_trial';
// sessionStorage key under which the cached token ({ token, cachedAt }) is stored.
const TOKEN_CACHE_KEY = '__ggl_lipsync_token_v1';
const TRIAL_DURATION_MS = 30 * 24 * 60 * 60 * 1000; // 30 days
const TOKEN_CACHE_DURATION_MS = 23 * 60 * 60 * 1000; // 23h (buffer before 24h expiry)
|
|
19
|
+
|
|
20
|
+
export class LipSyncWasmWrapper {
|
|
21
|
+
#wasmPath;
|
|
22
|
+
#wasmModule = null;
|
|
23
|
+
#engine = null;
|
|
24
|
+
#onnxSession = null;
|
|
25
|
+
#ready = false;
|
|
26
|
+
#streaming = false;
|
|
27
|
+
#mode = null;
|
|
28
|
+
|
|
29
|
+
constructor(options = {}) {
|
|
30
|
+
this.#wasmPath = options.wasmPath || WASM_MODULE_PATH;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
get ready() { return this.#ready; }
|
|
34
|
+
get modelVersion() { return 'v1'; }
|
|
35
|
+
get blendshapeDim() { return BLENDSHAPE_DIM; }
|
|
36
|
+
get mode() { return this.#mode; }
|
|
37
|
+
get wasmModule() { return this.#wasmModule; }
|
|
38
|
+
|
|
39
|
+
async init(options = {}) {
|
|
40
|
+
const { licenseKey, onProgress, preset = true } = options;
|
|
41
|
+
|
|
42
|
+
onProgress?.('wasm', 0);
|
|
43
|
+
this.#wasmModule = await import(this.#wasmPath);
|
|
44
|
+
await this.#wasmModule.default();
|
|
45
|
+
this.#engine = new this.#wasmModule.LipSyncWasm();
|
|
46
|
+
onProgress?.('wasm', 100);
|
|
47
|
+
|
|
48
|
+
onProgress?.('license', 0);
|
|
49
|
+
const token = await this.#obtainLicenseToken(licenseKey);
|
|
50
|
+
onProgress?.('license', 100);
|
|
51
|
+
|
|
52
|
+
onProgress?.('onnx', 0);
|
|
53
|
+
try {
|
|
54
|
+
if (typeof ort === 'undefined') throw new Error('onnxruntime-web not available');
|
|
55
|
+
ort.env.wasm.numThreads = 1;
|
|
56
|
+
onProgress?.('decrypt', 0);
|
|
57
|
+
const nowSecs = Math.floor(Date.now() / 1000);
|
|
58
|
+
const modelBytes = this.#wasmModule.get_onnx_model_licensed(token, nowSecs);
|
|
59
|
+
onProgress?.('decrypt', 100);
|
|
60
|
+
this.#onnxSession = await ort.InferenceSession.create(modelBytes.buffer, {
|
|
61
|
+
executionProviders: ['wasm'],
|
|
62
|
+
graphOptimizationLevel: 'all',
|
|
63
|
+
});
|
|
64
|
+
onProgress?.('onnx', 100);
|
|
65
|
+
} catch (err) {
|
|
66
|
+
console.warn('[lipsync-v1] ONNX unavailable, using heuristic fallback:', err.message);
|
|
67
|
+
this.#onnxSession = null;
|
|
68
|
+
onProgress?.('onnx-fallback', 100);
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
// Mark license as validated for heuristic mode gate
|
|
72
|
+
this.#engine.set_license_validated();
|
|
73
|
+
|
|
74
|
+
await this.#loadPreset(preset);
|
|
75
|
+
this.#ready = true;
|
|
76
|
+
this.#mode = this.#onnxSession ? 'onnx' : 'heuristic';
|
|
77
|
+
return { mode: this.#mode };
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
// ── Batch Processing ──
|
|
81
|
+
|
|
82
|
+
async processAudio(audio) {
|
|
83
|
+
this.#assertReady();
|
|
84
|
+
|
|
85
|
+
if (this.#onnxSession) {
|
|
86
|
+
const input = this.#engine.prepare_onnx_input(audio);
|
|
87
|
+
const output = await this.#runInference(input.mfcc_data, [1, input.num_frames, input.mfcc_dim]);
|
|
88
|
+
const result = this.#engine.assemble_and_postprocess(output);
|
|
89
|
+
return { ...result, mode: 'onnx' };
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
return { ...this.#engine.process_audio_with_animation(audio), mode: 'heuristic' };
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
async processAudioBuffer(audioBuffer) {
|
|
96
|
+
return this.processAudio(resampleToMono(audioBuffer, TARGET_SAMPLE_RATE));
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
async processFile(file) {
|
|
100
|
+
const arrayBuffer = await file.arrayBuffer();
|
|
101
|
+
const ctx = new OfflineAudioContext(1, 1, TARGET_SAMPLE_RATE);
|
|
102
|
+
const audioBuffer = await ctx.decodeAudioData(arrayBuffer);
|
|
103
|
+
return this.processAudioBuffer(audioBuffer);
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
// ── Streaming ──
|
|
107
|
+
|
|
108
|
+
async processAudioChunk(audioChunk, isLast = false) {
|
|
109
|
+
this.#assertReady();
|
|
110
|
+
|
|
111
|
+
if (!this.#onnxSession) {
|
|
112
|
+
return { ...this.#engine.process_audio_with_animation(audioChunk), mode: 'heuristic' };
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
if (!this.#streaming) {
|
|
116
|
+
this.#engine.start_streaming();
|
|
117
|
+
this.#streaming = true;
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
const state = this.#engine.feed_audio_chunk(audioChunk);
|
|
121
|
+
const flushState = isLast ? this.#engine.flush_audio_buffer() : null;
|
|
122
|
+
const pending = state.ready ? state : (flushState?.ready ? flushState : null);
|
|
123
|
+
|
|
124
|
+
if (!pending) {
|
|
125
|
+
if (isLast) this.#endStreamingSession();
|
|
126
|
+
return null;
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
const output = await this.#runInference(pending.mfcc_data, [1, pending.num_frames, pending.mfcc_dim]);
|
|
130
|
+
const processed = this.#engine.feed_onnx_result(output, pending.num_classes);
|
|
131
|
+
|
|
132
|
+
if (isLast) this.#endStreamingSession();
|
|
133
|
+
return { ...processed, mode: 'streaming-onnx' };
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
// ── Frame Extraction ──
|
|
137
|
+
|
|
138
|
+
getFrame(result, frameIndex) {
|
|
139
|
+
if (!result?.blendshapes) return new Array(BLENDSHAPE_DIM).fill(0);
|
|
140
|
+
|
|
141
|
+
if (Array.isArray(result.blendshapes) && typeof result.blendshapes[0] === 'number') {
|
|
142
|
+
const offset = frameIndex * BLENDSHAPE_DIM;
|
|
143
|
+
return result.blendshapes.slice(offset, offset + BLENDSHAPE_DIM);
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
return result.blendshapes[frameIndex] || new Array(BLENDSHAPE_DIM).fill(0);
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
// ── Utilities ──
|
|
150
|
+
|
|
151
|
+
getVrmaBytes() {
|
|
152
|
+
this.#assertReady();
|
|
153
|
+
return {
|
|
154
|
+
idle: this.#engine.get_idle_vrma_bytes(),
|
|
155
|
+
speaking: this.#engine.get_speaking_vrma_bytes(),
|
|
156
|
+
};
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
reset() {
|
|
160
|
+
if (this.#streaming) this.#endStreamingSession();
|
|
161
|
+
this.#engine?.reset();
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
dispose() {
|
|
165
|
+
this.reset();
|
|
166
|
+
if (this.#engine) { this.#engine.free(); this.#engine = null; }
|
|
167
|
+
this.#onnxSession = null;
|
|
168
|
+
this.#wasmModule = null;
|
|
169
|
+
this.#ready = false;
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
// ── Private ──
|
|
173
|
+
|
|
174
|
+
#assertReady() {
|
|
175
|
+
if (!this.#ready || !this.#engine) {
|
|
176
|
+
throw new Error('[lipsync-v1] Not initialized. Call init() first.');
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
#endStreamingSession() {
|
|
181
|
+
this.#engine.end_streaming();
|
|
182
|
+
this.#streaming = false;
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
async #runInference(inputData, shape) {
|
|
186
|
+
const tensor = new ort.Tensor('float32', new Float32Array(inputData), shape);
|
|
187
|
+
const results = await this.#onnxSession.run({ [ONNX_INPUT_NAME]: tensor });
|
|
188
|
+
return new Float32Array(results[ONNX_OUTPUT_NAME].data);
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
async #loadPreset(preset) {
|
|
192
|
+
if (preset === true) {
|
|
193
|
+
this.#engine.load_default_preset();
|
|
194
|
+
} else if (typeof preset === 'string') {
|
|
195
|
+
const response = await fetch(preset);
|
|
196
|
+
if (!response.ok) throw new Error(`[lipsync-v1] Preset fetch failed: ${response.status}`);
|
|
197
|
+
const { blendshape } = await response.json();
|
|
198
|
+
if (!blendshape?.length) throw new Error('[lipsync-v1] Invalid preset format');
|
|
199
|
+
const rows = blendshape.length;
|
|
200
|
+
const cols = blendshape[0].length;
|
|
201
|
+
const flat = new Float32Array(rows * cols);
|
|
202
|
+
for (let i = 0; i < rows; i++) flat.set(blendshape[i], i * cols);
|
|
203
|
+
this.#engine.load_preset(flat, rows, cols);
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
// ── License Orchestration ──
|
|
208
|
+
|
|
209
|
+
async #obtainLicenseToken(licenseKey) {
|
|
210
|
+
// 1. Check cached token (sessionStorage)
|
|
211
|
+
const cached = this.#getCachedToken();
|
|
212
|
+
if (cached) return cached;
|
|
213
|
+
|
|
214
|
+
// 2. License key provided → validate with server
|
|
215
|
+
if (licenseKey) {
|
|
216
|
+
const token = await this.#validateLicenseKey(licenseKey);
|
|
217
|
+
this.#cacheToken(token);
|
|
218
|
+
return token;
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
// 3. Trial flow
|
|
222
|
+
return this.#handleTrialFlow();
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
async #validateLicenseKey(licenseKey) {
|
|
226
|
+
const fingerprint = await this.#getFingerprint();
|
|
227
|
+
const res = await fetch(`${LICENSE_API_BASE}/validate`, {
|
|
228
|
+
method: 'POST',
|
|
229
|
+
headers: {
|
|
230
|
+
'Content-Type': 'application/json',
|
|
231
|
+
'Authorization': `Bearer ${licenseKey}`,
|
|
232
|
+
},
|
|
233
|
+
body: JSON.stringify({ pkg: 'lipsync-wasm-v1', fingerprint }),
|
|
234
|
+
});
|
|
235
|
+
|
|
236
|
+
if (!res.ok) {
|
|
237
|
+
const body = await res.text().catch(() => '');
|
|
238
|
+
throw new Error(`[lipsync-v1] License validation failed (${res.status}): ${body}`);
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
const { token } = await res.json();
|
|
242
|
+
if (!token) throw new Error('[lipsync-v1] License server returned no token');
|
|
243
|
+
return token;
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
async #handleTrialFlow() {
|
|
247
|
+
const fingerprint = await this.#getFingerprint();
|
|
248
|
+
let trialData = null;
|
|
249
|
+
|
|
250
|
+
try {
|
|
251
|
+
const stored = localStorage.getItem(TRIAL_STORAGE_KEY);
|
|
252
|
+
if (stored) trialData = JSON.parse(stored);
|
|
253
|
+
} catch { /* localStorage unavailable or corrupted */ }
|
|
254
|
+
|
|
255
|
+
if (!trialData) {
|
|
256
|
+
// First use → start trial
|
|
257
|
+
const res = await fetch(`${LICENSE_API_BASE}/trial/start`, {
|
|
258
|
+
method: 'POST',
|
|
259
|
+
headers: { 'Content-Type': 'application/json' },
|
|
260
|
+
body: JSON.stringify({ pkg: 'lipsync-wasm-v1', fingerprint }),
|
|
261
|
+
});
|
|
262
|
+
|
|
263
|
+
if (!res.ok) {
|
|
264
|
+
const body = await res.text().catch(() => '');
|
|
265
|
+
throw new Error(`[lipsync-v1] Trial start failed (${res.status}): ${body}`);
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
const { token } = await res.json();
|
|
269
|
+
if (!token) throw new Error('[lipsync-v1] Trial server returned no token');
|
|
270
|
+
|
|
271
|
+
try {
|
|
272
|
+
localStorage.setItem(TRIAL_STORAGE_KEY, JSON.stringify({
|
|
273
|
+
start: Date.now(),
|
|
274
|
+
fingerprint,
|
|
275
|
+
}));
|
|
276
|
+
} catch { /* localStorage unavailable */ }
|
|
277
|
+
|
|
278
|
+
this.#cacheToken(token);
|
|
279
|
+
return token;
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
// Existing trial → check expiry
|
|
283
|
+
const elapsed = Date.now() - (trialData.start || 0);
|
|
284
|
+
if (elapsed > TRIAL_DURATION_MS) {
|
|
285
|
+
throw new Error(
|
|
286
|
+
'[lipsync-v1] Trial period expired (30 days). ' +
|
|
287
|
+
'Pass a licenseKey to init(): new LipSyncWasmWrapper().init({ licenseKey: "ggl_..." })'
|
|
288
|
+
);
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
// Trial still valid → renew token
|
|
292
|
+
const res = await fetch(`${LICENSE_API_BASE}/trial/validate`, {
|
|
293
|
+
method: 'POST',
|
|
294
|
+
headers: { 'Content-Type': 'application/json' },
|
|
295
|
+
body: JSON.stringify({ pkg: 'lipsync-wasm-v1', fingerprint }),
|
|
296
|
+
});
|
|
297
|
+
|
|
298
|
+
if (!res.ok) {
|
|
299
|
+
const body = await res.text().catch(() => '');
|
|
300
|
+
throw new Error(`[lipsync-v1] Trial validation failed (${res.status}): ${body}`);
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
const { token } = await res.json();
|
|
304
|
+
if (!token) throw new Error('[lipsync-v1] Trial server returned no token');
|
|
305
|
+
|
|
306
|
+
this.#cacheToken(token);
|
|
307
|
+
return token;
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
async #getFingerprint() {
|
|
311
|
+
const raw = [
|
|
312
|
+
navigator.userAgent || '',
|
|
313
|
+
navigator.language || '',
|
|
314
|
+
screen?.width || 0,
|
|
315
|
+
screen?.height || 0,
|
|
316
|
+
Intl?.DateTimeFormat()?.resolvedOptions()?.timeZone || '',
|
|
317
|
+
].join('|');
|
|
318
|
+
|
|
319
|
+
if (typeof crypto?.subtle?.digest === 'function') {
|
|
320
|
+
const buf = new TextEncoder().encode(raw);
|
|
321
|
+
const hash = await crypto.subtle.digest('SHA-256', buf);
|
|
322
|
+
return Array.from(new Uint8Array(hash)).map(b => b.toString(16).padStart(2, '0')).join('');
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
// Fallback: simple hash
|
|
326
|
+
let h = 0;
|
|
327
|
+
for (let i = 0; i < raw.length; i++) {
|
|
328
|
+
h = ((h << 5) - h + raw.charCodeAt(i)) | 0;
|
|
329
|
+
}
|
|
330
|
+
return 'f' + Math.abs(h).toString(16);
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
#getCachedToken() {
|
|
334
|
+
try {
|
|
335
|
+
const stored = sessionStorage.getItem(TOKEN_CACHE_KEY);
|
|
336
|
+
if (!stored) return null;
|
|
337
|
+
const { token, cachedAt } = JSON.parse(stored);
|
|
338
|
+
if (Date.now() - cachedAt > TOKEN_CACHE_DURATION_MS) {
|
|
339
|
+
sessionStorage.removeItem(TOKEN_CACHE_KEY);
|
|
340
|
+
return null;
|
|
341
|
+
}
|
|
342
|
+
return token;
|
|
343
|
+
} catch {
|
|
344
|
+
return null;
|
|
345
|
+
}
|
|
346
|
+
}
|
|
347
|
+
|
|
348
|
+
#cacheToken(token) {
|
|
349
|
+
try {
|
|
350
|
+
sessionStorage.setItem(TOKEN_CACHE_KEY, JSON.stringify({
|
|
351
|
+
token,
|
|
352
|
+
cachedAt: Date.now(),
|
|
353
|
+
}));
|
|
354
|
+
} catch { /* sessionStorage unavailable */ }
|
|
355
|
+
}
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
// ── Module-level Helpers ──
|
|
359
|
+
|
|
360
|
+
function resampleToMono(audioBuffer, targetRate) {
|
|
361
|
+
const data = audioBuffer.getChannelData(0);
|
|
362
|
+
if (audioBuffer.sampleRate === targetRate) return data;
|
|
363
|
+
|
|
364
|
+
const ratio = targetRate / audioBuffer.sampleRate;
|
|
365
|
+
const length = Math.round(data.length * ratio);
|
|
366
|
+
const output = new Float32Array(length);
|
|
367
|
+
|
|
368
|
+
for (let i = 0; i < length; i++) {
|
|
369
|
+
const srcPos = i / ratio;
|
|
370
|
+
const lo = Math.floor(srcPos);
|
|
371
|
+
const hi = Math.min(lo + 1, data.length - 1);
|
|
372
|
+
const frac = srcPos - lo;
|
|
373
|
+
output[i] = data[lo] + frac * (data[hi] - data[lo]);
|
|
374
|
+
}
|
|
375
|
+
|
|
376
|
+
return output;
|
|
377
|
+
}
|
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
/* tslint:disable */
|
|
2
|
+
/* eslint-disable */
|
|
3
|
+
|
|
4
|
+
/**
 * Struct that generates eye blinks and subtle facial expressions while idle.
 *
 * Reuses V2's add_blinks logic so that natural expression changes
 * also occur in the idle state.
 */
export class IdleExpressionGenerator {
  free(): void;
  [Symbol.dispose](): void;
  /**
   * Returns the 52-dim blendshape frame corresponding to elapsed_time (seconds).
   */
  get_frame(elapsed_time: number): Float32Array;
  constructor();
  /**
   * Resets the state.
   */
  reset(): void;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Main LipSync WASM class.
 */
export class LipSyncWasm {
  free(): void;
  [Symbol.dispose](): void;
  /**
   * Batch mode: assemble the ONNX results and post-process them.
   *
   * phoneme_probs: ONNX output (flattened, N * 61)
   */
  assemble_and_postprocess(phoneme_probs: Float32Array): any;
  /**
   * Removes the preset.
   */
  clear_preset(): void;
  /**
   * Ends streaming mode.
   */
  end_streaming(): void;
  /**
   * Extracts MFCCs (used on the JS side for onnxruntime-web inference).
   */
  extract_mfcc(audio: Float32Array): Float32Array;
  /**
   * Streaming: extracts MFCCs from an audio chunk.
   *
   * Returns: flattened MFCC (N * 13)
   */
  extract_mfcc_chunk(audio: Float32Array): Float32Array;
  /**
   * Streaming: takes an audio chunk, extracts MFCCs and buffers them.
   *
   * Accumulates MFCCs in an internal buffer and, once 30 or more frames
   * are reached, returns the MFCC data (ONNX inference is performed in JS).
   */
  feed_audio_chunk(audio: Float32Array): any;
  /**
   * Streaming: generates blendshapes from the ONNX inference result.
   *
   * phoneme_probs: ONNX output (flattened, N * num_classes)
   */
  feed_onnx_result(phoneme_probs: Float32Array, num_classes: number): any;
  /**
   * Streaming: flushes the remaining buffer (call on the last chunk).
   */
  flush_audio_buffer(): any;
  /**
   * Queries whether post-correction is enabled.
   */
  get_corrections_enabled(): boolean;
  /**
   * Extracts the blendshapes of a specific frame.
   */
  get_frame(result: any, frame_index: number): Float32Array;
  /**
   * Returns the number of frames.
   */
  get_frame_count(): number;
  /**
   * Returns the raw bytes of the built-in idle VRMA.
   */
  get_idle_vrma_bytes(): Uint8Array;
  /**
   * Returns the raw bytes of the built-in speaking VRMA.
   */
  get_speaking_vrma_bytes(): Uint8Array;
  /**
   * Whether a preset is loaded.
   */
  has_preset(): boolean;
  /**
   * Queries whether the license has been validated.
   */
  is_license_validated(): boolean;
  /**
   * Queries whether streaming mode is active.
   */
  is_streaming(): boolean;
  /**
   * Loads the built-in default preset (jc01).
   */
  load_default_preset(): void;
  /**
   * Loads a preset animation.
   */
  load_preset(data: Float32Array, num_frames: number, num_dims: number): void;
  /**
   * Creates a new LipSync instance.
   */
  constructor();
  /**
   * Batch mode: prepares the ONNX input (MFCC extraction).
   *
   * After running ONNX inference in JS, pass the result to assemble_and_postprocess().
   * V1 needs no chunking (variable-length MFCC → ONNX).
   */
  prepare_onnx_input(audio: Float32Array): any;
  /**
   * Processes audio (heuristic fallback mode).
   */
  process_audio(audio: Float32Array): any;
  /**
   * Audio + preset blending (heuristic animation mode).
   */
  process_audio_with_animation(audio: Float32Array): any;
  /**
   * Generates blendshapes from phoneme probabilities (uses the ONNX inference result).
   */
  process_phonemes(phoneme_probs: Float32Array, num_classes: number): any;
  /**
   * Streaming: generates blendshapes from phoneme probabilities (ONNX inference result).
   *
   * The OneEuroFilter keeps its time offset, ensuring continuity between chunks.
   */
  process_phonemes_chunk(phoneme_probs: Float32Array, num_classes: number): any;
  /**
   * Streaming: phoneme probabilities + preset blending.
   */
  process_phonemes_chunk_with_animation(phoneme_probs: Float32Array, num_classes: number): any;
  /**
   * Phoneme probabilities + preset blending (animation mode).
   */
  process_phonemes_with_animation(phoneme_probs: Float32Array, num_classes: number): any;
  /**
   * Resets the filter state.
   */
  reset(): void;
  /**
   * Enables/disables post-correction.
   */
  set_corrections_enabled(enabled: boolean): void;
  /**
   * Marks license validation as complete (called by the JS wrapper after get_onnx_model_licensed succeeds).
   */
  set_license_validated(): void;
  /**
   * Starts streaming mode.
   */
  start_streaming(): void;
}
|
|
165
|
+
|
|
166
|
+
/**
 * RMS-energy-based voice activity detector (shared by V1/V2).
 *
 * During microphone streaming, determines whether speech is in progress
 * and controls the idle<->speaking bone animation transition.
 */
export class VoiceActivityDetector {
  free(): void;
  [Symbol.dispose](): void;
  /**
   * Takes audio samples and returns whether speech is active.
   *
   * RMS energy > threshold -> speaking; otherwise silent after hold_time.
   */
  feed_audio(samples: Float32Array): boolean;
  /**
   * Queries the current speaking state.
   */
  is_speaking(): boolean;
  constructor(threshold: number, hold_time: number);
  /**
   * Resets the state.
   */
  reset(): void;
  /**
   * Changes hold_time.
   */
  set_hold_time(hold_time: number): void;
  /**
   * Changes the RMS threshold.
   */
  set_threshold(threshold: number): void;
}
|
|
199
|
+
|
|
200
|
+
/**
 * [Deprecated] Legacy API — the model cannot be accessed without a license.
 *
 * Use get_onnx_model_licensed(token, timestamp) instead.
 *
 * @deprecated Use get_onnx_model_licensed(token, timestamp).
 */
export function get_onnx_model(): Uint8Array;
|
|
206
|
+
|
|
207
|
+
/**
 * License verification + ONNX model decryption (performed atomically).
 *
 * AES-256-GCM decryption is performed only after the Ed25519 signature check passes.
 *
 * # Arguments
 * * `token` - License token issued by the server (EdDSA-signed)
 * * `current_timestamp_secs` - Current Unix timestamp (seconds)
 */
export function get_onnx_model_licensed(token: string, current_timestamp_secs: number): Uint8Array;
|
|
217
|
+
|
|
218
|
+
/**
 * Sets up the panic hook when the WASM module is initialized.
 */
export function start(): void;
|
|
222
|
+
|
|
223
|
+
/**
 * Values accepted (directly or wrapped in a promise) by the default async init function.
 */
export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembly.Module;
|
|
224
|
+
|
|
225
|
+
/**
 * Shape of the object returned by `initSync` and resolved by the default init
 * function: the module's linear memory plus its low-level exports.
 *
 * Export names mirror the public classes and functions declared in this file
 * (`lipsyncwasm_*`, `idleexpressiongenerator_*`, `voiceactivitydetector_*`)
 * alongside `__wbindgen_*` / `__externref_*` runtime helpers. All parameters
 * and results are numeric at this level.
 */
export interface InitOutput {
  readonly memory: WebAssembly.Memory;
  readonly __wbg_lipsyncwasm_free: (a: number, b: number) => void;
  readonly get_onnx_model: () => [number, number, number, number];
  readonly get_onnx_model_licensed: (a: number, b: number, c: number) => [number, number, number, number];
  readonly lipsyncwasm_assemble_and_postprocess: (a: number, b: number, c: number) => [number, number, number];
  readonly lipsyncwasm_clear_preset: (a: number) => void;
  readonly lipsyncwasm_end_streaming: (a: number) => void;
  readonly lipsyncwasm_extract_mfcc: (a: number, b: number, c: number) => [number, number];
  readonly lipsyncwasm_extract_mfcc_chunk: (a: number, b: number, c: number) => [number, number];
  readonly lipsyncwasm_feed_audio_chunk: (a: number, b: number, c: number) => [number, number, number];
  readonly lipsyncwasm_feed_onnx_result: (a: number, b: number, c: number, d: number) => [number, number, number];
  readonly lipsyncwasm_flush_audio_buffer: (a: number) => [number, number, number];
  readonly lipsyncwasm_get_corrections_enabled: (a: number) => number;
  readonly lipsyncwasm_get_frame: (a: number, b: any, c: number) => [number, number, number, number];
  readonly lipsyncwasm_get_frame_count: (a: number) => number;
  readonly lipsyncwasm_get_idle_vrma_bytes: (a: number) => [number, number];
  readonly lipsyncwasm_get_speaking_vrma_bytes: (a: number) => [number, number];
  readonly lipsyncwasm_has_preset: (a: number) => number;
  readonly lipsyncwasm_is_license_validated: (a: number) => number;
  readonly lipsyncwasm_is_streaming: (a: number) => number;
  readonly lipsyncwasm_load_default_preset: (a: number) => [number, number];
  readonly lipsyncwasm_load_preset: (a: number, b: number, c: number, d: number, e: number) => void;
  readonly lipsyncwasm_new: () => [number, number, number];
  readonly lipsyncwasm_prepare_onnx_input: (a: number, b: number, c: number) => [number, number, number];
  readonly lipsyncwasm_process_audio: (a: number, b: number, c: number) => [number, number, number];
  readonly lipsyncwasm_process_audio_with_animation: (a: number, b: number, c: number) => [number, number, number];
  readonly lipsyncwasm_process_phonemes: (a: number, b: number, c: number, d: number) => [number, number, number];
  readonly lipsyncwasm_process_phonemes_chunk: (a: number, b: number, c: number, d: number) => [number, number, number];
  readonly lipsyncwasm_process_phonemes_chunk_with_animation: (a: number, b: number, c: number, d: number) => [number, number, number];
  readonly lipsyncwasm_process_phonemes_with_animation: (a: number, b: number, c: number, d: number) => [number, number, number];
  readonly lipsyncwasm_reset: (a: number) => void;
  readonly lipsyncwasm_set_corrections_enabled: (a: number, b: number) => void;
  readonly lipsyncwasm_set_license_validated: (a: number) => void;
  readonly lipsyncwasm_start_streaming: (a: number) => void;
  readonly start: () => void;
  readonly __wbg_idleexpressiongenerator_free: (a: number, b: number) => void;
  readonly idleexpressiongenerator_get_frame: (a: number, b: number) => [number, number];
  readonly idleexpressiongenerator_new: () => number;
  readonly idleexpressiongenerator_reset: (a: number) => void;
  readonly __wbg_voiceactivitydetector_free: (a: number, b: number) => void;
  readonly voiceactivitydetector_feed_audio: (a: number, b: number, c: number) => number;
  readonly voiceactivitydetector_is_speaking: (a: number) => number;
  readonly voiceactivitydetector_new: (a: number, b: number) => number;
  readonly voiceactivitydetector_reset: (a: number) => void;
  readonly voiceactivitydetector_set_hold_time: (a: number, b: number) => void;
  readonly voiceactivitydetector_set_threshold: (a: number, b: number) => void;
  readonly __wbindgen_malloc: (a: number, b: number) => number;
  readonly __wbindgen_realloc: (a: number, b: number, c: number, d: number) => number;
  readonly __wbindgen_exn_store: (a: number) => void;
  readonly __externref_table_alloc: () => number;
  readonly __wbindgen_externrefs: WebAssembly.Table;
  readonly __wbindgen_free: (a: number, b: number, c: number) => void;
  readonly __externref_table_dealloc: (a: number) => void;
  readonly __wbindgen_start: () => void;
}
|
|
281
|
+
|
|
282
|
+
/**
 * Values accepted by `initSync`: raw module bytes or a precompiled `WebAssembly.Module`.
 */
export type SyncInitInput = BufferSource | WebAssembly.Module;
|
|
283
|
+
|
|
284
|
+
/**
 * Instantiates the given `module`, which can either be bytes or
 * a precompiled `WebAssembly.Module`.
 *
 * @param {{ module: SyncInitInput }} module - Passing `SyncInitInput` directly is deprecated.
 *
 * @returns {InitOutput}
 */
export function initSync(module: { module: SyncInitInput } | SyncInitInput): InitOutput;
|
|
293
|
+
|
|
294
|
+
/**
 * If `module_or_path` is {RequestInfo} or {URL}, makes a request and
 * for everything else, calls `WebAssembly.instantiate` directly.
 *
 * @param {{ module_or_path: InitInput | Promise<InitInput> }} module_or_path - Passing `InitInput` directly is deprecated.
 *
 * @returns {Promise<InitOutput>}
 */
export default function __wbg_init (module_or_path?: { module_or_path: InitInput | Promise<InitInput> } | InitInput | Promise<InitInput>): Promise<InitOutput>;
|