sophontalk-services 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1,324 @@
1
- "use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const l=16e3,_=25,u=10,w=80,m=4,M=.97,f=1e-10,k=20,F=!0,R=!0,h=25,g=1e3/h,p=t=>{if(t===128)return 4;if(t===96)return 3;throw new Error(`Unsupported crop size: ${t}. Only nano (96) and tiny (128) are supported.`)},O=(t,e,s)=>{if(e<=1)return{nextIndex:0,nextDirection:1};let r=t+s,a=s;return r>=e?(r=e-2,a=-1):r<0&&(r=1,a=1),{nextIndex:r,nextDirection:a}};async function L(t={}){const{jsonUrl:e="/complete_dataset.json",zipUrl:s="/processed_images.zip"}=t,[r,a]=await Promise.all([fetch(e),fetch(s)]);if(!r.ok)throw new Error(`无法加载 ${e}`);if(!a.ok)throw new Error(`无法加载 ${s}`);const o=await r.json(),c=await a.arrayBuffer();return{dataset:o,zipBuffer:c}}function S(t){return new Worker("./assets/feature.worker-Dx6moind.js",{type:"module",name:t?.name})}class P{worker=new S;async process(e){const s=e.getChannelData(0),r=e.numberOfChannels>1?e.getChannelData(1):void 0,a={leftChannel:s,rightChannel:r,sampleRate:e.sampleRate},o=[s.buffer];return r&&o.push(r.buffer),new Promise((c,d)=>{const i=n=>{n.data?.status==="success"?(this.worker.removeEventListener("message",i),this.worker.removeEventListener("error",E),c(n.data.payload)):n.data?.status==="error"&&(this.worker.removeEventListener("message",i),this.worker.removeEventListener("error",E),d(new Error(n.data.error||"特征提取失败")))},E=n=>{this.worker.removeEventListener("message",i),this.worker.removeEventListener("error",E),d(n)};this.worker.addEventListener("message",i),this.worker.addEventListener("error",E),this.worker.postMessage(a,o)})}dispose(){this.worker.terminate()}}function T(t){return new Worker("./assets/inference.worker-cwIlIomt.js",{type:"module",name:t?.name})}class A{worker;handlers={};constructor(e={}){this.handlers=e,this.worker=new 
T,this.worker.onmessage=this.handleMessage.bind(this)}handleMessage(e){const{type:s,payload:r}=e.data;switch(s){case"ready":this.handlers.onReady?.();break;case"start":this.handlers.onStart?.(r.totalFrames);break;case"frame":this.handlers.onFrame?.(r.frame,r.index);break;case"done":this.handlers.onDone?.();break;case"error":this.handlers.onError?.(r);break}}init(e){this.worker.postMessage({type:"init",modelPath:e.modelPath,dataset:e.dataset,zipBuffer:e.zipBuffer,blendingMask:e.blendingMask,wasmPaths:e.wasmPaths},[e.zipBuffer,e.blendingMask])}run(e){this.worker.postMessage({type:"run",audioFeatures:e.audioFeatures,audioDimensions:e.audioDimensions},[e.audioFeatures.buffer])}stop(){this.worker.postMessage({type:"stop"})}terminate(){this.worker.terminate()}}exports.DEFAULT_LOW_FREQ=k;exports.ENERGY_FLOOR=f;exports.FRAME_INTERVAL_MS=g;exports.FRAME_LENGTH_MS=_;exports.FRAME_SHIFT_MS=u;exports.FeatureExtractor=P;exports.InferenceEngine=A;exports.NUM_MEL_BINS=w;exports.NUM_SEQUENCE_FRAMES=m;exports.PREEMPH_COEFF=M;exports.REMOVE_DC_OFFSET=F;exports.ROUND_TO_POWER_OF_TWO=R;exports.SAMPLE_RATE=l;exports.TARGET_FPS=h;exports.calculatePingPongState=O;exports.getBorder=p;exports.loadDataset=L;
1
+ "use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const w=16e3,g=25,_=10,y=80,E=4,M=.97,R=1e-10,b=20,F=!0,L=!0,d=25,I=1e3/d,k=a=>{if(a===128)return 4;if(a===96)return 3;throw new Error(`Unsupported crop size: ${a}. Only nano (96) and tiny (128) are supported.`)},v=(a,n,e)=>{if(n<=1)return{nextIndex:0,nextDirection:1};let t=a+e,r=e;return t>=n?(t=n-2,r=-1):t<0&&(t=1,r=1),{nextIndex:t,nextDirection:r}};async function U(a={}){const{jsonUrl:n="/complete_dataset.json",zipUrl:e="/processed_images.zip"}=a,[t,r]=await Promise.all([fetch(n),fetch(e)]);if(!t.ok)throw new Error(`无法加载 ${n}`);if(!r.ok)throw new Error(`无法加载 ${e}`);const o=await t.json(),c=await r.arrayBuffer();return{dataset:o,zipBuffer:c}}const h=`import x from "fft.js";
2
+ import { create as O, ConverterType as H } from "@alexanderolsen/libsamplerate-js";
3
+ const y = 16e3, U = 25, D = 10, u = 80, S = 4, W = 0.97, L = 1e-10, b = 20, h = Math.round(U * y / 1e3), C = Math.round(D * y / 1e3), k = (e) => {
4
+ if (e <= 0) return 1;
5
+ let t = 1;
6
+ for (; t < e; ) t *= 2;
7
+ return t;
8
+ }, P = k(h), N = new x(P), v = new Float32Array(h), G = () => {
9
+ if (h <= 1) return;
10
+ const e = 2 * Math.PI / (h - 1);
11
+ for (let t = 0; t < h; t++)
12
+ v[t] = 0.5 - 0.5 * Math.cos(t * e);
13
+ };
14
+ G();
15
+ function Q(e, t, o, a = 20, s) {
16
+ s = s || o / 2;
17
+ const i = (n) => 700 * (Math.exp(n / 1127) - 1), r = (n) => 1127 * Math.log(1 + n / 700), l = r(a), A = r(s), w = new Float32Array(e + 2);
18
+ for (let n = 0; n < e + 2; n++)
19
+ w[n] = l + (A - l) * n / (e + 1);
20
+ const _ = w.map(i).map((n) => n * t / o), f = Math.floor(t / 2) + 1, m = [];
21
+ for (let n = 1; n <= e; n++) {
22
+ const c = _[n - 1], F = _[n], M = _[n + 1], I = Math.ceil(c), T = Math.floor(M), E = Math.max(0, I), p = Math.min(f - 1, T), B = p - E + 1;
23
+ if (B <= 0) {
24
+ m.push({ startBin: 0, values: new Float32Array(0) });
25
+ continue;
26
+ }
27
+ const R = new Float32Array(B);
28
+ for (let d = E; d <= p; d++) {
29
+ let g = 0;
30
+ d <= F ? g = (d - c) / (F - c) : g = (M - d) / (M - F), R[d - E] = g;
31
+ }
32
+ m.push({ startBin: E, values: R });
33
+ }
34
+ return m;
35
+ }
36
+ const q = Q(
37
+ u,
38
+ P,
39
+ y,
40
+ b
41
+ );
42
+ function K(e) {
43
+ for (let t = 0; t < e.length; t++)
44
+ e[t] *= v[t];
45
+ }
46
+ function Y(e, t) {
47
+ for (let o = e.length - 1; o > 0; o--)
48
+ e[o] -= t * e[o - 1];
49
+ e[0] -= t * e[0];
50
+ }
51
+ function V(e) {
52
+ const t = e.length;
53
+ if (t === 0) return;
54
+ let o = 0;
55
+ for (let s = 0; s < t; s++) o += e[s];
56
+ const a = o / t;
57
+ for (let s = 0; s < t; s++) e[s] -= a;
58
+ }
59
+ function Z(e, t) {
60
+ const o = e.length;
61
+ for (let a = 0; a < o; a++) {
62
+ const s = t[a * 2], i = t[a * 2 + 1];
63
+ e[a] = s * s + i * i;
64
+ }
65
+ }
66
+ function j(e) {
67
+ const t = e.length;
68
+ if (t < 2) return;
69
+ let o = 0;
70
+ for (let r = 0; r < t; r++) o += e[r];
71
+ const a = o / t;
72
+ let s = 0;
73
+ for (let r = 0; r < t; r++) {
74
+ const l = e[r] - a;
75
+ s += l * l;
76
+ }
77
+ const i = Math.sqrt(s / (t - 1));
78
+ if (i > 1e-8) {
79
+ const r = 1 / i;
80
+ for (let l = 0; l < t; l++)
81
+ e[l] = (e[l] - a) * r;
82
+ } else
83
+ for (let r = 0; r < t; r++)
84
+ e[r] -= a;
85
+ }
86
+ async function z(e) {
87
+ let t = e.leftChannel;
88
+ if (e.rightChannel) {
89
+ const f = e.rightChannel, m = new Float32Array(t.length);
90
+ for (let n = 0; n < t.length; n++)
91
+ m[n] = (t[n] + f[n]) * 0.5;
92
+ t = m;
93
+ }
94
+ if (e.sampleRate !== y) {
95
+ const f = await O(1, e.sampleRate, y, {
96
+ converterType: H.SRC_SINC_MEDIUM_QUALITY
97
+ });
98
+ t = f.simple(t), f.destroy();
99
+ }
100
+ const o = Math.floor((t.length - h) / C) + 1;
101
+ if (o <= 0)
102
+ throw new Error("特征提取失败:音频时长过短。");
103
+ const a = Math.ceil(o / S) * S, s = a * u, i = new Float32Array(s), r = new Float32Array(h), l = N.createComplexArray(), A = N.createComplexArray(), w = new Float32Array(Math.floor(P / 2) + 1);
104
+ for (let f = 0; f < o; f++) {
105
+ const m = f * C;
106
+ r.set(t.subarray(m, m + h)), V(r), Y(r, W), K(r), l.fill(0);
107
+ for (let c = 0; c < h; c++)
108
+ l[c * 2] = r[c];
109
+ N.transform(A, l), Z(w, A);
110
+ const n = f * u;
111
+ for (let c = 0; c < u; c++) {
112
+ const F = q[c];
113
+ let M = 0;
114
+ const I = F.values, T = F.startBin, E = I.length;
115
+ for (let p = 0; p < E; p++)
116
+ M += w[T + p] * I[p];
117
+ M = M < L ? L : M, i[n + c] = Math.log(M);
118
+ }
119
+ }
120
+ if (a > o) {
121
+ const f = (o - 1) * u, m = f + u, n = i.subarray(f, m);
122
+ for (let c = o; c < a; c++)
123
+ i.set(n, c * u);
124
+ }
125
+ j(i);
126
+ const _ = a / S;
127
+ return {
128
+ features: i,
129
+ dimensions: [_, S, u]
130
+ };
131
+ }
132
+ self.addEventListener(
133
+ "message",
134
+ async (e) => {
135
+ try {
136
+ const t = e.data, o = await z(t);
137
+ self.postMessage(
138
+ { status: "success", payload: o },
139
+ { transfer: [o.features.buffer] }
140
+ );
141
+ } catch (t) {
142
+ self.postMessage({ status: "error", error: t.message });
143
+ }
144
+ }
145
+ );
146
+ `,u=typeof self<"u"&&self.Blob&&new Blob(["URL.revokeObjectURL(import.meta.url);",h],{type:"text/javascript;charset=utf-8"});function C(a){let n;try{if(n=u&&(self.URL||self.webkitURL).createObjectURL(u),!n)throw"";const e=new Worker(n,{type:"module",name:a?.name});return e.addEventListener("error",()=>{(self.URL||self.webkitURL).revokeObjectURL(n)}),e}catch{return new Worker("data:text/javascript;charset=utf-8,"+encodeURIComponent(h),{type:"module",name:a?.name})}}class x{worker=new C;async process(n){const e=n.getChannelData(0),t=n.numberOfChannels>1?n.getChannelData(1):void 0,r={leftChannel:e,rightChannel:t,sampleRate:n.sampleRate},o=[e.buffer];return t&&o.push(t.buffer),new Promise((c,f)=>{const l=s=>{s.data?.status==="success"?(this.worker.removeEventListener("message",l),this.worker.removeEventListener("error",i),c(s.data.payload)):s.data?.status==="error"&&(this.worker.removeEventListener("message",l),this.worker.removeEventListener("error",i),f(new Error(s.data.error||"特征提取失败")))},i=s=>{this.worker.removeEventListener("message",l),this.worker.removeEventListener("error",i),f(s)};this.worker.addEventListener("message",l),this.worker.addEventListener("error",i),this.worker.postMessage(r,o)})}dispose(){this.worker.terminate()}}const p=`import { env as k, InferenceSession as $, Tensor as W } from "onnxruntime-web";
147
+ import q from "jszip";
148
+ const V = 80, G = 4, H = 25, j = 1e3 / H, J = (t) => {
149
+ if (t === 128) return 4;
150
+ if (t === 96) return 3;
151
+ throw new Error(\`Unsupported crop size: \${t}. Only nano (96) and tiny (128) are supported.\`);
152
+ }, Q = (t, e, n) => {
153
+ if (e <= 1) return { nextIndex: 0, nextDirection: 1 };
154
+ let a = t + n, s = n;
155
+ return a >= e ? (a = e - 2, s = -1) : a < 0 && (a = 1, s = 1), { nextIndex: a, nextDirection: s };
156
+ };
157
+ k.wasm.numThreads = 1;
158
+ const Z = () => typeof navigator < "u" && !!navigator.gpu, F = 32;
159
+ let _ = null, R = null, x = null, L = null;
160
+ const w = {
161
+ tensors: /* @__PURE__ */ new Map(),
162
+ faceImages: /* @__PURE__ */ new Map(),
163
+ fullImages: /* @__PURE__ */ new Map()
164
+ };
165
+ let S = 3, b = 96, O = 0, z = 0, C = null, A = null, y = null, E = null, g = null, I = null, D = null, M = null, P = !1, v = "", p = null;
166
+ const T = (t) => {
167
+ self.postMessage({ type: "error", payload: t });
168
+ }, K = (t) => {
169
+ const e = w.tensors.get(t);
170
+ if (!e) throw new Error(\`Cache miss: \${t}\`);
171
+ const n = b - 2 * S;
172
+ return new W("float32", e, [1, 6, n, n]);
173
+ }, X = (t) => {
174
+ const e = w.faceImages.get(t);
175
+ if (!e) throw new Error(\`Cache miss: \${t}\`);
176
+ return e;
177
+ }, Y = async (t) => {
178
+ const e = w.fullImages.get(t);
179
+ if (!e) throw new Error(\`Cache miss: \${t}\`);
180
+ return await createImageBitmap(e);
181
+ }, ee = (t) => {
182
+ const [e, n, a, s] = t.dims, u = t.data;
183
+ (!y || y.width !== s || y.height !== a) && (C.width = s, C.height = a, y = A.createImageData(s, a));
184
+ const d = a * s, l = new Uint32Array(y.data.buffer), o = d, i = d * 2, c = n === 3;
185
+ for (let r = 0; r < d; r++) {
186
+ let m = (c ? u[r + i] : u[r]) * 255, f = (c ? u[r + o] : u[r]) * 255, h = u[r] * 255;
187
+ m = m < 0 ? 0 : m > 255 ? 255 : m, f = f < 0 ? 0 : f > 255 ? 255 : f, h = h < 0 ? 0 : h > 255 ? 255 : h, l[r] = 255 << 24 | (h & 255) << 16 | (f & 255) << 8 | m & 255;
188
+ }
189
+ A.putImageData(y, 0, 0);
190
+ }, te = async (t, e, n) => {
191
+ ee(t);
192
+ const a = X(e.face_image);
193
+ g.globalCompositeOperation = "source-over", g.drawImage(a, 0, 0), g.drawImage(C, S, S), g.globalCompositeOperation = "destination-in", g.drawImage(L, 0, 0, b, b), g.globalCompositeOperation = "destination-over", g.drawImage(a, 0, 0), g.globalCompositeOperation = "source-over", D.drawImage(n, 0, 0);
194
+ const s = e.crop_info.ymax - e.crop_info.ymin;
195
+ return D.drawImage(
196
+ E,
197
+ e.crop_info.xmin,
198
+ e.crop_info.ymin,
199
+ e.crop_info.width,
200
+ s
201
+ ), await createImageBitmap(I);
202
+ }, ae = (t, e, n) => {
203
+ const [a, s, u] = e, d = s * u, l = F * d;
204
+ (!M || M.length !== l) && (M = new Float32Array(l));
205
+ for (let o = 0; o < F; o++) {
206
+ const c = Math.min(
207
+ Math.max(n - F / 2 + o, 0),
208
+ a - 1
209
+ ) * d, r = o * d;
210
+ M.set(
211
+ t.subarray(c, c + d),
212
+ r
213
+ );
214
+ }
215
+ return M;
216
+ }, ne = (t, e, n) => {
217
+ C || (C = new OffscreenCanvas(1, 1), A = C.getContext("2d", {
218
+ willReadFrequently: !0
219
+ })), (!E || E.width !== n) && (E = new OffscreenCanvas(n, n), g = E.getContext("2d", {
220
+ willReadFrequently: !0
221
+ })), (!I || I.width !== t || I.height !== e) && (I = new OffscreenCanvas(t, e), D = I.getContext("2d", {
222
+ willReadFrequently: !0
223
+ }));
224
+ }, se = async (t) => {
225
+ try {
226
+ if (t.wasmPaths && (k.wasm.wasmPaths = t.wasmPaths), !t.dataset) throw new Error("Missing dataset");
227
+ const e = t.dataset;
228
+ R = e, b = e.dataset_info.config.crop_size, S = J(b), O = e.dataset_info.source_image_dimensions?.width || 0, z = e.dataset_info.source_image_dimensions?.height || 0, ne(O, z, b), x = await q.loadAsync(t.zipBuffer), t.zipBuffer = null, L = t.blendingMask;
229
+ const n = e.images.map(async (l) => {
230
+ if (!w.tensors.has(l.tensor_file)) {
231
+ const o = x.file(l.tensor_file);
232
+ if (o) {
233
+ const i = await o.async("arraybuffer");
234
+ w.tensors.set(l.tensor_file, new Float32Array(i));
235
+ }
236
+ }
237
+ if (!w.faceImages.has(l.face_image)) {
238
+ const o = x.file(l.face_image);
239
+ if (o) {
240
+ const i = await o.async("blob"), c = await createImageBitmap(i);
241
+ w.faceImages.set(l.face_image, c);
242
+ }
243
+ }
244
+ if (!w.fullImages.has(l.full_image)) {
245
+ const o = x.file(l.full_image);
246
+ if (o) {
247
+ const i = await o.async("blob");
248
+ w.fullImages.set(l.full_image, i);
249
+ }
250
+ }
251
+ });
252
+ await Promise.all(n), x = null;
253
+ const a = [];
254
+ Z() && a.push("webgpu"), a.push("wasm");
255
+ const s = new URL(t.modelPath, self.location.href).toString(), u = new URL(s).pathname.split("/").pop() || "model.onnx", d = new URL(\`\${u}.data\`, s).toString();
256
+ _ = await $.create(s, {
257
+ executionProviders: a,
258
+ graphOptimizationLevel: "all",
259
+ enableMemPattern: !0,
260
+ enableCpuMemArena: !0,
261
+ externalData: [{ data: d, path: \`\${u}.data\` }]
262
+ }), self.postMessage({ type: "ready" });
263
+ } catch (e) {
264
+ T(e?.message || "Init Error");
265
+ }
266
+ }, oe = async (t) => {
267
+ if (!_ || !R) {
268
+ T("Not initialized");
269
+ return;
270
+ }
271
+ P = !1;
272
+ const e = t.audioDimensions[0];
273
+ self.postMessage({
274
+ type: "start",
275
+ payload: { totalFrames: e, frameInterval: j }
276
+ });
277
+ let n = 0, a = 1;
278
+ const s = {}, u = _.inputNames[0], d = _.inputNames[1], l = _.outputNames[0];
279
+ v = "", p && (p.close(), p = null);
280
+ for (let o = 0; o < e && !P; o++) {
281
+ let i = null, c = null, r = null, m = null;
282
+ try {
283
+ const f = R.images[n];
284
+ i = K(f.tensor_file), f.full_image !== v && (p && p.close(), p = await Y(f.full_image), v = f.full_image);
285
+ const h = ae(
286
+ t.audioFeatures,
287
+ t.audioDimensions,
288
+ o
289
+ );
290
+ c = new W("float32", h, [
291
+ 1,
292
+ F,
293
+ G,
294
+ V
295
+ ]), s[u] = i, s[d] = c, r = await _.run(s);
296
+ const N = r[l];
297
+ if (!N)
298
+ throw new Error("Missing inference output tensor");
299
+ m = N;
300
+ const B = await te(N, f, p);
301
+ self.postMessage(
302
+ { type: "frame", payload: { frame: B, index: o } },
303
+ [B]
304
+ );
305
+ const U = Q(
306
+ n,
307
+ R.images.length,
308
+ a
309
+ );
310
+ n = U.nextIndex, a = U.nextDirection;
311
+ } catch (f) {
312
+ T(f?.message || "Run Error");
313
+ break;
314
+ } finally {
315
+ i && i.dispose(), c && c.dispose(), m && m.dispose(), i = null, c = null, m = null, r = null;
316
+ }
317
+ }
318
+ p && (p.close(), p = null), v = "", self.postMessage({ type: "done" });
319
+ };
320
+ self.onmessage = (t) => {
321
+ const e = t.data;
322
+ e.type === "init" ? se(e) : e.type === "run" ? oe(e) : e.type === "stop" && (P = !0);
323
+ };
324
+ `,m=typeof self<"u"&&self.Blob&&new Blob(["URL.revokeObjectURL(import.meta.url);",p],{type:"text/javascript;charset=utf-8"});function O(a){let n;try{if(n=m&&(self.URL||self.webkitURL).createObjectURL(m),!n)throw"";const e=new Worker(n,{type:"module",name:a?.name});return e.addEventListener("error",()=>{(self.URL||self.webkitURL).revokeObjectURL(n)}),e}catch{return new Worker("data:text/javascript;charset=utf-8,"+encodeURIComponent(p),{type:"module",name:a?.name})}}class A{worker;handlers={};constructor(n={}){this.handlers=n,this.worker=new O,this.worker.onmessage=this.handleMessage.bind(this)}handleMessage(n){const{type:e,payload:t}=n.data;switch(e){case"ready":this.handlers.onReady?.();break;case"start":this.handlers.onStart?.(t.totalFrames);break;case"frame":this.handlers.onFrame?.(t.frame,t.index);break;case"done":this.handlers.onDone?.();break;case"error":this.handlers.onError?.(t);break}}init(n){this.worker.postMessage({type:"init",modelPath:n.modelPath,dataset:n.dataset,zipBuffer:n.zipBuffer,blendingMask:n.blendingMask,wasmPaths:n.wasmPaths},[n.zipBuffer,n.blendingMask])}run(n){this.worker.postMessage({type:"run",audioFeatures:n.audioFeatures,audioDimensions:n.audioDimensions},[n.audioFeatures.buffer])}stop(){this.worker.postMessage({type:"stop"})}terminate(){this.worker.terminate()}}exports.DEFAULT_LOW_FREQ=b;exports.ENERGY_FLOOR=R;exports.FRAME_INTERVAL_MS=I;exports.FRAME_LENGTH_MS=g;exports.FRAME_SHIFT_MS=_;exports.FeatureExtractor=x;exports.InferenceEngine=A;exports.NUM_MEL_BINS=y;exports.NUM_SEQUENCE_FRAMES=E;exports.PREEMPH_COEFF=M;exports.REMOVE_DC_OFFSET=F;exports.ROUND_TO_POWER_OF_TWO=L;exports.SAMPLE_RATE=w;exports.TARGET_FPS=d;exports.calculatePingPongState=v;exports.getBorder=k;exports.loadDataset=U;
@@ -1,111 +1,460 @@
1
- const w = 16e3, m = 25, u = 10, k = 80, f = 4, p = 0.97, _ = 1e-10, g = 20, M = !0, R = !0, F = 25, L = 40, y = (t) => {
2
- if (t === 128) return 4;
3
- if (t === 96) return 3;
4
- throw new Error(`Unsupported crop size: ${t}. Only nano (96) and tiny (128) are supported.`);
5
- }, P = (t, e, s) => {
6
- if (e <= 1) return { nextIndex: 0, nextDirection: 1 };
7
- let r = t + s, n = s;
8
- return r >= e ? (r = e - 2, n = -1) : r < 0 && (r = 1, n = 1), { nextIndex: r, nextDirection: n };
1
+ const g = 16e3, y = 25, _ = 10, M = 80, E = 4, b = 0.97, R = 1e-10, k = 20, I = !0, v = !0, L = 25, x = 40, F = (a) => {
2
+ if (a === 128) return 4;
3
+ if (a === 96) return 3;
4
+ throw new Error(`Unsupported crop size: ${a}. Only nano (96) and tiny (128) are supported.`);
5
+ }, C = (a, n, e) => {
6
+ if (n <= 1) return { nextIndex: 0, nextDirection: 1 };
7
+ let t = a + e, r = e;
8
+ return t >= n ? (t = n - 2, r = -1) : t < 0 && (t = 1, r = 1), { nextIndex: t, nextDirection: r };
9
9
  };
10
- async function b(t = {}) {
10
+ async function U(a = {}) {
11
11
  const {
12
- jsonUrl: e = "/complete_dataset.json",
13
- zipUrl: s = "/processed_images.zip"
14
- } = t, [r, n] = await Promise.all([
15
- fetch(e),
16
- fetch(s)
12
+ jsonUrl: n = "/complete_dataset.json",
13
+ zipUrl: e = "/processed_images.zip"
14
+ } = a, [t, r] = await Promise.all([
15
+ fetch(n),
16
+ fetch(e)
17
17
  ]);
18
+ if (!t.ok) throw new Error(`无法加载 ${n}`);
18
19
  if (!r.ok) throw new Error(`无法加载 ${e}`);
19
- if (!n.ok) throw new Error(`无法加载 ${s}`);
20
- const o = await r.json(), h = await n.arrayBuffer();
21
- return { dataset: o, zipBuffer: h };
20
+ const o = await t.json(), c = await r.arrayBuffer();
21
+ return { dataset: o, zipBuffer: c };
22
22
  }
23
- function l(t) {
24
- return new Worker(
25
- "./assets/feature.worker-Dx6moind.js",
26
- {
27
- type: "module",
28
- name: t?.name
23
+ const h = `import x from "fft.js";
24
+ import { create as O, ConverterType as H } from "@alexanderolsen/libsamplerate-js";
25
+ const y = 16e3, U = 25, D = 10, u = 80, S = 4, W = 0.97, L = 1e-10, b = 20, h = Math.round(U * y / 1e3), C = Math.round(D * y / 1e3), k = (e) => {
26
+ if (e <= 0) return 1;
27
+ let t = 1;
28
+ for (; t < e; ) t *= 2;
29
+ return t;
30
+ }, P = k(h), N = new x(P), v = new Float32Array(h), G = () => {
31
+ if (h <= 1) return;
32
+ const e = 2 * Math.PI / (h - 1);
33
+ for (let t = 0; t < h; t++)
34
+ v[t] = 0.5 - 0.5 * Math.cos(t * e);
35
+ };
36
+ G();
37
+ function Q(e, t, o, a = 20, s) {
38
+ s = s || o / 2;
39
+ const i = (n) => 700 * (Math.exp(n / 1127) - 1), r = (n) => 1127 * Math.log(1 + n / 700), l = r(a), A = r(s), w = new Float32Array(e + 2);
40
+ for (let n = 0; n < e + 2; n++)
41
+ w[n] = l + (A - l) * n / (e + 1);
42
+ const _ = w.map(i).map((n) => n * t / o), f = Math.floor(t / 2) + 1, m = [];
43
+ for (let n = 1; n <= e; n++) {
44
+ const c = _[n - 1], F = _[n], M = _[n + 1], I = Math.ceil(c), T = Math.floor(M), E = Math.max(0, I), p = Math.min(f - 1, T), B = p - E + 1;
45
+ if (B <= 0) {
46
+ m.push({ startBin: 0, values: new Float32Array(0) });
47
+ continue;
48
+ }
49
+ const R = new Float32Array(B);
50
+ for (let d = E; d <= p; d++) {
51
+ let g = 0;
52
+ d <= F ? g = (d - c) / (F - c) : g = (M - d) / (M - F), R[d - E] = g;
29
53
  }
30
- );
54
+ m.push({ startBin: E, values: R });
55
+ }
56
+ return m;
31
57
  }
32
- class O {
33
- worker = new l();
34
- async process(e) {
35
- const s = e.getChannelData(0), r = e.numberOfChannels > 1 ? e.getChannelData(1) : void 0, n = {
36
- leftChannel: s,
37
- rightChannel: r,
38
- sampleRate: e.sampleRate
39
- }, o = [s.buffer];
40
- return r && o.push(r.buffer), new Promise((h, d) => {
41
- const i = (a) => {
42
- a.data?.status === "success" ? (this.worker.removeEventListener("message", i), this.worker.removeEventListener("error", c), h(a.data.payload)) : a.data?.status === "error" && (this.worker.removeEventListener("message", i), this.worker.removeEventListener("error", c), d(new Error(a.data.error || "特征提取失败")));
43
- }, c = (a) => {
44
- this.worker.removeEventListener("message", i), this.worker.removeEventListener("error", c), d(a);
58
+ const q = Q(
59
+ u,
60
+ P,
61
+ y,
62
+ b
63
+ );
64
+ function K(e) {
65
+ for (let t = 0; t < e.length; t++)
66
+ e[t] *= v[t];
67
+ }
68
+ function Y(e, t) {
69
+ for (let o = e.length - 1; o > 0; o--)
70
+ e[o] -= t * e[o - 1];
71
+ e[0] -= t * e[0];
72
+ }
73
+ function V(e) {
74
+ const t = e.length;
75
+ if (t === 0) return;
76
+ let o = 0;
77
+ for (let s = 0; s < t; s++) o += e[s];
78
+ const a = o / t;
79
+ for (let s = 0; s < t; s++) e[s] -= a;
80
+ }
81
+ function Z(e, t) {
82
+ const o = e.length;
83
+ for (let a = 0; a < o; a++) {
84
+ const s = t[a * 2], i = t[a * 2 + 1];
85
+ e[a] = s * s + i * i;
86
+ }
87
+ }
88
+ function j(e) {
89
+ const t = e.length;
90
+ if (t < 2) return;
91
+ let o = 0;
92
+ for (let r = 0; r < t; r++) o += e[r];
93
+ const a = o / t;
94
+ let s = 0;
95
+ for (let r = 0; r < t; r++) {
96
+ const l = e[r] - a;
97
+ s += l * l;
98
+ }
99
+ const i = Math.sqrt(s / (t - 1));
100
+ if (i > 1e-8) {
101
+ const r = 1 / i;
102
+ for (let l = 0; l < t; l++)
103
+ e[l] = (e[l] - a) * r;
104
+ } else
105
+ for (let r = 0; r < t; r++)
106
+ e[r] -= a;
107
+ }
108
+ async function z(e) {
109
+ let t = e.leftChannel;
110
+ if (e.rightChannel) {
111
+ const f = e.rightChannel, m = new Float32Array(t.length);
112
+ for (let n = 0; n < t.length; n++)
113
+ m[n] = (t[n] + f[n]) * 0.5;
114
+ t = m;
115
+ }
116
+ if (e.sampleRate !== y) {
117
+ const f = await O(1, e.sampleRate, y, {
118
+ converterType: H.SRC_SINC_MEDIUM_QUALITY
119
+ });
120
+ t = f.simple(t), f.destroy();
121
+ }
122
+ const o = Math.floor((t.length - h) / C) + 1;
123
+ if (o <= 0)
124
+ throw new Error("特征提取失败:音频时长过短。");
125
+ const a = Math.ceil(o / S) * S, s = a * u, i = new Float32Array(s), r = new Float32Array(h), l = N.createComplexArray(), A = N.createComplexArray(), w = new Float32Array(Math.floor(P / 2) + 1);
126
+ for (let f = 0; f < o; f++) {
127
+ const m = f * C;
128
+ r.set(t.subarray(m, m + h)), V(r), Y(r, W), K(r), l.fill(0);
129
+ for (let c = 0; c < h; c++)
130
+ l[c * 2] = r[c];
131
+ N.transform(A, l), Z(w, A);
132
+ const n = f * u;
133
+ for (let c = 0; c < u; c++) {
134
+ const F = q[c];
135
+ let M = 0;
136
+ const I = F.values, T = F.startBin, E = I.length;
137
+ for (let p = 0; p < E; p++)
138
+ M += w[T + p] * I[p];
139
+ M = M < L ? L : M, i[n + c] = Math.log(M);
140
+ }
141
+ }
142
+ if (a > o) {
143
+ const f = (o - 1) * u, m = f + u, n = i.subarray(f, m);
144
+ for (let c = o; c < a; c++)
145
+ i.set(n, c * u);
146
+ }
147
+ j(i);
148
+ const _ = a / S;
149
+ return {
150
+ features: i,
151
+ dimensions: [_, S, u]
152
+ };
153
+ }
154
+ self.addEventListener(
155
+ "message",
156
+ async (e) => {
157
+ try {
158
+ const t = e.data, o = await z(t);
159
+ self.postMessage(
160
+ { status: "success", payload: o },
161
+ { transfer: [o.features.buffer] }
162
+ );
163
+ } catch (t) {
164
+ self.postMessage({ status: "error", error: t.message });
165
+ }
166
+ }
167
+ );
168
+ `, u = typeof self < "u" && self.Blob && new Blob(["URL.revokeObjectURL(import.meta.url);", h], { type: "text/javascript;charset=utf-8" });
169
+ function p(a) {
170
+ let n;
171
+ try {
172
+ if (n = u && (self.URL || self.webkitURL).createObjectURL(u), !n) throw "";
173
+ const e = new Worker(n, {
174
+ type: "module",
175
+ name: a?.name
176
+ });
177
+ return e.addEventListener("error", () => {
178
+ (self.URL || self.webkitURL).revokeObjectURL(n);
179
+ }), e;
180
+ } catch {
181
+ return new Worker(
182
+ "data:text/javascript;charset=utf-8," + encodeURIComponent(h),
183
+ {
184
+ type: "module",
185
+ name: a?.name
186
+ }
187
+ );
188
+ }
189
+ }
190
+ class A {
191
+ worker = new p();
192
+ async process(n) {
193
+ const e = n.getChannelData(0), t = n.numberOfChannels > 1 ? n.getChannelData(1) : void 0, r = {
194
+ leftChannel: e,
195
+ rightChannel: t,
196
+ sampleRate: n.sampleRate
197
+ }, o = [e.buffer];
198
+ return t && o.push(t.buffer), new Promise((c, f) => {
199
+ const l = (s) => {
200
+ s.data?.status === "success" ? (this.worker.removeEventListener("message", l), this.worker.removeEventListener("error", i), c(s.data.payload)) : s.data?.status === "error" && (this.worker.removeEventListener("message", l), this.worker.removeEventListener("error", i), f(new Error(s.data.error || "特征提取失败")));
201
+ }, i = (s) => {
202
+ this.worker.removeEventListener("message", l), this.worker.removeEventListener("error", i), f(s);
45
203
  };
46
- this.worker.addEventListener("message", i), this.worker.addEventListener("error", c), this.worker.postMessage(n, o);
204
+ this.worker.addEventListener("message", l), this.worker.addEventListener("error", i), this.worker.postMessage(r, o);
47
205
  });
48
206
  }
49
207
  dispose() {
50
208
  this.worker.terminate();
51
209
  }
52
210
  }
53
- function E(t) {
54
- return new Worker(
55
- "./assets/inference.worker-cwIlIomt.js",
56
- {
57
- type: "module",
58
- name: t?.name
211
+ const d = `import { env as k, InferenceSession as $, Tensor as W } from "onnxruntime-web";
212
+ import q from "jszip";
213
+ const V = 80, G = 4, H = 25, j = 1e3 / H, J = (t) => {
214
+ if (t === 128) return 4;
215
+ if (t === 96) return 3;
216
+ throw new Error(\`Unsupported crop size: \${t}. Only nano (96) and tiny (128) are supported.\`);
217
+ }, Q = (t, e, n) => {
218
+ if (e <= 1) return { nextIndex: 0, nextDirection: 1 };
219
+ let a = t + n, s = n;
220
+ return a >= e ? (a = e - 2, s = -1) : a < 0 && (a = 1, s = 1), { nextIndex: a, nextDirection: s };
221
+ };
222
+ k.wasm.numThreads = 1;
223
+ const Z = () => typeof navigator < "u" && !!navigator.gpu, F = 32;
224
+ let _ = null, R = null, x = null, L = null;
225
+ const w = {
226
+ tensors: /* @__PURE__ */ new Map(),
227
+ faceImages: /* @__PURE__ */ new Map(),
228
+ fullImages: /* @__PURE__ */ new Map()
229
+ };
230
+ let S = 3, b = 96, O = 0, z = 0, C = null, A = null, y = null, E = null, g = null, I = null, D = null, M = null, P = !1, v = "", p = null;
231
+ const T = (t) => {
232
+ self.postMessage({ type: "error", payload: t });
233
+ }, K = (t) => {
234
+ const e = w.tensors.get(t);
235
+ if (!e) throw new Error(\`Cache miss: \${t}\`);
236
+ const n = b - 2 * S;
237
+ return new W("float32", e, [1, 6, n, n]);
238
+ }, X = (t) => {
239
+ const e = w.faceImages.get(t);
240
+ if (!e) throw new Error(\`Cache miss: \${t}\`);
241
+ return e;
242
+ }, Y = async (t) => {
243
+ const e = w.fullImages.get(t);
244
+ if (!e) throw new Error(\`Cache miss: \${t}\`);
245
+ return await createImageBitmap(e);
246
+ }, ee = (t) => {
247
+ const [e, n, a, s] = t.dims, u = t.data;
248
+ (!y || y.width !== s || y.height !== a) && (C.width = s, C.height = a, y = A.createImageData(s, a));
249
+ const d = a * s, l = new Uint32Array(y.data.buffer), o = d, i = d * 2, c = n === 3;
250
+ for (let r = 0; r < d; r++) {
251
+ let m = (c ? u[r + i] : u[r]) * 255, f = (c ? u[r + o] : u[r]) * 255, h = u[r] * 255;
252
+ m = m < 0 ? 0 : m > 255 ? 255 : m, f = f < 0 ? 0 : f > 255 ? 255 : f, h = h < 0 ? 0 : h > 255 ? 255 : h, l[r] = 255 << 24 | (h & 255) << 16 | (f & 255) << 8 | m & 255;
253
+ }
254
+ A.putImageData(y, 0, 0);
255
+ }, te = async (t, e, n) => {
256
+ ee(t);
257
+ const a = X(e.face_image);
258
+ g.globalCompositeOperation = "source-over", g.drawImage(a, 0, 0), g.drawImage(C, S, S), g.globalCompositeOperation = "destination-in", g.drawImage(L, 0, 0, b, b), g.globalCompositeOperation = "destination-over", g.drawImage(a, 0, 0), g.globalCompositeOperation = "source-over", D.drawImage(n, 0, 0);
259
+ const s = e.crop_info.ymax - e.crop_info.ymin;
260
+ return D.drawImage(
261
+ E,
262
+ e.crop_info.xmin,
263
+ e.crop_info.ymin,
264
+ e.crop_info.width,
265
+ s
266
+ ), await createImageBitmap(I);
267
+ }, ae = (t, e, n) => {
268
+ const [a, s, u] = e, d = s * u, l = F * d;
269
+ (!M || M.length !== l) && (M = new Float32Array(l));
270
+ for (let o = 0; o < F; o++) {
271
+ const c = Math.min(
272
+ Math.max(n - F / 2 + o, 0),
273
+ a - 1
274
+ ) * d, r = o * d;
275
+ M.set(
276
+ t.subarray(c, c + d),
277
+ r
278
+ );
279
+ }
280
+ return M;
281
+ }, ne = (t, e, n) => {
282
+ C || (C = new OffscreenCanvas(1, 1), A = C.getContext("2d", {
283
+ willReadFrequently: !0
284
+ })), (!E || E.width !== n) && (E = new OffscreenCanvas(n, n), g = E.getContext("2d", {
285
+ willReadFrequently: !0
286
+ })), (!I || I.width !== t || I.height !== e) && (I = new OffscreenCanvas(t, e), D = I.getContext("2d", {
287
+ willReadFrequently: !0
288
+ }));
289
+ }, se = async (t) => {
290
+ try {
291
+ if (t.wasmPaths && (k.wasm.wasmPaths = t.wasmPaths), !t.dataset) throw new Error("Missing dataset");
292
+ const e = t.dataset;
293
+ R = e, b = e.dataset_info.config.crop_size, S = J(b), O = e.dataset_info.source_image_dimensions?.width || 0, z = e.dataset_info.source_image_dimensions?.height || 0, ne(O, z, b), x = await q.loadAsync(t.zipBuffer), t.zipBuffer = null, L = t.blendingMask;
294
+ const n = e.images.map(async (l) => {
295
+ if (!w.tensors.has(l.tensor_file)) {
296
+ const o = x.file(l.tensor_file);
297
+ if (o) {
298
+ const i = await o.async("arraybuffer");
299
+ w.tensors.set(l.tensor_file, new Float32Array(i));
300
+ }
301
+ }
302
+ if (!w.faceImages.has(l.face_image)) {
303
+ const o = x.file(l.face_image);
304
+ if (o) {
305
+ const i = await o.async("blob"), c = await createImageBitmap(i);
306
+ w.faceImages.set(l.face_image, c);
307
+ }
308
+ }
309
+ if (!w.fullImages.has(l.full_image)) {
310
+ const o = x.file(l.full_image);
311
+ if (o) {
312
+ const i = await o.async("blob");
313
+ w.fullImages.set(l.full_image, i);
314
+ }
315
+ }
316
+ });
317
+ await Promise.all(n), x = null;
318
+ const a = [];
319
+ Z() && a.push("webgpu"), a.push("wasm");
320
+ const s = new URL(t.modelPath, self.location.href).toString(), u = new URL(s).pathname.split("/").pop() || "model.onnx", d = new URL(\`\${u}.data\`, s).toString();
321
+ _ = await $.create(s, {
322
+ executionProviders: a,
323
+ graphOptimizationLevel: "all",
324
+ enableMemPattern: !0,
325
+ enableCpuMemArena: !0,
326
+ externalData: [{ data: d, path: \`\${u}.data\` }]
327
+ }), self.postMessage({ type: "ready" });
328
+ } catch (e) {
329
+ T(e?.message || "Init Error");
330
+ }
331
+ }, oe = async (t) => {
332
+ if (!_ || !R) {
333
+ T("Not initialized");
334
+ return;
335
+ }
336
+ P = !1;
337
+ const e = t.audioDimensions[0];
338
+ self.postMessage({
339
+ type: "start",
340
+ payload: { totalFrames: e, frameInterval: j }
341
+ });
342
+ let n = 0, a = 1;
343
+ const s = {}, u = _.inputNames[0], d = _.inputNames[1], l = _.outputNames[0];
344
+ v = "", p && (p.close(), p = null);
345
+ for (let o = 0; o < e && !P; o++) {
346
+ let i = null, c = null, r = null, m = null;
347
+ try {
348
+ const f = R.images[n];
349
+ i = K(f.tensor_file), f.full_image !== v && (p && p.close(), p = await Y(f.full_image), v = f.full_image);
350
+ const h = ae(
351
+ t.audioFeatures,
352
+ t.audioDimensions,
353
+ o
354
+ );
355
+ c = new W("float32", h, [
356
+ 1,
357
+ F,
358
+ G,
359
+ V
360
+ ]), s[u] = i, s[d] = c, r = await _.run(s);
361
+ const N = r[l];
362
+ if (!N)
363
+ throw new Error("Missing inference output tensor");
364
+ m = N;
365
+ const B = await te(N, f, p);
366
+ self.postMessage(
367
+ { type: "frame", payload: { frame: B, index: o } },
368
+ [B]
369
+ );
370
+ const U = Q(
371
+ n,
372
+ R.images.length,
373
+ a
374
+ );
375
+ n = U.nextIndex, a = U.nextDirection;
376
+ } catch (f) {
377
+ T(f?.message || "Run Error");
378
+ break;
379
+ } finally {
380
+ i && i.dispose(), c && c.dispose(), m && m.dispose(), i = null, c = null, m = null, r = null;
59
381
  }
60
- );
382
+ }
383
+ p && (p.close(), p = null), v = "", self.postMessage({ type: "done" });
384
+ };
385
+ self.onmessage = (t) => {
386
+ const e = t.data;
387
+ e.type === "init" ? se(e) : e.type === "run" ? oe(e) : e.type === "stop" && (P = !0);
388
+ };
389
+ `, m = typeof self < "u" && self.Blob && new Blob(["URL.revokeObjectURL(import.meta.url);", d], { type: "text/javascript;charset=utf-8" });
390
/**
 * Creates the inline inference worker.
 *
 * The worker source lives in the string `d`; `m` is a Blob wrapping it (or a
 * falsy value when `self`/`Blob` are unavailable). The Blob is served through
 * an object URL; the worker script itself starts with
 * `URL.revokeObjectURL(import.meta.url)`, so the URL is released once the
 * module has loaded. If the Blob route cannot be used, the worker is started
 * from a `data:` URL instead.
 *
 * Kept as a plain function (not an arrow) because callers invoke it with
 * `new w()`; the returned Worker object becomes the result of that expression.
 *
 * @param {{ name?: string }} [a] Optional worker options; only `name` is used.
 * @returns {Worker} The started module worker.
 */
function w(a) {
  let n;
  try {
    n = m && (self.URL || self.webkitURL).createObjectURL(m);
    if (!n) throw new Error("Blob URL unavailable");
    const worker = new Worker(n, {
      type: "module",
      name: a?.name
    });
    // If the worker fails, the self-revoking prologue may never have run.
    worker.addEventListener("error", () => {
      (self.URL || self.webkitURL).revokeObjectURL(n);
    });
    return worker;
  } catch {
    // The object URL may already exist when `new Worker` itself throws;
    // release it before falling back so it does not leak.
    if (n) {
      (self.URL || self.webkitURL).revokeObjectURL(n);
    }
    return new Worker(
      "data:text/javascript;charset=utf-8," + encodeURIComponent(d),
      {
        type: "module",
        name: a?.name
      }
    );
  }
}
62
- class v {
411
+ class O {
63
412
  worker;
64
413
  handlers = {};
65
- constructor(e = {}) {
66
- this.handlers = e, this.worker = new E(), this.worker.onmessage = this.handleMessage.bind(this);
414
+ constructor(n = {}) {
415
+ this.handlers = n, this.worker = new w(), this.worker.onmessage = this.handleMessage.bind(this);
67
416
  }
68
- handleMessage(e) {
69
- const { type: s, payload: r } = e.data;
70
- switch (s) {
417
+ handleMessage(n) {
418
+ const { type: e, payload: t } = n.data;
419
+ switch (e) {
71
420
  case "ready":
72
421
  this.handlers.onReady?.();
73
422
  break;
74
423
  case "start":
75
- this.handlers.onStart?.(r.totalFrames);
424
+ this.handlers.onStart?.(t.totalFrames);
76
425
  break;
77
426
  case "frame":
78
- this.handlers.onFrame?.(r.frame, r.index);
427
+ this.handlers.onFrame?.(t.frame, t.index);
79
428
  break;
80
429
  case "done":
81
430
  this.handlers.onDone?.();
82
431
  break;
83
432
  case "error":
84
- this.handlers.onError?.(r);
433
+ this.handlers.onError?.(t);
85
434
  break;
86
435
  }
87
436
  }
88
- init(e) {
437
+ init(n) {
89
438
  this.worker.postMessage(
90
439
  {
91
440
  type: "init",
92
- modelPath: e.modelPath,
93
- dataset: e.dataset,
94
- zipBuffer: e.zipBuffer,
95
- blendingMask: e.blendingMask,
96
- wasmPaths: e.wasmPaths
441
+ modelPath: n.modelPath,
442
+ dataset: n.dataset,
443
+ zipBuffer: n.zipBuffer,
444
+ blendingMask: n.blendingMask,
445
+ wasmPaths: n.wasmPaths
97
446
  },
98
- [e.zipBuffer, e.blendingMask]
447
+ [n.zipBuffer, n.blendingMask]
99
448
  );
100
449
  }
101
- run(e) {
450
+ run(n) {
102
451
  this.worker.postMessage(
103
452
  {
104
453
  type: "run",
105
- audioFeatures: e.audioFeatures,
106
- audioDimensions: e.audioDimensions
454
+ audioFeatures: n.audioFeatures,
455
+ audioDimensions: n.audioDimensions
107
456
  },
108
- [e.audioFeatures.buffer]
457
+ [n.audioFeatures.buffer]
109
458
  );
110
459
  }
111
460
  stop() {
@@ -116,21 +465,21 @@ class v {
116
465
  }
117
466
  }
118
467
  export {
119
- g as DEFAULT_LOW_FREQ,
120
- _ as ENERGY_FLOOR,
121
- L as FRAME_INTERVAL_MS,
122
- m as FRAME_LENGTH_MS,
123
- u as FRAME_SHIFT_MS,
124
- O as FeatureExtractor,
125
- v as InferenceEngine,
126
- k as NUM_MEL_BINS,
127
- f as NUM_SEQUENCE_FRAMES,
128
- p as PREEMPH_COEFF,
129
- M as REMOVE_DC_OFFSET,
130
- R as ROUND_TO_POWER_OF_TWO,
131
- w as SAMPLE_RATE,
132
- F as TARGET_FPS,
133
- P as calculatePingPongState,
134
- y as getBorder,
135
- b as loadDataset
468
+ k as DEFAULT_LOW_FREQ,
469
+ R as ENERGY_FLOOR,
470
+ x as FRAME_INTERVAL_MS,
471
+ y as FRAME_LENGTH_MS,
472
+ _ as FRAME_SHIFT_MS,
473
+ A as FeatureExtractor,
474
+ O as InferenceEngine,
475
+ M as NUM_MEL_BINS,
476
+ E as NUM_SEQUENCE_FRAMES,
477
+ b as PREEMPH_COEFF,
478
+ I as REMOVE_DC_OFFSET,
479
+ v as ROUND_TO_POWER_OF_TWO,
480
+ g as SAMPLE_RATE,
481
+ L as TARGET_FPS,
482
+ C as calculatePingPongState,
483
+ F as getBorder,
484
+ U as loadDataset
136
485
  };
package/package.json CHANGED
@@ -1,42 +1,41 @@
1
- {
2
- "name": "sophontalk-services",
3
- "version": "0.0.2",
4
- "type": "module",
5
- "scripts": {
6
- "dev": "vite",
7
- "build": "vue-tsc -b && vite build",
8
- "build:lib": "vite build --config vite.lib.config.ts",
9
- "preview": "vite preview",
10
- "publish": "npm publish --registry https://registry.npmjs.org --access public"
11
- },
12
- "main": "./dist-lib/sophontalk-services.cjs",
13
- "module": "./dist-lib/sophontalk-services.js",
14
- "types": "./dist-lib/types/src/services/index.d.ts",
15
- "files": [
16
- "dist-lib"
17
- ],
18
- "exports": {
19
- ".": {
20
- "types": "./dist-lib/types/src/services/index.d.ts",
21
- "import": "./dist-lib/sophontalk-services.js",
22
- "require": "./dist-lib/sophontalk-services.cjs"
23
- }
24
- },
25
- "dependencies": {
26
- "@alexanderolsen/libsamplerate-js": "^2.1.2",
27
- "fft.js": "^4.0.4",
28
- "jszip": "^3.10.1",
29
- "onnxruntime-web": "^1.22.0",
30
- "vue": "^3.5.17"
31
- },
32
- "devDependencies": {
33
- "@rollup/plugin-replace": "^6.0.3",
34
- "@vitejs/plugin-vue": "^6.0.0",
35
- "@vue/tsconfig": "^0.7.0",
36
- "typescript": "~5.8.3",
37
- "vite": "^7.0.2",
38
- "vite-plugin-dts": "^4.5.4",
39
- "vite-plugin-static-copy": "^3.1.0",
40
- "vue-tsc": "^3.0.1"
41
- }
42
- }
1
+ {
2
+ "name": "sophontalk-services",
3
+ "version": "0.0.4",
4
+ "type": "module",
5
+ "scripts": {
6
+ "dev": "vite",
7
+ "build": "vue-tsc -b && vite build",
8
+ "build:lib": "vite build --config vite.lib.config.ts",
9
+ "preview": "vite preview"
10
+ },
11
+ "main": "./dist-lib/sophontalk-services.cjs",
12
+ "module": "./dist-lib/sophontalk-services.js",
13
+ "types": "./dist-lib/types/src/services/index.d.ts",
14
+ "files": [
15
+ "dist-lib"
16
+ ],
17
+ "exports": {
18
+ ".": {
19
+ "types": "./dist-lib/types/src/services/index.d.ts",
20
+ "import": "./dist-lib/sophontalk-services.js",
21
+ "require": "./dist-lib/sophontalk-services.cjs"
22
+ }
23
+ },
24
+ "dependencies": {
25
+ "@alexanderolsen/libsamplerate-js": "^2.1.2",
26
+ "fft.js": "^4.0.4",
27
+ "jszip": "^3.10.1",
28
+ "onnxruntime-web": "^1.22.0",
29
+ "vue": "^3.5.17"
30
+ },
31
+ "devDependencies": {
32
+ "@rollup/plugin-replace": "^6.0.3",
33
+ "@vitejs/plugin-vue": "^6.0.0",
34
+ "@vue/tsconfig": "^0.7.0",
35
+ "typescript": "~5.8.3",
36
+ "vite": "^7.0.2",
37
+ "vite-plugin-dts": "^4.5.4",
38
+ "vite-plugin-static-copy": "^3.1.0",
39
+ "vue-tsc": "^3.0.1"
40
+ }
41
+ }
@@ -1,145 +0,0 @@
1
- import x from "fft.js";
2
- import { create as O, ConverterType as H } from "@alexanderolsen/libsamplerate-js";
3
// Sample rate (Hz) the audio is resampled to before analysis.
const y = 16e3;
// Analysis frame length and hop, in milliseconds.
const U = 25;
const D = 10;
// Number of mel filters, i.e. feature values produced per frame.
const u = 80;
// Frames are grouped in multiples of this count in the output dimensions.
const S = 4;
// Pre-emphasis coefficient.
const W = 0.97;
// Lower clamp applied to filter energies before taking the logarithm.
const L = 1e-10;
// Lowest frequency (Hz) covered by the mel filterbank.
const b = 20;
// Frame length and hop converted to samples.
const h = Math.round(U * y / 1e3);
const C = Math.round(D * y / 1e3);

/** Returns the smallest power of two that is >= `e` (1 for `e` <= 0). */
const k = (e) => {
  let size = 1;
  while (size < e) {
    size *= 2;
  }
  return size;
};

// FFT size and the FFT instance from fft.js.
const P = k(h);
const N = new x(P);
// Analysis window, one coefficient per sample of a frame.
const v = new Float32Array(h);

/** Fills `v` with 0.5 - 0.5 * cos(2*pi*i / (h - 1)); no-op when `h` <= 1. */
const G = () => {
  if (h <= 1) return;
  const step = 2 * Math.PI / (h - 1);
  for (let i = 0; i < h; i += 1) {
    v[i] = 0.5 - 0.5 * Math.cos(i * step);
  }
};
G();
15
/**
 * Builds a bank of triangular mel filters over FFT bins.
 *
 * @param {number} e Number of filters.
 * @param {number} t FFT size.
 * @param {number} o Sample rate in Hz.
 * @param {number} [a=20] Lowest frequency in Hz.
 * @param {number} [s] Highest frequency in Hz; falls back to `o / 2` when falsy.
 * @returns {{startBin: number, values: Float32Array}[]} One entry per filter:
 *   the first FFT bin it covers and its weights. Filters that cover no bin
 *   are `{ startBin: 0, values: <empty> }`.
 */
function Q(e, t, o, a = 20, s) {
  const highHz = s || o / 2;
  const melToHz = (mel) => 700 * (Math.exp(mel / 1127) - 1);
  const hzToMel = (hz) => 1127 * Math.log(1 + hz / 700);
  const lowMel = hzToMel(a);
  const highMel = hzToMel(highHz);

  // e + 2 equally spaced mel points: each filter needs left/center/right.
  const melPoints = new Float32Array(e + 2);
  for (let idx = 0; idx < e + 2; idx++) {
    melPoints[idx] = lowMel + (highMel - lowMel) * idx / (e + 1);
  }
  // Two typed-array maps on purpose: values are rounded to float32 after each.
  const binPoints = melPoints.map(melToHz).map((hz) => hz * t / o);
  const binCount = Math.floor(t / 2) + 1;

  const filters = [];
  for (let idx = 1; idx <= e; idx++) {
    const left = binPoints[idx - 1];
    const center = binPoints[idx];
    const right = binPoints[idx + 1];
    const firstBin = Math.max(0, Math.ceil(left));
    const lastBin = Math.min(binCount - 1, Math.floor(right));
    const width = lastBin - firstBin + 1;
    if (width <= 0) {
      filters.push({ startBin: 0, values: new Float32Array(0) });
      continue;
    }
    const weights = new Float32Array(width);
    for (let bin = firstBin; bin <= lastBin; bin++) {
      // Rising edge up to the center, falling edge after it.
      weights[bin - firstBin] = bin <= center
        ? (bin - left) / (center - left)
        : (right - bin) / (right - center);
    }
    filters.push({ startBin: firstBin, values: weights });
  }
  return filters;
}
36
// Mel filterbank shared by every frame: `u` filters over a `P`-point FFT at
// sample rate `y`, starting at `b` Hz.
const q = Q(u, P, y, b);
42
/**
 * Multiplies each sample of `e` in place by the matching coefficient of the
 * module-level window `v`.
 *
 * @param {Float32Array} e Frame samples; modified in place.
 */
function K(e) {
  let idx = 0;
  while (idx < e.length) {
    e[idx] = e[idx] * v[idx];
    idx += 1;
  }
}
46
/**
 * Applies pre-emphasis in place: e[i] -= t * e[i - 1].
 * The first sample has no predecessor and is scaled against itself.
 *
 * @param {Float32Array|number[]} e Samples; modified in place.
 * @param {number} t Pre-emphasis coefficient.
 */
function Y(e, t) {
  // Walk backwards so each step still sees the unmodified previous sample.
  let idx = e.length - 1;
  while (idx > 0) {
    e[idx] = e[idx] - t * e[idx - 1];
    idx -= 1;
  }
  e[0] = e[0] - t * e[0];
}
51
/**
 * Subtracts the mean from every element of `e` in place.
 * Empty input is left untouched.
 *
 * @param {Float32Array|number[]} e Samples; modified in place.
 */
function V(e) {
  const count = e.length;
  if (count === 0) return;
  let total = 0;
  for (const sample of e) {
    total += sample;
  }
  const mean = total / count;
  for (let idx = 0; idx < count; idx++) {
    e[idx] = e[idx] - mean;
  }
}
59
/**
 * Writes the squared magnitude of each complex value into `e`.
 *
 * @param {Float32Array|number[]} e Output; `e.length` values are written.
 * @param {ArrayLike<number>} t Interleaved complex input: re, im, re, im, ...
 */
function Z(e, t) {
  for (let bin = 0, pos = 0; bin < e.length; bin++, pos += 2) {
    const re = t[pos];
    const im = t[pos + 1];
    e[bin] = re * re + im * im;
  }
}
66
/**
 * Normalizes `e` in place to zero mean and unit sample standard deviation
 * (n - 1 denominator). When the deviation is not above 1e-8 only the mean is
 * removed. Inputs with fewer than two elements are left untouched.
 *
 * @param {Float32Array|number[]} e Values; modified in place.
 */
function j(e) {
  const count = e.length;
  if (count < 2) return;

  let total = 0;
  for (const value of e) {
    total += value;
  }
  const mean = total / count;

  let squared = 0;
  for (const value of e) {
    const delta = value - mean;
    squared += delta * delta;
  }
  const deviation = Math.sqrt(squared / (count - 1));

  // A scale of exactly 1 leaves the centered value unchanged, which is the
  // "mean removal only" case.
  const scale = deviation > 1e-8 ? 1 / deviation : 1;
  for (let idx = 0; idx < count; idx++) {
    e[idx] = (e[idx] - mean) * scale;
  }
}
86
/**
 * Extracts normalized log-mel features from raw audio.
 *
 * Steps: average the two channels when a right channel is present, resample
 * to `y` Hz if needed, then per frame: mean removal, pre-emphasis, windowing,
 * FFT, power spectrum, mel filterbank, floor at `L`, natural log. The frame
 * count is padded up to a multiple of `S` by repeating the last frame, and
 * the whole matrix is normalized with `j`.
 *
 * @param {{leftChannel: Float32Array, rightChannel?: Float32Array, sampleRate: number}} e
 * @returns {Promise<{features: Float32Array, dimensions: number[]}>}
 *   `dimensions` is `[paddedFrames / S, S, u]`.
 * @throws {Error} When the audio is shorter than one frame.
 */
async function z(e) {
  let t = e.leftChannel;
  if (e.rightChannel) {
    const right = e.rightChannel;
    const mono = new Float32Array(t.length);
    for (let n = 0; n < t.length; n++) {
      mono[n] = (t[n] + right[n]) * 0.5;
    }
    t = mono;
  }

  if (e.sampleRate !== y) {
    const converter = await O(1, e.sampleRate, y, {
      converterType: H.SRC_SINC_MEDIUM_QUALITY
    });
    try {
      t = converter.simple(t);
    } finally {
      // Release the converter even when resampling throws.
      converter.destroy();
    }
  }

  const frameCount = Math.floor((t.length - h) / C) + 1;
  if (frameCount <= 0) {
    throw new Error("特征提取失败：音频时长过短。");
  }

  const paddedCount = Math.ceil(frameCount / S) * S;
  const features = new Float32Array(paddedCount * u);
  const frame = new Float32Array(h);
  const fftInput = N.createComplexArray();
  const fftOutput = N.createComplexArray();
  const power = new Float32Array(Math.floor(P / 2) + 1);

  for (let f = 0; f < frameCount; f++) {
    const start = f * C;
    frame.set(t.subarray(start, start + h));
    V(frame);
    Y(frame, W);
    K(frame);

    // Real samples go into the even (real) slots; everything else stays 0,
    // which also zero-pads the frame up to the FFT size.
    fftInput.fill(0);
    for (let c = 0; c < h; c++) {
      fftInput[c * 2] = frame[c];
    }
    N.transform(fftOutput, fftInput);
    Z(power, fftOutput);

    const rowOffset = f * u;
    for (let c = 0; c < u; c++) {
      const { values, startBin } = q[c];
      let energy = 0;
      for (let p = 0; p < values.length; p++) {
        energy += power[startBin + p] * values[p];
      }
      features[rowOffset + c] = Math.log(energy < L ? L : energy);
    }
  }

  if (paddedCount > frameCount) {
    // Fill the padding rows with a copy of the last real frame.
    const lastRow = features.subarray((frameCount - 1) * u, frameCount * u);
    for (let c = frameCount; c < paddedCount; c++) {
      features.set(lastRow, c * u);
    }
  }

  j(features);
  return {
    features,
    dimensions: [paddedCount / S, S, u]
  };
}
132
// Worker entry point: each incoming message is one extraction request.
// Replies with { status: "success", payload } (feature buffer transferred)
// or { status: "error", error }.
self.addEventListener(
  "message",
  async (e) => {
    try {
      const request = e.data;
      const result = await z(request);
      self.postMessage(
        { status: "success", payload: result },
        { transfer: [result.features.buffer] }
      );
    } catch (t) {
      // A thrown value is not guaranteed to be an Error (it may even be
      // null/undefined); never let the error reply itself throw, or the
      // requester would get no answer at all.
      self.postMessage({ status: "error", error: t?.message ?? String(t) });
    }
  }
);
@@ -1,175 +0,0 @@
1
- import { env as B, InferenceSession as V, Tensor as U } from "onnxruntime-web";
2
- import $ from "jszip";
3
// Last two dimensions of the audio input tensor fed to the model.
const G = 80;
const L = 4;
// Frame rate and the matching frame interval in milliseconds.
const H = 25;
const j = 1e3 / H;

/**
 * Maps a crop size to its border width.
 *
 * @param {number} t Crop size; only 96 and 128 are accepted.
 * @returns {number} 3 for 96, 4 for 128.
 * @throws {Error} For any other crop size.
 */
const J = (t) => {
  switch (t) {
    case 128:
      return 4;
    case 96:
      return 3;
    default:
      throw new Error(`Unsupported crop size: ${t}. Only nano (96) and tiny (128) are supported.`);
  }
};

/**
 * Advances a ping-pong (back-and-forth) index over a list.
 *
 * @param {number} t Current index.
 * @param {number} e List length.
 * @param {number} a Current direction (+1 or -1).
 * @returns {{nextIndex: number, nextDirection: number}} The next position;
 *   the direction flips when the step would leave the list.
 */
const Q = (t, e, a) => {
  if (e <= 1) {
    return { nextIndex: 0, nextDirection: 1 };
  }
  const candidate = t + a;
  if (candidate >= e) {
    // Ran past the end: bounce back from the last element.
    return { nextIndex: e - 2, nextDirection: -1 };
  }
  if (candidate < 0) {
    // Ran past the start: bounce forward from the first element.
    return { nextIndex: 1, nextDirection: 1 };
  }
  return { nextIndex: candidate, nextDirection: a };
};
12
// Run the ONNX Runtime WASM backend on a single thread.
B.wasm.numThreads = 1;

/** True when the runtime exposes `navigator.gpu`. */
const Z = () => typeof navigator !== "undefined" && Boolean(navigator.gpu);

// Number of audio feature frames in the window fed to the model per video frame.
const F = 32;

// Inference session and dataset; both are set by the "init" handler.
let _ = null;
let R = null;
// Opened zip archive (only held during init) and the blending mask image.
let M = null;
let q = null;

// Decoded dataset assets, keyed by their file name inside the zip.
const w = {
  tensors: new Map(),
  faceImages: new Map(),
  fullImages: new Map()
};

// Border width and crop size; overwritten from the dataset during init.
let P = 3;
let b = 96;
// Source image width and height.
let k = 0;
let W = 0;
// Canvas/context/ImageData that receive the model output pixels.
let C = null;
let N = null;
let y = null;
// Crop-sized canvas/context where the face is composited.
let E = null;
let g = null;
// Full-frame canvas/context where the final frame is assembled.
let I = null;
let S = null;
// Reusable buffer for the audio feature window.
let x = null;
// Set to true by a "stop" message to end the current run.
let T = false;
// File name and decoded bitmap of the full image currently in use.
let v = "";
let p = null;
21
/** Reports an error message to the main thread. */
const D = (t) => {
  self.postMessage({ type: "error", payload: t });
};

/**
 * Wraps a cached tensor file as a model input tensor of shape
 * [1, 6, b - 2P, b - 2P].
 * @throws {Error} When the file is not in the cache.
 */
const K = (t) => {
  const cached = w.tensors.get(t);
  if (!cached) throw new Error(`Cache miss: ${t}`);
  const side = b - 2 * P;
  return new U("float32", cached, [1, 6, side, side]);
};

/**
 * Returns the cached face bitmap for a file name.
 * @throws {Error} When the file is not in the cache.
 */
const X = (t) => {
  const bitmap = w.faceImages.get(t);
  if (!bitmap) throw new Error(`Cache miss: ${t}`);
  return bitmap;
};

/**
 * Decodes the cached full-frame blob into a new ImageBitmap.
 * The caller owns the bitmap and must close it.
 * @throws {Error} When the file is not in the cache.
 */
const Y = async (t) => {
  const blob = w.fullImages.get(t);
  if (!blob) throw new Error(`Cache miss: ${t}`);
  return await createImageBitmap(blob);
};

/**
 * Paints the model output tensor onto canvas `C`.
 *
 * The tensor is read as planar data with dims [_, channels, height, width]
 * and values scaled by 255. With three channels, plane 2 is written to the
 * first byte of each pixel, plane 1 to the second and plane 0 to the third;
 * otherwise plane 0 is used for all three. Alpha is always 255.
 */
const ee = (t) => {
  const [, channels, height, width] = t.dims;
  const data = t.data;
  if (!y || y.width !== width || y.height !== height) {
    C.width = width;
    C.height = height;
    y = N.createImageData(width, height);
  }
  const pixelCount = height * width;
  // One 32-bit write per pixel; the byte layout below relies on the
  // platform storing the low byte first.
  const packed = new Uint32Array(y.data.buffer);
  const hasThreePlanes = channels === 3;
  const clamp = (value) => (value < 0 ? 0 : value > 255 ? 255 : value);
  for (let px = 0; px < pixelCount; px++) {
    const first = clamp((hasThreePlanes ? data[px + pixelCount * 2] : data[px]) * 255);
    const second = clamp((hasThreePlanes ? data[px + pixelCount] : data[px]) * 255);
    const third = clamp(data[px] * 255);
    packed[px] = 255 << 24 | (third & 255) << 16 | (second & 255) << 8 | first & 255;
  }
  N.putImageData(y, 0, 0);
};

/**
 * Composites one output frame and returns it as an ImageBitmap.
 *
 * @param {object} t Model output tensor.
 * @param {object} e Dataset image entry (`face_image`, `crop_info`).
 * @param {ImageBitmap} a Full-frame background bitmap.
 */
const te = async (t, e, a) => {
  ee(t);
  const face = X(e.face_image);

  // Face canvas: original face, generated patch inset by the border, then
  // keep only what the mask covers and put the original face back behind it.
  g.globalCompositeOperation = "source-over";
  g.drawImage(face, 0, 0);
  g.drawImage(C, P, P);
  g.globalCompositeOperation = "destination-in";
  g.drawImage(q, 0, 0, b, b);
  g.globalCompositeOperation = "destination-over";
  g.drawImage(face, 0, 0);
  g.globalCompositeOperation = "source-over";

  // Full canvas: background first, then the face scaled into its crop box.
  S.drawImage(a, 0, 0);
  const cropHeight = e.crop_info.ymax - e.crop_info.ymin;
  S.drawImage(
    E,
    e.crop_info.xmin,
    e.crop_info.ymin,
    e.crop_info.width,
    cropHeight
  );
  return await createImageBitmap(I);
};

/**
 * Copies the window of `F` audio feature frames around frame `a` into the
 * shared buffer `x`. Indices outside [0, frames - 1] are clamped, so the
 * edges repeat the first/last frame.
 *
 * @param {Float32Array} t All audio features, flattened.
 * @param {number[]} e Feature dimensions; the last two are multiplied to get
 *   the size of one frame.
 * @param {number} a Index of the current frame.
 * @returns {Float32Array} The shared buffer (reused between calls).
 */
const ae = (t, e, a) => {
  const [frameTotal, rows, cols] = e;
  const frameSize = rows * cols;
  const windowSize = F * frameSize;
  if (!x || x.length !== windowSize) {
    x = new Float32Array(windowSize);
  }
  for (let slot = 0; slot < F; slot++) {
    const sourceFrame = Math.min(Math.max(a - F / 2 + slot, 0), frameTotal - 1);
    const sourceStart = sourceFrame * frameSize;
    x.set(t.subarray(sourceStart, sourceStart + frameSize), slot * frameSize);
  }
  return x;
};

/**
 * Creates (or recreates on size change) the three offscreen canvases.
 *
 * @param {number} t Full-frame width.
 * @param {number} e Full-frame height.
 * @param {number} a Crop size (side of the square face canvas).
 */
const ne = (t, e, a) => {
  if (!C) {
    C = new OffscreenCanvas(1, 1);
    N = C.getContext("2d", { willReadFrequently: true });
  }
  if (!E || E.width !== a) {
    E = new OffscreenCanvas(a, a);
    g = E.getContext("2d", { willReadFrequently: true });
  }
  if (!I || I.width !== t || I.height !== e) {
    I = new OffscreenCanvas(t, e);
    S = I.getContext("2d", { willReadFrequently: true });
  }
};

/**
 * Handles "init": configures the runtime, caches every dataset asset from
 * the zip, loads the model and posts `{ type: "ready" }`. Any failure is
 * reported through `D`.
 *
 * @param {object} t Init message (`wasmPaths`, `dataset`, `zipBuffer`,
 *   `blendingMask`, `modelPath`).
 */
const se = async (t) => {
  try {
    B.wasm.wasmPaths = t.wasmPaths ? t.wasmPaths : "/";
    if (!t.dataset) throw new Error("Missing dataset");
    const dataset = t.dataset;
    R = dataset;
    b = dataset.dataset_info.config.crop_size;
    P = J(b);
    k = dataset.dataset_info.source_image_dimensions?.width || 0;
    W = dataset.dataset_info.source_image_dimensions?.height || 0;
    ne(k, W, b);
    M = await $.loadAsync(t.zipBuffer);
    t.zipBuffer = null;
    q = t.blendingMask;

    // Entries missing from the zip are skipped here; they surface later as
    // "Cache miss" errors when a frame needs them.
    const loaders = dataset.images.map(async (image) => {
      if (!w.tensors.has(image.tensor_file)) {
        const entry = M.file(image.tensor_file);
        if (entry) {
          const raw = await entry.async("arraybuffer");
          w.tensors.set(image.tensor_file, new Float32Array(raw));
        }
      }
      if (!w.faceImages.has(image.face_image)) {
        const entry = M.file(image.face_image);
        if (entry) {
          const blob = await entry.async("blob");
          const bitmap = await createImageBitmap(blob);
          w.faceImages.set(image.face_image, bitmap);
        }
      }
      if (!w.fullImages.has(image.full_image)) {
        const entry = M.file(image.full_image);
        if (entry) {
          const blob = await entry.async("blob");
          w.fullImages.set(image.full_image, blob);
        }
      }
    });
    await Promise.all(loaders);
    M = null;

    const response = await fetch(t.modelPath);
    // Without this check an HTTP error page would be handed to the model
    // loader and fail with an unrelated parse error.
    if (!response.ok) {
      throw new Error(`Failed to load model: ${t.modelPath} (HTTP ${response.status})`);
    }
    const modelBytes = await response.arrayBuffer();

    const providers = [];
    if (Z()) providers.push("webgpu");
    providers.push("wasm");
    _ = await V.create(modelBytes, {
      executionProviders: providers,
      graphOptimizationLevel: "all",
      enableMemPattern: true,
      enableCpuMemArena: true
    });
    self.postMessage({ type: "ready" });
  } catch (err) {
    D(err?.message || "Init Error");
  }
};

/**
 * Handles "run": produces one frame per audio feature frame, walking the
 * dataset images back and forth, and posts "start", "frame"* and "done".
 * A "stop" message (flag `T`) ends the loop early; an error is reported
 * through `D` and also ends the loop. "done" is posted in every case.
 *
 * @param {object} t Run message (`audioFeatures`, `audioDimensions`).
 */
const oe = async (t) => {
  if (!_ || !R) {
    D("Not initialized");
    return;
  }
  T = false;
  const totalFrames = t.audioDimensions[0];
  self.postMessage({
    type: "start",
    payload: { totalFrames, frameInterval: j }
  });

  let imageIndex = 0;
  let direction = 1;
  const feeds = {};
  const imageInputName = _.inputNames[0];
  const audioInputName = _.inputNames[1];
  const outputName = _.outputNames[0];

  // Drop any background bitmap left over from a previous run.
  v = "";
  if (p) {
    p.close();
    p = null;
  }

  for (let frameIndex = 0; frameIndex < totalFrames && !T; frameIndex++) {
    let imageTensor = null;
    let audioTensor = null;
    let outputTensor = null;
    try {
      const image = R.images[imageIndex];
      imageTensor = K(image.tensor_file);
      // Decode the background only when it differs from the previous frame's.
      if (image.full_image !== v) {
        if (p) p.close();
        p = await Y(image.full_image);
        v = image.full_image;
      }
      const audioWindow = ae(t.audioFeatures, t.audioDimensions, frameIndex);
      audioTensor = new U("float32", audioWindow, [1, F, L, G]);
      feeds[imageInputName] = imageTensor;
      feeds[audioInputName] = audioTensor;
      const results = await _.run(feeds);
      const output = results[outputName];
      if (!output) {
        throw new Error("Missing inference output tensor");
      }
      outputTensor = output;
      const frame = await te(output, image, p);
      self.postMessage(
        { type: "frame", payload: { frame, index: frameIndex } },
        [frame]
      );
      const next = Q(imageIndex, R.images.length, direction);
      imageIndex = next.nextIndex;
      direction = next.nextDirection;
    } catch (err) {
      D(err?.message || "Run Error");
      break;
    } finally {
      // Release tensor memory on every path, including errors.
      if (imageTensor) imageTensor.dispose();
      if (audioTensor) audioTensor.dispose();
      if (outputTensor) outputTensor.dispose();
    }
  }

  if (p) {
    p.close();
    p = null;
  }
  v = "";
  self.postMessage({ type: "done" });
};
172
// Worker entry point: routes "init" and "run" to their handlers; "stop" only
// raises the flag that the run loop polls. Other message types are ignored.
self.onmessage = (t) => {
  const message = t.data;
  switch (message.type) {
    case "init":
      se(message);
      break;
    case "run":
      oe(message);
      break;
    case "stop":
      T = true;
      break;
  }
};