sophontalk-services 0.0.4 → 0.0.5

This diff compares the publicly available contents of the two package versions as published to their registry. It is provided for informational purposes only.
@@ -1,324 +1 @@
1
- "use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const w=16e3,g=25,_=10,y=80,E=4,M=.97,R=1e-10,b=20,F=!0,L=!0,d=25,I=1e3/d,k=a=>{if(a===128)return 4;if(a===96)return 3;throw new Error(`Unsupported crop size: ${a}. Only nano (96) and tiny (128) are supported.`)},v=(a,n,e)=>{if(n<=1)return{nextIndex:0,nextDirection:1};let t=a+e,r=e;return t>=n?(t=n-2,r=-1):t<0&&(t=1,r=1),{nextIndex:t,nextDirection:r}};async function U(a={}){const{jsonUrl:n="/complete_dataset.json",zipUrl:e="/processed_images.zip"}=a,[t,r]=await Promise.all([fetch(n),fetch(e)]);if(!t.ok)throw new Error(`无法加载 ${n}`);if(!r.ok)throw new Error(`无法加载 ${e}`);const o=await t.json(),c=await r.arrayBuffer();return{dataset:o,zipBuffer:c}}const h=`import x from "fft.js";
2
- import { create as O, ConverterType as H } from "@alexanderolsen/libsamplerate-js";
3
- const y = 16e3, U = 25, D = 10, u = 80, S = 4, W = 0.97, L = 1e-10, b = 20, h = Math.round(U * y / 1e3), C = Math.round(D * y / 1e3), k = (e) => {
4
- if (e <= 0) return 1;
5
- let t = 1;
6
- for (; t < e; ) t *= 2;
7
- return t;
8
- }, P = k(h), N = new x(P), v = new Float32Array(h), G = () => {
9
- if (h <= 1) return;
10
- const e = 2 * Math.PI / (h - 1);
11
- for (let t = 0; t < h; t++)
12
- v[t] = 0.5 - 0.5 * Math.cos(t * e);
13
- };
14
- G();
15
- function Q(e, t, o, a = 20, s) {
16
- s = s || o / 2;
17
- const i = (n) => 700 * (Math.exp(n / 1127) - 1), r = (n) => 1127 * Math.log(1 + n / 700), l = r(a), A = r(s), w = new Float32Array(e + 2);
18
- for (let n = 0; n < e + 2; n++)
19
- w[n] = l + (A - l) * n / (e + 1);
20
- const _ = w.map(i).map((n) => n * t / o), f = Math.floor(t / 2) + 1, m = [];
21
- for (let n = 1; n <= e; n++) {
22
- const c = _[n - 1], F = _[n], M = _[n + 1], I = Math.ceil(c), T = Math.floor(M), E = Math.max(0, I), p = Math.min(f - 1, T), B = p - E + 1;
23
- if (B <= 0) {
24
- m.push({ startBin: 0, values: new Float32Array(0) });
25
- continue;
26
- }
27
- const R = new Float32Array(B);
28
- for (let d = E; d <= p; d++) {
29
- let g = 0;
30
- d <= F ? g = (d - c) / (F - c) : g = (M - d) / (M - F), R[d - E] = g;
31
- }
32
- m.push({ startBin: E, values: R });
33
- }
34
- return m;
35
- }
36
- const q = Q(
37
- u,
38
- P,
39
- y,
40
- b
41
- );
42
- function K(e) {
43
- for (let t = 0; t < e.length; t++)
44
- e[t] *= v[t];
45
- }
46
- function Y(e, t) {
47
- for (let o = e.length - 1; o > 0; o--)
48
- e[o] -= t * e[o - 1];
49
- e[0] -= t * e[0];
50
- }
51
- function V(e) {
52
- const t = e.length;
53
- if (t === 0) return;
54
- let o = 0;
55
- for (let s = 0; s < t; s++) o += e[s];
56
- const a = o / t;
57
- for (let s = 0; s < t; s++) e[s] -= a;
58
- }
59
- function Z(e, t) {
60
- const o = e.length;
61
- for (let a = 0; a < o; a++) {
62
- const s = t[a * 2], i = t[a * 2 + 1];
63
- e[a] = s * s + i * i;
64
- }
65
- }
66
- function j(e) {
67
- const t = e.length;
68
- if (t < 2) return;
69
- let o = 0;
70
- for (let r = 0; r < t; r++) o += e[r];
71
- const a = o / t;
72
- let s = 0;
73
- for (let r = 0; r < t; r++) {
74
- const l = e[r] - a;
75
- s += l * l;
76
- }
77
- const i = Math.sqrt(s / (t - 1));
78
- if (i > 1e-8) {
79
- const r = 1 / i;
80
- for (let l = 0; l < t; l++)
81
- e[l] = (e[l] - a) * r;
82
- } else
83
- for (let r = 0; r < t; r++)
84
- e[r] -= a;
85
- }
86
- async function z(e) {
87
- let t = e.leftChannel;
88
- if (e.rightChannel) {
89
- const f = e.rightChannel, m = new Float32Array(t.length);
90
- for (let n = 0; n < t.length; n++)
91
- m[n] = (t[n] + f[n]) * 0.5;
92
- t = m;
93
- }
94
- if (e.sampleRate !== y) {
95
- const f = await O(1, e.sampleRate, y, {
96
- converterType: H.SRC_SINC_MEDIUM_QUALITY
97
- });
98
- t = f.simple(t), f.destroy();
99
- }
100
- const o = Math.floor((t.length - h) / C) + 1;
101
- if (o <= 0)
102
- throw new Error("特征提取失败:音频时长过短。");
103
- const a = Math.ceil(o / S) * S, s = a * u, i = new Float32Array(s), r = new Float32Array(h), l = N.createComplexArray(), A = N.createComplexArray(), w = new Float32Array(Math.floor(P / 2) + 1);
104
- for (let f = 0; f < o; f++) {
105
- const m = f * C;
106
- r.set(t.subarray(m, m + h)), V(r), Y(r, W), K(r), l.fill(0);
107
- for (let c = 0; c < h; c++)
108
- l[c * 2] = r[c];
109
- N.transform(A, l), Z(w, A);
110
- const n = f * u;
111
- for (let c = 0; c < u; c++) {
112
- const F = q[c];
113
- let M = 0;
114
- const I = F.values, T = F.startBin, E = I.length;
115
- for (let p = 0; p < E; p++)
116
- M += w[T + p] * I[p];
117
- M = M < L ? L : M, i[n + c] = Math.log(M);
118
- }
119
- }
120
- if (a > o) {
121
- const f = (o - 1) * u, m = f + u, n = i.subarray(f, m);
122
- for (let c = o; c < a; c++)
123
- i.set(n, c * u);
124
- }
125
- j(i);
126
- const _ = a / S;
127
- return {
128
- features: i,
129
- dimensions: [_, S, u]
130
- };
131
- }
132
- self.addEventListener(
133
- "message",
134
- async (e) => {
135
- try {
136
- const t = e.data, o = await z(t);
137
- self.postMessage(
138
- { status: "success", payload: o },
139
- { transfer: [o.features.buffer] }
140
- );
141
- } catch (t) {
142
- self.postMessage({ status: "error", error: t.message });
143
- }
144
- }
145
- );
146
- `,u=typeof self<"u"&&self.Blob&&new Blob(["URL.revokeObjectURL(import.meta.url);",h],{type:"text/javascript;charset=utf-8"});function C(a){let n;try{if(n=u&&(self.URL||self.webkitURL).createObjectURL(u),!n)throw"";const e=new Worker(n,{type:"module",name:a?.name});return e.addEventListener("error",()=>{(self.URL||self.webkitURL).revokeObjectURL(n)}),e}catch{return new Worker("data:text/javascript;charset=utf-8,"+encodeURIComponent(h),{type:"module",name:a?.name})}}class x{worker=new C;async process(n){const e=n.getChannelData(0),t=n.numberOfChannels>1?n.getChannelData(1):void 0,r={leftChannel:e,rightChannel:t,sampleRate:n.sampleRate},o=[e.buffer];return t&&o.push(t.buffer),new Promise((c,f)=>{const l=s=>{s.data?.status==="success"?(this.worker.removeEventListener("message",l),this.worker.removeEventListener("error",i),c(s.data.payload)):s.data?.status==="error"&&(this.worker.removeEventListener("message",l),this.worker.removeEventListener("error",i),f(new Error(s.data.error||"特征提取失败")))},i=s=>{this.worker.removeEventListener("message",l),this.worker.removeEventListener("error",i),f(s)};this.worker.addEventListener("message",l),this.worker.addEventListener("error",i),this.worker.postMessage(r,o)})}dispose(){this.worker.terminate()}}const p=`import { env as k, InferenceSession as $, Tensor as W } from "onnxruntime-web";
147
- import q from "jszip";
148
- const V = 80, G = 4, H = 25, j = 1e3 / H, J = (t) => {
149
- if (t === 128) return 4;
150
- if (t === 96) return 3;
151
- throw new Error(\`Unsupported crop size: \${t}. Only nano (96) and tiny (128) are supported.\`);
152
- }, Q = (t, e, n) => {
153
- if (e <= 1) return { nextIndex: 0, nextDirection: 1 };
154
- let a = t + n, s = n;
155
- return a >= e ? (a = e - 2, s = -1) : a < 0 && (a = 1, s = 1), { nextIndex: a, nextDirection: s };
156
- };
157
- k.wasm.numThreads = 1;
158
- const Z = () => typeof navigator < "u" && !!navigator.gpu, F = 32;
159
- let _ = null, R = null, x = null, L = null;
160
- const w = {
161
- tensors: /* @__PURE__ */ new Map(),
162
- faceImages: /* @__PURE__ */ new Map(),
163
- fullImages: /* @__PURE__ */ new Map()
164
- };
165
- let S = 3, b = 96, O = 0, z = 0, C = null, A = null, y = null, E = null, g = null, I = null, D = null, M = null, P = !1, v = "", p = null;
166
- const T = (t) => {
167
- self.postMessage({ type: "error", payload: t });
168
- }, K = (t) => {
169
- const e = w.tensors.get(t);
170
- if (!e) throw new Error(\`Cache miss: \${t}\`);
171
- const n = b - 2 * S;
172
- return new W("float32", e, [1, 6, n, n]);
173
- }, X = (t) => {
174
- const e = w.faceImages.get(t);
175
- if (!e) throw new Error(\`Cache miss: \${t}\`);
176
- return e;
177
- }, Y = async (t) => {
178
- const e = w.fullImages.get(t);
179
- if (!e) throw new Error(\`Cache miss: \${t}\`);
180
- return await createImageBitmap(e);
181
- }, ee = (t) => {
182
- const [e, n, a, s] = t.dims, u = t.data;
183
- (!y || y.width !== s || y.height !== a) && (C.width = s, C.height = a, y = A.createImageData(s, a));
184
- const d = a * s, l = new Uint32Array(y.data.buffer), o = d, i = d * 2, c = n === 3;
185
- for (let r = 0; r < d; r++) {
186
- let m = (c ? u[r + i] : u[r]) * 255, f = (c ? u[r + o] : u[r]) * 255, h = u[r] * 255;
187
- m = m < 0 ? 0 : m > 255 ? 255 : m, f = f < 0 ? 0 : f > 255 ? 255 : f, h = h < 0 ? 0 : h > 255 ? 255 : h, l[r] = 255 << 24 | (h & 255) << 16 | (f & 255) << 8 | m & 255;
188
- }
189
- A.putImageData(y, 0, 0);
190
- }, te = async (t, e, n) => {
191
- ee(t);
192
- const a = X(e.face_image);
193
- g.globalCompositeOperation = "source-over", g.drawImage(a, 0, 0), g.drawImage(C, S, S), g.globalCompositeOperation = "destination-in", g.drawImage(L, 0, 0, b, b), g.globalCompositeOperation = "destination-over", g.drawImage(a, 0, 0), g.globalCompositeOperation = "source-over", D.drawImage(n, 0, 0);
194
- const s = e.crop_info.ymax - e.crop_info.ymin;
195
- return D.drawImage(
196
- E,
197
- e.crop_info.xmin,
198
- e.crop_info.ymin,
199
- e.crop_info.width,
200
- s
201
- ), await createImageBitmap(I);
202
- }, ae = (t, e, n) => {
203
- const [a, s, u] = e, d = s * u, l = F * d;
204
- (!M || M.length !== l) && (M = new Float32Array(l));
205
- for (let o = 0; o < F; o++) {
206
- const c = Math.min(
207
- Math.max(n - F / 2 + o, 0),
208
- a - 1
209
- ) * d, r = o * d;
210
- M.set(
211
- t.subarray(c, c + d),
212
- r
213
- );
214
- }
215
- return M;
216
- }, ne = (t, e, n) => {
217
- C || (C = new OffscreenCanvas(1, 1), A = C.getContext("2d", {
218
- willReadFrequently: !0
219
- })), (!E || E.width !== n) && (E = new OffscreenCanvas(n, n), g = E.getContext("2d", {
220
- willReadFrequently: !0
221
- })), (!I || I.width !== t || I.height !== e) && (I = new OffscreenCanvas(t, e), D = I.getContext("2d", {
222
- willReadFrequently: !0
223
- }));
224
- }, se = async (t) => {
225
- try {
226
- if (t.wasmPaths && (k.wasm.wasmPaths = t.wasmPaths), !t.dataset) throw new Error("Missing dataset");
227
- const e = t.dataset;
228
- R = e, b = e.dataset_info.config.crop_size, S = J(b), O = e.dataset_info.source_image_dimensions?.width || 0, z = e.dataset_info.source_image_dimensions?.height || 0, ne(O, z, b), x = await q.loadAsync(t.zipBuffer), t.zipBuffer = null, L = t.blendingMask;
229
- const n = e.images.map(async (l) => {
230
- if (!w.tensors.has(l.tensor_file)) {
231
- const o = x.file(l.tensor_file);
232
- if (o) {
233
- const i = await o.async("arraybuffer");
234
- w.tensors.set(l.tensor_file, new Float32Array(i));
235
- }
236
- }
237
- if (!w.faceImages.has(l.face_image)) {
238
- const o = x.file(l.face_image);
239
- if (o) {
240
- const i = await o.async("blob"), c = await createImageBitmap(i);
241
- w.faceImages.set(l.face_image, c);
242
- }
243
- }
244
- if (!w.fullImages.has(l.full_image)) {
245
- const o = x.file(l.full_image);
246
- if (o) {
247
- const i = await o.async("blob");
248
- w.fullImages.set(l.full_image, i);
249
- }
250
- }
251
- });
252
- await Promise.all(n), x = null;
253
- const a = [];
254
- Z() && a.push("webgpu"), a.push("wasm");
255
- const s = new URL(t.modelPath, self.location.href).toString(), u = new URL(s).pathname.split("/").pop() || "model.onnx", d = new URL(\`\${u}.data\`, s).toString();
256
- _ = await $.create(s, {
257
- executionProviders: a,
258
- graphOptimizationLevel: "all",
259
- enableMemPattern: !0,
260
- enableCpuMemArena: !0,
261
- externalData: [{ data: d, path: \`\${u}.data\` }]
262
- }), self.postMessage({ type: "ready" });
263
- } catch (e) {
264
- T(e?.message || "Init Error");
265
- }
266
- }, oe = async (t) => {
267
- if (!_ || !R) {
268
- T("Not initialized");
269
- return;
270
- }
271
- P = !1;
272
- const e = t.audioDimensions[0];
273
- self.postMessage({
274
- type: "start",
275
- payload: { totalFrames: e, frameInterval: j }
276
- });
277
- let n = 0, a = 1;
278
- const s = {}, u = _.inputNames[0], d = _.inputNames[1], l = _.outputNames[0];
279
- v = "", p && (p.close(), p = null);
280
- for (let o = 0; o < e && !P; o++) {
281
- let i = null, c = null, r = null, m = null;
282
- try {
283
- const f = R.images[n];
284
- i = K(f.tensor_file), f.full_image !== v && (p && p.close(), p = await Y(f.full_image), v = f.full_image);
285
- const h = ae(
286
- t.audioFeatures,
287
- t.audioDimensions,
288
- o
289
- );
290
- c = new W("float32", h, [
291
- 1,
292
- F,
293
- G,
294
- V
295
- ]), s[u] = i, s[d] = c, r = await _.run(s);
296
- const N = r[l];
297
- if (!N)
298
- throw new Error("Missing inference output tensor");
299
- m = N;
300
- const B = await te(N, f, p);
301
- self.postMessage(
302
- { type: "frame", payload: { frame: B, index: o } },
303
- [B]
304
- );
305
- const U = Q(
306
- n,
307
- R.images.length,
308
- a
309
- );
310
- n = U.nextIndex, a = U.nextDirection;
311
- } catch (f) {
312
- T(f?.message || "Run Error");
313
- break;
314
- } finally {
315
- i && i.dispose(), c && c.dispose(), m && m.dispose(), i = null, c = null, m = null, r = null;
316
- }
317
- }
318
- p && (p.close(), p = null), v = "", self.postMessage({ type: "done" });
319
- };
320
- self.onmessage = (t) => {
321
- const e = t.data;
322
- e.type === "init" ? se(e) : e.type === "run" ? oe(e) : e.type === "stop" && (P = !0);
323
- };
324
- `,m=typeof self<"u"&&self.Blob&&new Blob(["URL.revokeObjectURL(import.meta.url);",p],{type:"text/javascript;charset=utf-8"});function O(a){let n;try{if(n=m&&(self.URL||self.webkitURL).createObjectURL(m),!n)throw"";const e=new Worker(n,{type:"module",name:a?.name});return e.addEventListener("error",()=>{(self.URL||self.webkitURL).revokeObjectURL(n)}),e}catch{return new Worker("data:text/javascript;charset=utf-8,"+encodeURIComponent(p),{type:"module",name:a?.name})}}class A{worker;handlers={};constructor(n={}){this.handlers=n,this.worker=new O,this.worker.onmessage=this.handleMessage.bind(this)}handleMessage(n){const{type:e,payload:t}=n.data;switch(e){case"ready":this.handlers.onReady?.();break;case"start":this.handlers.onStart?.(t.totalFrames);break;case"frame":this.handlers.onFrame?.(t.frame,t.index);break;case"done":this.handlers.onDone?.();break;case"error":this.handlers.onError?.(t);break}}init(n){this.worker.postMessage({type:"init",modelPath:n.modelPath,dataset:n.dataset,zipBuffer:n.zipBuffer,blendingMask:n.blendingMask,wasmPaths:n.wasmPaths},[n.zipBuffer,n.blendingMask])}run(n){this.worker.postMessage({type:"run",audioFeatures:n.audioFeatures,audioDimensions:n.audioDimensions},[n.audioFeatures.buffer])}stop(){this.worker.postMessage({type:"stop"})}terminate(){this.worker.terminate()}}exports.DEFAULT_LOW_FREQ=b;exports.ENERGY_FLOOR=R;exports.FRAME_INTERVAL_MS=I;exports.FRAME_LENGTH_MS=g;exports.FRAME_SHIFT_MS=_;exports.FeatureExtractor=x;exports.InferenceEngine=A;exports.NUM_MEL_BINS=y;exports.NUM_SEQUENCE_FRAMES=E;exports.PREEMPH_COEFF=M;exports.REMOVE_DC_OFFSET=F;exports.ROUND_TO_POWER_OF_TWO=L;exports.SAMPLE_RATE=w;exports.TARGET_FPS=d;exports.calculatePingPongState=v;exports.getBorder=k;exports.loadDataset=U;
1
+ "use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const u=require("onnxruntime-web");var o=typeof document<"u"?document.currentScript:null;const _=16e3,f=25,m=10,w=80,R=4,p=.97,M=1e-10,k=20,F=!0,g=!0,l=25,L=1e3/l,S=s=>{if(s===128)return 4;if(s===96)return 3;throw new Error(`Unsupported crop size: ${s}. Only nano (96) and tiny (128) are supported.`)},O=(s,e,t)=>{if(e<=1)return{nextIndex:0,nextDirection:1};let r=s+t,n=t;return r>=e?(r=e-2,n=-1):r<0&&(r=1,n=1),{nextIndex:r,nextDirection:n}};async function T(s={}){const{jsonUrl:e="/complete_dataset.json",zipUrl:t="/processed_images.zip"}=s,[r,n]=await Promise.all([fetch(e),fetch(t)]);if(!r.ok)throw new Error(`无法加载 ${e}`);if(!n.ok)throw new Error(`无法加载 ${t}`);const i=await r.json(),E=await n.arrayBuffer();return{dataset:i,zipBuffer:E}}class P{worker=new Worker(new URL("./workers/feature.worker.js",typeof document>"u"?require("url").pathToFileURL(__filename).href:o&&o.tagName.toUpperCase()==="SCRIPT"&&o.src||new URL("sophontalk-services.cjs",document.baseURI).href),{type:"module"});async process(e){const t=e.getChannelData(0),r=e.numberOfChannels>1?e.getChannelData(1):void 0,n={leftChannel:t,rightChannel:r,sampleRate:e.sampleRate},i=[t.buffer];return r&&i.push(r.buffer),new Promise((E,h)=>{const c=a=>{a.data?.status==="success"?(this.worker.removeEventListener("message",c),this.worker.removeEventListener("error",d),E(a.data.payload)):a.data?.status==="error"&&(this.worker.removeEventListener("message",c),this.worker.removeEventListener("error",d),h(new Error(a.data.error||"特征提取失败")))},d=a=>{this.worker.removeEventListener("message",c),this.worker.removeEventListener("error",d),h(a)};this.worker.addEventListener("message",c),this.worker.addEventListener("error",d),this.worker.postMessage(n,i)})}dispose(){this.worker.terminate()}}class U{worker;handlers={};constructor(e={}){this.handlers=e,this.worker=new Worker(new URL("./workers/inference.worker.js",typeof document>"u"?require("url").pathToFileURL(__filename).href:o&&o.tagName.toUpperCase()==="SCRIPT"&&o.src||new URL("sophontalk-services.cjs",document.baseURI).href),{type:"module"}),this.worker.onmessage=this.handleMessage.bind(this)}handleMessage(e){const{type:t,payload:r}=e.data;switch(t){case"ready":this.handlers.onReady?.();break;case"start":this.handlers.onStart?.(r.totalFrames);break;case"frame":this.handlers.onFrame?.(r.frame,r.index);break;case"done":this.handlers.onDone?.();break;case"error":this.handlers.onError?.(r);break}}init(e){this.worker.postMessage({type:"init",modelPath:e.modelPath,dataset:e.dataset,zipBuffer:e.zipBuffer,blendingMask:e.blendingMask,wasmPaths:e.wasmPaths||u.env.wasm.wasmPaths},[e.zipBuffer,e.blendingMask])}run(e){this.worker.postMessage({type:"run",audioFeatures:e.audioFeatures,audioDimensions:e.audioDimensions},[e.audioFeatures.buffer])}stop(){this.worker.postMessage({type:"stop"})}terminate(){this.worker.terminate()}}exports.DEFAULT_LOW_FREQ=k;exports.ENERGY_FLOOR=M;exports.FRAME_INTERVAL_MS=L;exports.FRAME_LENGTH_MS=f;exports.FRAME_SHIFT_MS=m;exports.FeatureExtractor=P;exports.InferenceEngine=U;exports.NUM_MEL_BINS=w;exports.NUM_SEQUENCE_FRAMES=R;exports.PREEMPH_COEFF=p;exports.REMOVE_DC_OFFSET=F;exports.ROUND_TO_POWER_OF_TWO=g;exports.SAMPLE_RATE=_;exports.TARGET_FPS=l;exports.calculatePingPongState=O;exports.getBorder=S;exports.loadDataset=T;