sophontalk-services 0.0.2 → 0.0.3
package/dist-lib/assets/inference.worker-BPSCUMiL.js
ADDED
@@ -0,0 +1,178 @@
+import { env as P, InferenceSession as $, Tensor as W } from "onnxruntime-web";
+import q from "jszip";
+const V = 80, G = 4, H = 25, j = 1e3 / H, J = (t) => {
+  if (t === 128) return 4;
+  if (t === 96) return 3;
+  throw new Error(`Unsupported crop size: ${t}. Only nano (96) and tiny (128) are supported.`);
+}, Q = (t, e, n) => {
+  if (e <= 1) return { nextIndex: 0, nextDirection: 1 };
+  let a = t + n, s = n;
+  return a >= e ? (a = e - 2, s = -1) : a < 0 && (a = 1, s = 1), { nextIndex: a, nextDirection: s };
+};
+P.wasm.numThreads = 1;
+const Z = () => typeof navigator < "u" && !!navigator.gpu, F = 32;
+let _ = null, R = null, x = null, L = null;
+const w = {
+  tensors: /* @__PURE__ */ new Map(),
+  faceImages: /* @__PURE__ */ new Map(),
+  fullImages: /* @__PURE__ */ new Map()
+};
+let S = 3, b = 96, z = 0, k = 0, C = null, A = null, y = null, E = null, g = null, I = null, D = null, M = null, T = !1, v = "", p = null;
+const B = (t) => {
+  self.postMessage({ type: "error", payload: t });
+}, K = (t) => {
+  const e = w.tensors.get(t);
+  if (!e) throw new Error(`Cache miss: ${t}`);
+  const n = b - 2 * S;
+  return new W("float32", e, [1, 6, n, n]);
+}, X = (t) => {
+  const e = w.faceImages.get(t);
+  if (!e) throw new Error(`Cache miss: ${t}`);
+  return e;
+}, Y = async (t) => {
+  const e = w.fullImages.get(t);
+  if (!e) throw new Error(`Cache miss: ${t}`);
+  return await createImageBitmap(e);
+}, ee = (t) => {
+  const [e, n, a, s] = t.dims, u = t.data;
+  (!y || y.width !== s || y.height !== a) && (C.width = s, C.height = a, y = A.createImageData(s, a));
+  const d = a * s, l = new Uint32Array(y.data.buffer), o = d, i = d * 2, c = n === 3;
+  for (let r = 0; r < d; r++) {
+    let m = (c ? u[r + i] : u[r]) * 255, f = (c ? u[r + o] : u[r]) * 255, h = u[r] * 255;
+    m = m < 0 ? 0 : m > 255 ? 255 : m, f = f < 0 ? 0 : f > 255 ? 255 : f, h = h < 0 ? 0 : h > 255 ? 255 : h, l[r] = 255 << 24 | (h & 255) << 16 | (f & 255) << 8 | m & 255;
+  }
+  A.putImageData(y, 0, 0);
+}, te = async (t, e, n) => {
+  ee(t);
+  const a = X(e.face_image);
+  g.globalCompositeOperation = "source-over", g.drawImage(a, 0, 0), g.drawImage(C, S, S), g.globalCompositeOperation = "destination-in", g.drawImage(L, 0, 0, b, b), g.globalCompositeOperation = "destination-over", g.drawImage(a, 0, 0), g.globalCompositeOperation = "source-over", D.drawImage(n, 0, 0);
+  const s = e.crop_info.ymax - e.crop_info.ymin;
+  return D.drawImage(
+    E,
+    e.crop_info.xmin,
+    e.crop_info.ymin,
+    e.crop_info.width,
+    s
+  ), await createImageBitmap(I);
+}, ae = (t, e, n) => {
+  const [a, s, u] = e, d = s * u, l = F * d;
+  (!M || M.length !== l) && (M = new Float32Array(l));
+  for (let o = 0; o < F; o++) {
+    const c = Math.min(
+      Math.max(n - F / 2 + o, 0),
+      a - 1
+    ) * d, r = o * d;
+    M.set(
+      t.subarray(c, c + d),
+      r
+    );
+  }
+  return M;
+}, ne = (t, e, n) => {
+  C || (C = new OffscreenCanvas(1, 1), A = C.getContext("2d", {
+    willReadFrequently: !0
+  })), (!E || E.width !== n) && (E = new OffscreenCanvas(n, n), g = E.getContext("2d", {
+    willReadFrequently: !0
+  })), (!I || I.width !== t || I.height !== e) && (I = new OffscreenCanvas(t, e), D = I.getContext("2d", {
+    willReadFrequently: !0
+  }));
+}, se = async (t) => {
+  try {
+    if (t.wasmPaths ? P.wasm.wasmPaths = t.wasmPaths : P.wasm.wasmPaths = "/", !t.dataset) throw new Error("Missing dataset");
+    const e = t.dataset;
+    R = e, b = e.dataset_info.config.crop_size, S = J(b), z = e.dataset_info.source_image_dimensions?.width || 0, k = e.dataset_info.source_image_dimensions?.height || 0, ne(z, k, b), x = await q.loadAsync(t.zipBuffer), t.zipBuffer = null, L = t.blendingMask;
+    const n = e.images.map(async (l) => {
+      if (!w.tensors.has(l.tensor_file)) {
+        const o = x.file(l.tensor_file);
+        if (o) {
+          const i = await o.async("arraybuffer");
+          w.tensors.set(l.tensor_file, new Float32Array(i));
+        }
+      }
+      if (!w.faceImages.has(l.face_image)) {
+        const o = x.file(l.face_image);
+        if (o) {
+          const i = await o.async("blob"), c = await createImageBitmap(i);
+          w.faceImages.set(l.face_image, c);
+        }
+      }
+      if (!w.fullImages.has(l.full_image)) {
+        const o = x.file(l.full_image);
+        if (o) {
+          const i = await o.async("blob");
+          w.fullImages.set(l.full_image, i);
+        }
+      }
+    });
+    await Promise.all(n), x = null;
+    const a = [];
+    Z() && a.push("webgpu"), a.push("wasm");
+    const s = new URL(t.modelPath, self.location.href).toString(), u = new URL(s).pathname.split("/").pop() || "model.onnx", d = new URL(`${u}.data`, s).toString();
+    _ = await $.create(s, {
+      executionProviders: a,
+      graphOptimizationLevel: "all",
+      enableMemPattern: !0,
+      enableCpuMemArena: !0,
+      externalData: [{ data: d, path: `${u}.data` }]
+    }), self.postMessage({ type: "ready" });
+  } catch (e) {
+    B(e?.message || "Init Error");
+  }
+}, oe = async (t) => {
+  if (!_ || !R) {
+    B("Not initialized");
+    return;
+  }
+  T = !1;
+  const e = t.audioDimensions[0];
+  self.postMessage({
+    type: "start",
+    payload: { totalFrames: e, frameInterval: j }
+  });
+  let n = 0, a = 1;
+  const s = {}, u = _.inputNames[0], d = _.inputNames[1], l = _.outputNames[0];
+  v = "", p && (p.close(), p = null);
+  for (let o = 0; o < e && !T; o++) {
+    let i = null, c = null, r = null, m = null;
+    try {
+      const f = R.images[n];
+      i = K(f.tensor_file), f.full_image !== v && (p && p.close(), p = await Y(f.full_image), v = f.full_image);
+      const h = ae(
+        t.audioFeatures,
+        t.audioDimensions,
+        o
+      );
+      c = new W("float32", h, [
+        1,
+        F,
+        G,
+        V
+      ]), s[u] = i, s[d] = c, r = await _.run(s);
+      const N = r[l];
+      if (!N)
+        throw new Error("Missing inference output tensor");
+      m = N;
+      const U = await te(N, f, p);
+      self.postMessage(
+        { type: "frame", payload: { frame: U, index: o } },
+        [U]
+      );
+      const O = Q(
+        n,
+        R.images.length,
+        a
+      );
+      n = O.nextIndex, a = O.nextDirection;
+    } catch (f) {
+      B(f?.message || "Run Error");
+      break;
+    } finally {
+      i && i.dispose(), c && c.dispose(), m && m.dispose(), i = null, c = null, m = null, r = null;
+    }
+  }
+  p && (p.close(), p = null), v = "", self.postMessage({ type: "done" });
+};
+self.onmessage = (t) => {
+  const e = t.data;
+  e.type === "init" ? se(e) : e.type === "run" ? oe(e) : e.type === "stop" && (T = !0);
+};
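Note: relative to the worker deleted at the bottom of this diff, the one behavioral change here is session creation. 0.0.2 fetched the whole model into an ArrayBuffer and passed the bytes to InferenceSession.create; 0.0.3 passes the resolved model URL plus an externalData entry, so onnxruntime-web fetches the external-weights file ("<model>.data") itself. A de-minified sketch of the new path — the descriptive names are mine, not the bundle's single-letter identifiers:

// Sketch of the 0.0.3 model-loading path, de-minified for readability.
import { env, InferenceSession } from "onnxruntime-web";

const hasWebGPU = () => typeof navigator !== "undefined" && !!navigator.gpu;

async function createSession(modelPath, wasmPaths) {
  env.wasm.numThreads = 1;
  env.wasm.wasmPaths = wasmPaths ?? "/";
  // Prefer WebGPU when available; always keep the wasm fallback.
  const executionProviders = hasWebGPU() ? ["webgpu", "wasm"] : ["wasm"];
  // Resolve the model URL against the worker's own location, then derive the
  // sibling "<model>.data" URL that holds the externalized weights.
  const modelUrl = new URL(modelPath, self.location.href).toString();
  const fileName = new URL(modelUrl).pathname.split("/").pop() || "model.onnx";
  const dataUrl = new URL(`${fileName}.data`, modelUrl).toString();
  return InferenceSession.create(modelUrl, {
    executionProviders,
    graphOptimizationLevel: "all",
    enableMemPattern: true,
    enableCpuMemArena: true,
    // The model references its weights as "<fileName>.data"; onnxruntime-web
    // downloads them from dataUrl instead of us buffering the whole model.
    externalData: [{ data: dataUrl, path: `${fileName}.data` }],
  });
}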
package/dist-lib/sophontalk-services.cjs
CHANGED
@@ -1 +1 @@
-"use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const l=16e3,_=25,u=10,w=80,
+"use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const l=16e3,_=25,u=10,w=80,M=4,m=.97,f=1e-10,k=20,F=!0,R=!0,h=25,g=1e3/h,p=t=>{if(t===128)return 4;if(t===96)return 3;throw new Error(`Unsupported crop size: ${t}. Only nano (96) and tiny (128) are supported.`)},O=(t,e,s)=>{if(e<=1)return{nextIndex:0,nextDirection:1};let r=t+s,a=s;return r>=e?(r=e-2,a=-1):r<0&&(r=1,a=1),{nextIndex:r,nextDirection:a}};async function L(t={}){const{jsonUrl:e="/complete_dataset.json",zipUrl:s="/processed_images.zip"}=t,[r,a]=await Promise.all([fetch(e),fetch(s)]);if(!r.ok)throw new Error(`无法加载 ${e}`);if(!a.ok)throw new Error(`无法加载 ${s}`);const o=await r.json(),c=await a.arrayBuffer();return{dataset:o,zipBuffer:c}}function S(t){return new Worker("./assets/feature.worker-Dx6moind.js",{type:"module",name:t?.name})}class P{worker=new S;async process(e){const s=e.getChannelData(0),r=e.numberOfChannels>1?e.getChannelData(1):void 0,a={leftChannel:s,rightChannel:r,sampleRate:e.sampleRate},o=[s.buffer];return r&&o.push(r.buffer),new Promise((c,d)=>{const i=n=>{n.data?.status==="success"?(this.worker.removeEventListener("message",i),this.worker.removeEventListener("error",E),c(n.data.payload)):n.data?.status==="error"&&(this.worker.removeEventListener("message",i),this.worker.removeEventListener("error",E),d(new Error(n.data.error||"特征提取失败")))},E=n=>{this.worker.removeEventListener("message",i),this.worker.removeEventListener("error",E),d(n)};this.worker.addEventListener("message",i),this.worker.addEventListener("error",E),this.worker.postMessage(a,o)})}dispose(){this.worker.terminate()}}function T(t){return new Worker("./assets/inference.worker-BPSCUMiL.js",{type:"module",name:t?.name})}class A{worker;handlers={};constructor(e={}){this.handlers=e,this.worker=new T,this.worker.onmessage=this.handleMessage.bind(this)}handleMessage(e){const{type:s,payload:r}=e.data;switch(s){case"ready":this.handlers.onReady?.();break;case"start":this.handlers.onStart?.(r.totalFrames);break;case"frame":this.handlers.onFrame?.(r.frame,r.index);break;case"done":this.handlers.onDone?.();break;case"error":this.handlers.onError?.(r);break}}init(e){this.worker.postMessage({type:"init",modelPath:e.modelPath,dataset:e.dataset,zipBuffer:e.zipBuffer,blendingMask:e.blendingMask,wasmPaths:e.wasmPaths},[e.zipBuffer,e.blendingMask])}run(e){this.worker.postMessage({type:"run",audioFeatures:e.audioFeatures,audioDimensions:e.audioDimensions},[e.audioFeatures.buffer])}stop(){this.worker.postMessage({type:"stop"})}terminate(){this.worker.terminate()}}exports.DEFAULT_LOW_FREQ=k;exports.ENERGY_FLOOR=f;exports.FRAME_INTERVAL_MS=g;exports.FRAME_LENGTH_MS=_;exports.FRAME_SHIFT_MS=u;exports.FeatureExtractor=P;exports.InferenceEngine=A;exports.NUM_MEL_BINS=w;exports.NUM_SEQUENCE_FRAMES=M;exports.PREEMPH_COEFF=m;exports.REMOVE_DC_OFFSET=F;exports.ROUND_TO_POWER_OF_TWO=R;exports.SAMPLE_RATE=l;exports.TARGET_FPS=h;exports.calculatePingPongState=O;exports.getBorder=p;exports.loadDataset=L;
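Note: the minified CJS line above carries the package's entire public surface. A hedged usage sketch of that API, wired the way the worker message contracts in this diff suggest; the model path is a placeholder, and the shape of FeatureExtractor's resolved payload is not visible in this diff, so the final field access is an assumption:

// Names come from the export list above (loadDataset, FeatureExtractor,
// InferenceEngine); everything else here is illustrative.
import { loadDataset, FeatureExtractor, InferenceEngine } from "sophontalk-services";

async function play(audioBuffer, blendingMask) {
  // audioBuffer: a Web Audio AudioBuffer (process() reads its channel data).
  // blendingMask: assumed to be an ImageBitmap — the worker receives it as a
  // transferable and draws it with drawImage.
  const { dataset, zipBuffer } = await loadDataset({
    jsonUrl: "/complete_dataset.json", // defaults shown in the bundle
    zipUrl: "/processed_images.zip",
  });

  const extractor = new FeatureExtractor();
  const features = await extractor.process(audioBuffer);
  extractor.dispose();

  const engine = new InferenceEngine({
    onReady: () =>
      // Assumption: the feature worker resolves with the fields run() expects.
      engine.run({
        audioFeatures: features.audioFeatures,
        audioDimensions: features.audioDimensions,
      }),
    onStart: (totalFrames) => console.log(`rendering ${totalFrames} frames`),
    onFrame: (frame, index) => { /* frame is a transferred ImageBitmap */ },
    onDone: () => engine.terminate(),
    onError: (message) => console.error(message),
  });
  // "/model.onnx" is a hypothetical path; zipBuffer and blendingMask are
  // transferred to the worker and unusable afterwards.
  engine.init({ modelPath: "/model.onnx", dataset, zipBuffer, blendingMask });
}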
package/dist-lib/sophontalk-services.js
CHANGED
@@ -1,61 +1,61 @@
-const w = 16e3,
-  if (
-  if (
-  throw new Error(`Unsupported crop size: ${
-}, P = (
+const w = 16e3, u = 25, m = 10, k = 80, f = 4, p = 0.97, _ = 1e-10, g = 20, M = !0, R = !0, F = 25, L = 40, y = (s) => {
+  if (s === 128) return 4;
+  if (s === 96) return 3;
+  throw new Error(`Unsupported crop size: ${s}. Only nano (96) and tiny (128) are supported.`);
+}, P = (s, e, t) => {
   if (e <= 1) return { nextIndex: 0, nextDirection: 1 };
-  let r =
+  let r = s + t, n = t;
   return r >= e ? (r = e - 2, n = -1) : r < 0 && (r = 1, n = 1), { nextIndex: r, nextDirection: n };
 };
-async function b(
+async function b(s = {}) {
   const {
     jsonUrl: e = "/complete_dataset.json",
-    zipUrl:
-  } =
+    zipUrl: t = "/processed_images.zip"
+  } = s, [r, n] = await Promise.all([
     fetch(e),
-    fetch(
+    fetch(t)
   ]);
   if (!r.ok) throw new Error(`无法加载 ${e}`);
-  if (!n.ok) throw new Error(`无法加载 ${
-  const o = await r.json(),
-  return { dataset: o, zipBuffer:
+  if (!n.ok) throw new Error(`无法加载 ${t}`);
+  const o = await r.json(), c = await n.arrayBuffer();
+  return { dataset: o, zipBuffer: c };
 }
-function l(
+function l(s) {
   return new Worker(
     "./assets/feature.worker-Dx6moind.js",
     {
       type: "module",
-      name:
+      name: s?.name
     }
   );
 }
 class O {
   worker = new l();
   async process(e) {
-    const
-      leftChannel:
+    const t = e.getChannelData(0), r = e.numberOfChannels > 1 ? e.getChannelData(1) : void 0, n = {
+      leftChannel: t,
       rightChannel: r,
       sampleRate: e.sampleRate
-    }, o = [
-    return r && o.push(r.buffer), new Promise((
+    }, o = [t.buffer];
+    return r && o.push(r.buffer), new Promise((c, d) => {
       const i = (a) => {
-        a.data?.status === "success" ? (this.worker.removeEventListener("message", i), this.worker.removeEventListener("error",
-      },
-        this.worker.removeEventListener("message", i), this.worker.removeEventListener("error",
+        a.data?.status === "success" ? (this.worker.removeEventListener("message", i), this.worker.removeEventListener("error", h), c(a.data.payload)) : a.data?.status === "error" && (this.worker.removeEventListener("message", i), this.worker.removeEventListener("error", h), d(new Error(a.data.error || "特征提取失败")));
+      }, h = (a) => {
+        this.worker.removeEventListener("message", i), this.worker.removeEventListener("error", h), d(a);
       };
-      this.worker.addEventListener("message", i), this.worker.addEventListener("error",
+      this.worker.addEventListener("message", i), this.worker.addEventListener("error", h), this.worker.postMessage(n, o);
     });
   }
   dispose() {
     this.worker.terminate();
   }
 }
-function E(
+function E(s) {
   return new Worker(
-    "./assets/inference.worker-
+    "./assets/inference.worker-BPSCUMiL.js",
     {
       type: "module",
-      name:
+      name: s?.name
     }
   );
 }
@@ -66,8 +66,8 @@ class v {
     this.handlers = e, this.worker = new E(), this.worker.onmessage = this.handleMessage.bind(this);
   }
   handleMessage(e) {
-    const { type:
-    switch (
+    const { type: t, payload: r } = e.data;
+    switch (t) {
       case "ready":
         this.handlers.onReady?.();
         break;
@@ -119,8 +119,8 @@ export {
   g as DEFAULT_LOW_FREQ,
   _ as ENERGY_FLOOR,
   L as FRAME_INTERVAL_MS,
-
-
+  u as FRAME_LENGTH_MS,
+  m as FRAME_SHIFT_MS,
   O as FeatureExtractor,
   v as InferenceEngine,
   k as NUM_MEL_BINS,
package/package.json
CHANGED
@@ -1,42 +1,42 @@
-{
-  "name": "sophontalk-services",
-  "version": "0.0.
-  "type": "module",
-  "scripts": {
-    "dev": "vite",
-    "build": "vue-tsc -b && vite build",
-    "build:lib": "vite build --config vite.lib.config.ts",
-    "preview": "vite preview",
-    "publish": "npm publish
-  },
-  "main": "./dist-lib/sophontalk-services.cjs",
-  "module": "./dist-lib/sophontalk-services.js",
-  "types": "./dist-lib/types/src/services/index.d.ts",
-  "files": [
-    "dist-lib"
-  ],
-  "exports": {
-    ".": {
-      "types": "./dist-lib/types/src/services/index.d.ts",
-      "import": "./dist-lib/sophontalk-services.js",
-      "require": "./dist-lib/sophontalk-services.cjs"
-    }
-  },
-  "dependencies": {
-    "@alexanderolsen/libsamplerate-js": "^2.1.2",
-    "fft.js": "^4.0.4",
-    "jszip": "^3.10.1",
-    "onnxruntime-web": "^1.22.0",
-    "vue": "^3.5.17"
-  },
-  "devDependencies": {
-    "@rollup/plugin-replace": "^6.0.3",
-    "@vitejs/plugin-vue": "^6.0.0",
-    "@vue/tsconfig": "^0.7.0",
-    "typescript": "~5.8.3",
-    "vite": "^7.0.2",
-    "vite-plugin-dts": "^4.5.4",
-    "vite-plugin-static-copy": "^3.1.0",
-    "vue-tsc": "^3.0.1"
-  }
-}
+{
+  "name": "sophontalk-services",
+  "version": "0.0.3",
+  "type": "module",
+  "scripts": {
+    "dev": "vite",
+    "build": "vue-tsc -b && vite build",
+    "build:lib": "vite build --config vite.lib.config.ts",
+    "preview": "vite preview",
+    "publish": "npm publish --registry https://registry.npmjs.org --access public"
+  },
+  "main": "./dist-lib/sophontalk-services.cjs",
+  "module": "./dist-lib/sophontalk-services.js",
+  "types": "./dist-lib/types/src/services/index.d.ts",
+  "files": [
+    "dist-lib"
+  ],
+  "exports": {
+    ".": {
+      "types": "./dist-lib/types/src/services/index.d.ts",
+      "import": "./dist-lib/sophontalk-services.js",
+      "require": "./dist-lib/sophontalk-services.cjs"
+    }
+  },
+  "dependencies": {
+    "@alexanderolsen/libsamplerate-js": "^2.1.2",
+    "fft.js": "^4.0.4",
+    "jszip": "^3.10.1",
+    "onnxruntime-web": "^1.22.0",
+    "vue": "^3.5.17"
+  },
+  "devDependencies": {
+    "@rollup/plugin-replace": "^6.0.3",
+    "@vitejs/plugin-vue": "^6.0.0",
+    "@vue/tsconfig": "^0.7.0",
+    "typescript": "~5.8.3",
+    "vite": "^7.0.2",
+    "vite-plugin-dts": "^4.5.4",
+    "vite-plugin-static-copy": "^3.1.0",
+    "vue-tsc": "^3.0.1"
+  }
+}
@@ -1,175 +0,0 @@
-import { env as B, InferenceSession as V, Tensor as U } from "onnxruntime-web";
-import $ from "jszip";
-const G = 80, L = 4, H = 25, j = 1e3 / H, J = (t) => {
-  if (t === 128) return 4;
-  if (t === 96) return 3;
-  throw new Error(`Unsupported crop size: ${t}. Only nano (96) and tiny (128) are supported.`);
-}, Q = (t, e, a) => {
-  if (e <= 1) return { nextIndex: 0, nextDirection: 1 };
-  let s = t + a, o = a;
-  return s >= e ? (s = e - 2, o = -1) : s < 0 && (s = 1, o = 1), { nextIndex: s, nextDirection: o };
-};
-B.wasm.numThreads = 1;
-const Z = () => typeof navigator < "u" && !!navigator.gpu, F = 32;
-let _ = null, R = null, M = null, q = null;
-const w = {
-  tensors: /* @__PURE__ */ new Map(),
-  faceImages: /* @__PURE__ */ new Map(),
-  fullImages: /* @__PURE__ */ new Map()
-};
-let P = 3, b = 96, k = 0, W = 0, C = null, N = null, y = null, E = null, g = null, I = null, S = null, x = null, T = !1, v = "", p = null;
-const D = (t) => {
-  self.postMessage({ type: "error", payload: t });
-}, K = (t) => {
-  const e = w.tensors.get(t);
-  if (!e) throw new Error(`Cache miss: ${t}`);
-  const a = b - 2 * P;
-  return new U("float32", e, [1, 6, a, a]);
-}, X = (t) => {
-  const e = w.faceImages.get(t);
-  if (!e) throw new Error(`Cache miss: ${t}`);
-  return e;
-}, Y = async (t) => {
-  const e = w.fullImages.get(t);
-  if (!e) throw new Error(`Cache miss: ${t}`);
-  return await createImageBitmap(e);
-}, ee = (t) => {
-  const [e, a, s, o] = t.dims, c = t.data;
-  (!y || y.width !== o || y.height !== s) && (C.width = o, C.height = s, y = N.createImageData(o, s));
-  const n = s * o, l = new Uint32Array(y.data.buffer), i = n, d = n * 2, u = a === 3;
-  for (let r = 0; r < n; r++) {
-    let m = (u ? c[r + d] : c[r]) * 255, f = (u ? c[r + i] : c[r]) * 255, h = c[r] * 255;
-    m = m < 0 ? 0 : m > 255 ? 255 : m, f = f < 0 ? 0 : f > 255 ? 255 : f, h = h < 0 ? 0 : h > 255 ? 255 : h, l[r] = 255 << 24 | (h & 255) << 16 | (f & 255) << 8 | m & 255;
-  }
-  N.putImageData(y, 0, 0);
-}, te = async (t, e, a) => {
-  ee(t);
-  const s = X(e.face_image);
-  g.globalCompositeOperation = "source-over", g.drawImage(s, 0, 0), g.drawImage(C, P, P), g.globalCompositeOperation = "destination-in", g.drawImage(q, 0, 0, b, b), g.globalCompositeOperation = "destination-over", g.drawImage(s, 0, 0), g.globalCompositeOperation = "source-over", S.drawImage(a, 0, 0);
-  const o = e.crop_info.ymax - e.crop_info.ymin;
-  return S.drawImage(
-    E,
-    e.crop_info.xmin,
-    e.crop_info.ymin,
-    e.crop_info.width,
-    o
-  ), await createImageBitmap(I);
-}, ae = (t, e, a) => {
-  const [s, o, c] = e, n = o * c, l = F * n;
-  (!x || x.length !== l) && (x = new Float32Array(l));
-  for (let i = 0; i < F; i++) {
-    const u = Math.min(
-      Math.max(a - F / 2 + i, 0),
-      s - 1
-    ) * n, r = i * n;
-    x.set(
-      t.subarray(u, u + n),
-      r
-    );
-  }
-  return x;
-}, ne = (t, e, a) => {
-  C || (C = new OffscreenCanvas(1, 1), N = C.getContext("2d", {
-    willReadFrequently: !0
-  })), (!E || E.width !== a) && (E = new OffscreenCanvas(a, a), g = E.getContext("2d", {
-    willReadFrequently: !0
-  })), (!I || I.width !== t || I.height !== e) && (I = new OffscreenCanvas(t, e), S = I.getContext("2d", {
-    willReadFrequently: !0
-  }));
-}, se = async (t) => {
-  try {
-    if (t.wasmPaths ? B.wasm.wasmPaths = t.wasmPaths : B.wasm.wasmPaths = "/", !t.dataset) throw new Error("Missing dataset");
-    const e = t.dataset;
-    R = e, b = e.dataset_info.config.crop_size, P = J(b), k = e.dataset_info.source_image_dimensions?.width || 0, W = e.dataset_info.source_image_dimensions?.height || 0, ne(k, W, b), M = await $.loadAsync(t.zipBuffer), t.zipBuffer = null, q = t.blendingMask;
-    const a = e.images.map(async (n) => {
-      if (!w.tensors.has(n.tensor_file)) {
-        const l = M.file(n.tensor_file);
-        if (l) {
-          const i = await l.async("arraybuffer");
-          w.tensors.set(n.tensor_file, new Float32Array(i));
-        }
-      }
-      if (!w.faceImages.has(n.face_image)) {
-        const l = M.file(n.face_image);
-        if (l) {
-          const i = await l.async("blob"), d = await createImageBitmap(i);
-          w.faceImages.set(n.face_image, d);
-        }
-      }
-      if (!w.fullImages.has(n.full_image)) {
-        const l = M.file(n.full_image);
-        if (l) {
-          const i = await l.async("blob");
-          w.fullImages.set(n.full_image, i);
-        }
-      }
-    });
-    await Promise.all(a), M = null;
-    const o = await (await fetch(t.modelPath)).arrayBuffer(), c = [];
-    Z() && c.push("webgpu"), c.push("wasm"), _ = await V.create(o, {
-      executionProviders: c,
-      graphOptimizationLevel: "all",
-      enableMemPattern: !0,
-      enableCpuMemArena: !0
-    }), self.postMessage({ type: "ready" });
-  } catch (e) {
-    D(e?.message || "Init Error");
-  }
-}, oe = async (t) => {
-  if (!_ || !R) {
-    D("Not initialized");
-    return;
-  }
-  T = !1;
-  const e = t.audioDimensions[0];
-  self.postMessage({
-    type: "start",
-    payload: { totalFrames: e, frameInterval: j }
-  });
-  let a = 0, s = 1;
-  const o = {}, c = _.inputNames[0], n = _.inputNames[1], l = _.outputNames[0];
-  v = "", p && (p.close(), p = null);
-  for (let i = 0; i < e && !T; i++) {
-    let d = null, u = null, r = null, m = null;
-    try {
-      const f = R.images[a];
-      d = K(f.tensor_file), f.full_image !== v && (p && p.close(), p = await Y(f.full_image), v = f.full_image);
-      const h = ae(
-        t.audioFeatures,
-        t.audioDimensions,
-        i
-      );
-      u = new U("float32", h, [
-        1,
-        F,
-        L,
-        G
-      ]), o[c] = d, o[n] = u, r = await _.run(o);
-      const A = r[l];
-      if (!A)
-        throw new Error("Missing inference output tensor");
-      m = A;
-      const O = await te(A, f, p);
-      self.postMessage(
-        { type: "frame", payload: { frame: O, index: i } },
-        [O]
-      );
-      const z = Q(
-        a,
-        R.images.length,
-        s
-      );
-      a = z.nextIndex, s = z.nextDirection;
-    } catch (f) {
-      D(f?.message || "Run Error");
-      break;
-    } finally {
-      d && d.dispose(), u && u.dispose(), m && m.dispose(), d = null, u = null, m = null, r = null;
-    }
-  }
-  p && (p.close(), p = null), v = "", self.postMessage({ type: "done" });
-};
-self.onmessage = (t) => {
-  const e = t.data;
-  e.type === "init" ? se(e) : e.type === "run" ? oe(e) : e.type === "stop" && (T = !0);
-};