sophontalk-services 0.0.3 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,13 +1,12 @@
1
1
  {
2
2
  "name": "sophontalk-services",
3
- "version": "0.0.3",
3
+ "version": "0.0.5",
4
4
  "type": "module",
5
5
  "scripts": {
6
6
  "dev": "vite",
7
7
  "build": "vue-tsc -b && vite build",
8
8
  "build:lib": "vite build --config vite.lib.config.ts",
9
- "preview": "vite preview",
10
- "publish": "npm publish --registry https://registry.npmjs.org --access public"
9
+ "preview": "vite preview"
11
10
  },
12
11
  "main": "./dist-lib/sophontalk-services.cjs",
13
12
  "module": "./dist-lib/sophontalk-services.js",
@@ -1,145 +0,0 @@
1
- import x from "fft.js";
2
- import { create as O, ConverterType as H } from "@alexanderolsen/libsamplerate-js";
3
// Sample rate (Hz) the extractor works at; z() resamples any other input rate to this.
const y = 16e3;
// Analysis frame length in milliseconds (converted to samples in `h`).
const U = 25;
// Hop between consecutive frames in milliseconds (converted to samples in `C`).
const D = 10;
// Number of mel filters, i.e. features produced per frame.
const u = 80;
// Frames per group; z() pads the frame count up to a multiple of this.
const S = 4;
// Coefficient handed to the pre-emphasis step Y().
const W = 0.97;
// Lower bound applied to filter energies before taking the logarithm.
const L = 1e-10;
// Lower frequency bound (Hz) handed to the filter-bank builder Q().
const b = 20;
// Frame length and hop expressed in samples.
const h = Math.round((U * y) / 1e3);
const C = Math.round((D * y) / 1e3);

/**
 * Returns the smallest power of two that is >= `e` (1 for non-positive input).
 * @param {number} e - Minimum size required.
 * @returns {number} Power of two not smaller than `e`.
 */
const k = (e) => {
  if (e <= 0) return 1;
  let size = 1;
  while (size < e) size *= 2;
  return size;
};

// FFT size, the FFT instance itself, and the per-sample window coefficients.
const P = k(h);
const N = new x(P);
const v = new Float32Array(h);

/**
 * Fills `v` with a Hann window of length `h`.
 * Leaves `v` untouched (all zeros) when the frame is a single sample or shorter.
 */
const G = () => {
  if (h <= 1) return;
  const step = (2 * Math.PI) / (h - 1);
  for (let idx = 0; idx < h; idx += 1) {
    v[idx] = 0.5 - 0.5 * Math.cos(idx * step);
  }
};
G();
15
/**
 * Builds a bank of triangular filters spaced evenly on the mel scale.
 *
 * @param {number} e - Number of filters to build.
 * @param {number} t - FFT size; bins 0..floor(t / 2) are considered.
 * @param {number} o - Sample rate in Hz.
 * @param {number} [a=20] - Lower frequency bound in Hz.
 * @param {number} [s] - Upper frequency bound in Hz; any falsy value falls back to `o / 2`.
 * @returns {{startBin: number, values: Float32Array}[]} One entry per filter: the first
 *   FFT bin it covers and its weights. A filter covering no bin is returned as
 *   `{ startBin: 0, values: <empty Float32Array> }`.
 */
function Q(e, t, o, a = 20, s) {
  const upperHz = s || o / 2;
  const hzToMel = (hz) => 1127 * Math.log(1 + hz / 700);
  const melToHz = (mel) => 700 * (Math.exp(mel / 1127) - 1);
  const melLow = hzToMel(a);
  const melHigh = hzToMel(upperHz);

  // e filters need e + 2 edge points (left edge, centre, right edge overlap).
  const pointCount = e + 2;
  const melPoints = new Float32Array(pointCount);
  for (let idx = 0; idx < pointCount; idx += 1) {
    melPoints[idx] = melLow + ((melHigh - melLow) * idx) / (e + 1);
  }

  // Both stages stay in Float32Array so intermediate values are rounded to float32.
  const hzPoints = melPoints.map(melToHz);
  const binPoints = hzPoints.map((hz) => (hz * t) / o);

  const highestBin = Math.floor(t / 2);
  const filters = [];
  for (let band = 0; band < e; band += 1) {
    const left = binPoints[band];
    const centre = binPoints[band + 1];
    const right = binPoints[band + 2];
    const firstBin = Math.max(0, Math.ceil(left));
    const lastBin = Math.min(highestBin, Math.floor(right));
    const width = lastBin - firstBin + 1;
    if (width <= 0) {
      filters.push({ startBin: 0, values: new Float32Array(0) });
      continue;
    }
    const weights = new Float32Array(width);
    for (let bin = firstBin; bin <= lastBin; bin += 1) {
      // Rising slope up to the centre, falling slope after it.
      weights[bin - firstBin] =
        bin <= centre
          ? (bin - left) / (centre - left)
          : (right - bin) / (right - centre);
    }
    filters.push({ startBin: firstBin, values: weights });
  }
  return filters;
}
36
// Filter bank built once at load time: `u` filters over a `P`-point FFT at
// sample rate `y`, with `b` Hz as the lower frequency bound.
const q = Q(u, P, y, b);
42
/**
 * Applies the precomputed window `v` to a frame, in place.
 * @param {Float32Array} e - Frame samples; element i is multiplied by v[i].
 */
function K(e) {
  let idx = 0;
  while (idx < e.length) {
    e[idx] = e[idx] * v[idx];
    idx += 1;
  }
}
46
/**
 * Applies a first-order pre-emphasis filter to a frame, in place.
 *
 * Every sample except the first becomes `e[i] - t * e[i - 1]`; the first sample,
 * which has no predecessor, becomes `e[0] - t * e[0]`.
 *
 * @param {Float32Array|number[]} e - Frame samples, modified in place.
 * @param {number} t - Pre-emphasis coefficient.
 */
function Y(e, t) {
  // Without this guard an empty plain array would gain a NaN element at index 0.
  if (e.length === 0) return;
  // Walk backwards so each step still sees the unfiltered predecessor.
  for (let idx = e.length - 1; idx > 0; idx -= 1) {
    e[idx] -= t * e[idx - 1];
  }
  e[0] -= t * e[0];
}
51
/**
 * Subtracts the arithmetic mean from every element, in place.
 * Does nothing for an empty input.
 * @param {Float32Array|number[]} e - Samples to centre around zero.
 */
function V(e) {
  const count = e.length;
  if (count === 0) return;

  let total = 0;
  let idx = 0;
  while (idx < count) {
    total += e[idx];
    idx += 1;
  }

  const mean = total / count;
  for (idx = 0; idx < count; idx += 1) {
    e[idx] -= mean;
  }
}
59
/**
 * Writes the squared magnitude of interleaved complex values into `e`.
 *
 * @param {Float32Array|number[]} e - Output; its length decides how many bins are computed.
 * @param {ArrayLike<number>} t - Interleaved values: real part at 2 * i, imaginary part at 2 * i + 1.
 */
function Z(e, t) {
  const bins = e.length;
  for (let bin = 0, offset = 0; bin < bins; bin += 1, offset += 2) {
    const real = t[offset];
    const imag = t[offset + 1];
    e[bin] = real * real + imag * imag;
  }
}
66
/**
 * Normalises values in place to zero mean and unit sample standard deviation.
 *
 * Inputs with fewer than two elements are left untouched. When the standard
 * deviation (computed with an n - 1 denominator) is not above 1e-8, the values
 * are only centred, not scaled.
 *
 * @param {Float32Array|number[]} e - Values to normalise.
 */
function j(e) {
  const count = e.length;
  if (count < 2) return;

  let total = 0;
  for (let idx = 0; idx < count; idx += 1) {
    total += e[idx];
  }
  const mean = total / count;

  let squaredSum = 0;
  for (let idx = 0; idx < count; idx += 1) {
    const delta = e[idx] - mean;
    squaredSum += delta * delta;
  }
  const deviation = Math.sqrt(squaredSum / (count - 1));

  const canScale = deviation > 1e-8;
  const inverse = 1 / deviation;
  for (let idx = 0; idx < count; idx += 1) {
    const centred = e[idx] - mean;
    e[idx] = canScale ? centred * inverse : centred;
  }
}
86
/**
 * Turns raw audio into normalised log mel filter-bank features.
 *
 * Steps: optional stereo-to-mono averaging, optional resampling to `y` Hz,
 * framing (`h` samples per frame, hop `C`), per-frame mean removal,
 * pre-emphasis, windowing, FFT power spectrum, filter-bank energies and log.
 * The frame count is padded up to a multiple of `S` by repeating the last
 * frame, and the whole feature buffer is normalised with j().
 *
 * @param {{leftChannel: Float32Array, rightChannel?: Float32Array, sampleRate: number}} e
 *   Audio to analyse.
 * @returns {Promise<{features: Float32Array, dimensions: number[]}>} The flat
 *   feature buffer and its shape `[groups, S, u]`.
 * @throws {Error} When the audio is shorter than one analysis frame.
 */
async function z(e) {
  let samples = e.leftChannel;

  if (e.rightChannel) {
    const right = e.rightChannel;
    const mono = new Float32Array(samples.length);
    for (let idx = 0; idx < samples.length; idx += 1) {
      mono[idx] = (samples[idx] + right[idx]) * 0.5;
    }
    samples = mono;
  }

  if (e.sampleRate !== y) {
    const resampler = await O(1, e.sampleRate, y, {
      converterType: H.SRC_SINC_MEDIUM_QUALITY
    });
    try {
      samples = resampler.simple(samples);
    } finally {
      // Release the converter even when resampling throws.
      resampler.destroy();
    }
  }

  const frameCount = Math.floor((samples.length - h) / C) + 1;
  if (frameCount <= 0)
    throw new Error("特征提取失败:音频时长过短。");

  const paddedFrames = Math.ceil(frameCount / S) * S;
  const features = new Float32Array(paddedFrames * u);
  const frame = new Float32Array(h);
  const fftInput = N.createComplexArray();
  const fftOutput = N.createComplexArray();
  const power = new Float32Array(Math.floor(P / 2) + 1);

  for (let frameIdx = 0; frameIdx < frameCount; frameIdx += 1) {
    const start = frameIdx * C;
    frame.set(samples.subarray(start, start + h));
    V(frame);
    Y(frame, W);
    K(frame);

    // Real samples go into the even slots; the rest stays zero (zero padding up to P).
    fftInput.fill(0);
    for (let idx = 0; idx < h; idx += 1) {
      fftInput[idx * 2] = frame[idx];
    }
    N.transform(fftOutput, fftInput);
    Z(power, fftOutput);

    const rowOffset = frameIdx * u;
    for (let band = 0; band < u; band += 1) {
      const { values, startBin } = q[band];
      let energy = 0;
      for (let bin = 0; bin < values.length; bin += 1) {
        energy += power[startBin + bin] * values[bin];
      }
      // Floor the energy so the logarithm never sees zero.
      features[rowOffset + band] = Math.log(energy < L ? L : energy);
    }
  }

  if (paddedFrames > frameCount) {
    // Pad by repeating the last real frame.
    const lastStart = (frameCount - 1) * u;
    const lastRow = features.subarray(lastStart, lastStart + u);
    for (let frameIdx = frameCount; frameIdx < paddedFrames; frameIdx += 1) {
      features.set(lastRow, frameIdx * u);
    }
  }

  j(features);

  return {
    features,
    dimensions: [paddedFrames / S, S, u]
  };
}
132
/**
 * Worker entry point: runs feature extraction on each incoming message and
 * answers with `{ status: "success", payload }` (transferring the feature
 * buffer) or `{ status: "error", error }`.
 */
self.addEventListener(
  "message",
  async (e) => {
    try {
      const payload = await z(e.data);
      self.postMessage(
        { status: "success", payload },
        { transfer: [payload.features.buffer] }
      );
    } catch (t) {
      // Thrown values are not guaranteed to be Error objects; fall back to their
      // string form so the caller always receives a usable message and the
      // handler itself cannot throw on null/undefined.
      const error = t?.message ?? String(t);
      self.postMessage({ status: "error", error });
    }
  }
);
@@ -1,178 +0,0 @@
1
- import { env as P, InferenceSession as $, Tensor as W } from "onnxruntime-web";
2
- import q from "jszip";
3
// Last two dimensions of the audio tensor fed to the model (see the run loop).
const V = 80;
const G = 4;
// Frame rate used to derive the frame interval reported to the main thread.
const H = 25;
// Interval between frames in milliseconds.
const j = 1e3 / H;

/**
 * Maps a supported crop size to the border width used around the generated patch.
 * @param {number} t - Crop size in pixels.
 * @returns {number} 4 for a crop size of 128, 3 for a crop size of 96.
 * @throws {Error} For any other crop size.
 */
const J = (t) => {
  switch (t) {
    case 128:
      return 4;
    case 96:
      return 3;
    default:
      throw new Error(`Unsupported crop size: ${t}. Only nano (96) and tiny (128) are supported.`);
  }
};

/**
 * Advances a ping-pong cursor that bounces between index 0 and `e - 1`.
 *
 * @param {number} t - Current index.
 * @param {number} e - Number of items.
 * @param {number} n - Current direction (+1 or -1).
 * @returns {{nextIndex: number, nextDirection: number}} The next position and direction.
 */
const Q = (t, e, n) => {
  if (e <= 1) {
    return { nextIndex: 0, nextDirection: 1 };
  }
  const candidate = t + n;
  if (candidate >= e) {
    // Ran past the end: turn around.
    return { nextIndex: e - 2, nextDirection: -1 };
  }
  if (candidate < 0) {
    // Ran past the start: turn around.
    return { nextIndex: 1, nextDirection: 1 };
  }
  return { nextIndex: candidate, nextDirection: n };
};
12
// Run the WASM backend on a single thread.
P.wasm.numThreads = 1;

/** Reports whether `navigator.gpu` is available in this scope. */
const Z = () => typeof navigator !== "undefined" && Boolean(navigator.gpu);

// Number of audio feature rows packed into each model input window.
const F = 32;

// Inference session, active dataset description, zip archive (only held while
// initialising) and the blending mask supplied with the init message.
let _ = null;
let R = null;
let x = null;
let L = null;

// Assets unpacked from the archive, keyed by their file name.
const w = {
  tensors: new Map(),
  faceImages: new Map(),
  fullImages: new Map()
};

// Border width and crop size of the face patch (overwritten during init).
let S = 3;
let b = 96;
// Source image width and height (overwritten during init).
let z = 0;
let k = 0;
// Canvas, 2D context and ImageData used to rasterise the model output.
let C = null;
let A = null;
let y = null;
// Canvas and context for the blended face patch.
let E = null;
let g = null;
// Canvas and context for the full output frame.
let I = null;
let D = null;
// Reusable buffer holding the current audio feature window.
let M = null;
// Set when a stop message arrives; ends the run loop.
let T = false;
// Name and decoded bitmap of the full image currently in use.
let v = "";
let p = null;
21
/**
 * Reports a failure to the main thread.
 * @param {string} t - Error text sent as the message payload.
 */
const B = (t) => {
  self.postMessage({ type: "error", payload: t });
};

/**
 * Wraps cached tensor data in a float32 tensor of shape [1, 6, n, n],
 * where n is the crop size minus the border on both sides.
 * @param {string} t - Tensor file name used as cache key.
 * @throws {Error} When the file was never cached.
 */
const K = (t) => {
  const cached = w.tensors.get(t);
  if (!cached) {
    throw new Error(`Cache miss: ${t}`);
  }
  const side = b - 2 * S;
  return new W("float32", cached, [1, 6, side, side]);
};

/**
 * Looks up a cached face bitmap.
 * @param {string} t - Face image file name used as cache key.
 * @throws {Error} When the file was never cached.
 */
const X = (t) => {
  const bitmap = w.faceImages.get(t);
  if (!bitmap) {
    throw new Error(`Cache miss: ${t}`);
  }
  return bitmap;
};

/**
 * Decodes a cached full-frame blob into a fresh ImageBitmap.
 * @param {string} t - Full image file name used as cache key.
 * @throws {Error} When the file was never cached.
 */
const Y = async (t) => {
  const blob = w.fullImages.get(t);
  if (!blob) {
    throw new Error(`Cache miss: ${t}`);
  }
  const bitmap = await createImageBitmap(blob);
  return bitmap;
};

/**
 * Rasterises a model output tensor onto canvas `C`.
 *
 * Values are scaled by 255 and clamped to 0..255. With three planes, plane 2
 * lands in the lowest byte of each packed pixel, plane 1 in the next and
 * plane 0 in the third; otherwise plane 0 fills all three. Alpha is 255.
 * NOTE(review): packing through Uint32Array depends on host byte order —
 * on little-endian hosts the lowest byte is the red channel.
 *
 * @param {{dims: number[], data: ArrayLike<number>}} t - Tensor with dims [_, planes, height, width].
 */
const ee = (t) => {
  const [, planes, height, width] = t.dims;
  const values = t.data;

  if (!y || y.width !== width || y.height !== height) {
    C.width = width;
    C.height = height;
    y = A.createImageData(width, height);
  }

  const planeSize = height * width;
  const packed = new Uint32Array(y.data.buffer);
  const secondPlane = planeSize;
  const thirdPlane = planeSize * 2;
  const hasThreePlanes = planes === 3;

  for (let px = 0; px < planeSize; px += 1) {
    let low = (hasThreePlanes ? values[px + thirdPlane] : values[px]) * 255;
    let mid = (hasThreePlanes ? values[px + secondPlane] : values[px]) * 255;
    let high = values[px] * 255;

    if (low < 0) low = 0;
    else if (low > 255) low = 255;
    if (mid < 0) mid = 0;
    else if (mid > 255) mid = 255;
    if (high < 0) high = 0;
    else if (high > 255) high = 255;

    packed[px] = (255 << 24) | ((high & 255) << 16) | ((mid & 255) << 8) | (low & 255);
  }

  A.putImageData(y, 0, 0);
};

/**
 * Composites one output frame.
 *
 * The generated patch is drawn over the face image at the border offset,
 * masked with the blending mask, and backed by the original face image; the
 * result is pasted into the full frame at the crop rectangle.
 *
 * @param {object} t - Model output tensor.
 * @param {object} e - Dataset image entry (`face_image`, `crop_info`).
 * @param {ImageBitmap} n - Decoded full frame.
 * @returns {Promise<ImageBitmap>} Snapshot of the finished frame canvas.
 */
const te = async (t, e, n) => {
  ee(t);
  const face = X(e.face_image);

  g.globalCompositeOperation = "source-over";
  g.drawImage(face, 0, 0);
  g.drawImage(C, S, S);
  // Keep only what the blending mask covers...
  g.globalCompositeOperation = "destination-in";
  g.drawImage(L, 0, 0, b, b);
  // ...and fill everything else from the original face image.
  g.globalCompositeOperation = "destination-over";
  g.drawImage(face, 0, 0);
  g.globalCompositeOperation = "source-over";

  D.drawImage(n, 0, 0);
  const crop = e.crop_info;
  const cropHeight = crop.ymax - crop.ymin;
  D.drawImage(E, crop.xmin, crop.ymin, crop.width, cropHeight);

  const frame = await createImageBitmap(I);
  return frame;
};

/**
 * Copies a window of `F` feature rows centred on row `n` into the shared buffer `M`.
 * Row indices outside the available range are clamped to the first/last row.
 *
 * @param {Float32Array} t - Flat audio features.
 * @param {number[]} e - Feature dimensions `[rows, d1, d2]`.
 * @param {number} n - Index of the row the window is centred on.
 * @returns {Float32Array} The shared window buffer (reused between calls).
 */
const ae = (t, e, n) => {
  const [rows, dimA, dimB] = e;
  const rowSize = dimA * dimB;
  const windowSize = F * rowSize;

  if (!M || M.length !== windowSize) {
    M = new Float32Array(windowSize);
  }

  for (let slot = 0; slot < F; slot += 1) {
    const row = Math.min(Math.max(n - F / 2 + slot, 0), rows - 1);
    const from = row * rowSize;
    M.set(t.subarray(from, from + rowSize), slot * rowSize);
  }
  return M;
};

/**
 * Creates the working canvases on first use and recreates them when the
 * requested size changes.
 *
 * @param {number} t - Full frame width.
 * @param {number} e - Full frame height.
 * @param {number} n - Side length of the square face canvas.
 */
const ne = (t, e, n) => {
  if (!C) {
    C = new OffscreenCanvas(1, 1);
    A = C.getContext("2d", { willReadFrequently: true });
  }
  if (!E || E.width !== n) {
    E = new OffscreenCanvas(n, n);
    g = E.getContext("2d", { willReadFrequently: true });
  }
  if (!I || I.width !== t || I.height !== e) {
    I = new OffscreenCanvas(t, e);
    D = I.getContext("2d", { willReadFrequently: true });
  }
};

/**
 * Handles the `init` message: configures the runtime, unpacks the dataset
 * archive into the caches, creates the inference session and posts `ready`.
 * Any failure is reported through B() instead of being thrown.
 *
 * @param {object} t - Init message (`wasmPaths`, `dataset`, `zipBuffer`, `blendingMask`, `modelPath`).
 */
const se = async (t) => {
  try {
    P.wasm.wasmPaths = t.wasmPaths || "/";
    if (!t.dataset) {
      throw new Error("Missing dataset");
    }

    const dataset = t.dataset;
    R = dataset;
    b = dataset.dataset_info.config.crop_size;
    S = J(b);
    z = dataset.dataset_info.source_image_dimensions?.width || 0;
    k = dataset.dataset_info.source_image_dimensions?.height || 0;
    ne(z, k, b);

    x = await q.loadAsync(t.zipBuffer);
    // Drop the reference to the raw archive bytes once they are parsed.
    t.zipBuffer = null;
    L = t.blendingMask;

    // Entries missing from the archive are skipped here; they surface later
    // as "Cache miss" errors when a frame needs them.
    const cacheEntry = async (entry) => {
      if (!w.tensors.has(entry.tensor_file)) {
        const file = x.file(entry.tensor_file);
        if (file) {
          const raw = await file.async("arraybuffer");
          w.tensors.set(entry.tensor_file, new Float32Array(raw));
        }
      }
      if (!w.faceImages.has(entry.face_image)) {
        const file = x.file(entry.face_image);
        if (file) {
          const blob = await file.async("blob");
          const bitmap = await createImageBitmap(blob);
          w.faceImages.set(entry.face_image, bitmap);
        }
      }
      if (!w.fullImages.has(entry.full_image)) {
        const file = x.file(entry.full_image);
        if (file) {
          const blob = await file.async("blob");
          w.fullImages.set(entry.full_image, blob);
        }
      }
    };
    const pending = dataset.images.map((entry) => cacheEntry(entry));
    await Promise.all(pending);
    x = null;

    const providers = [];
    if (Z()) {
      providers.push("webgpu");
    }
    providers.push("wasm");

    // The external weights file sits next to the model as "<model file>.data".
    const modelUrl = new URL(t.modelPath, self.location.href).toString();
    const modelFile = new URL(modelUrl).pathname.split("/").pop() || "model.onnx";
    const weightsUrl = new URL(`${modelFile}.data`, modelUrl).toString();

    _ = await $.create(modelUrl, {
      executionProviders: providers,
      graphOptimizationLevel: "all",
      enableMemPattern: true,
      enableCpuMemArena: true,
      externalData: [{ data: weightsUrl, path: `${modelFile}.data` }]
    });
    self.postMessage({ type: "ready" });
  } catch (e) {
    B(e?.message || "Init Error");
  }
};

/**
 * Handles the `run` message: posts `start`, then produces one frame per audio
 * feature row until finished, stopped or failed, and finally posts `done`.
 * Dataset images are visited in ping-pong order.
 *
 * @param {object} t - Run message (`audioFeatures`, `audioDimensions`).
 */
const oe = async (t) => {
  if (!_ || !R) {
    B("Not initialized");
    return;
  }

  T = false;
  const totalFrames = t.audioDimensions[0];
  self.postMessage({
    type: "start",
    payload: { totalFrames, frameInterval: j }
  });

  let imageIndex = 0;
  let direction = 1;
  const feeds = {};
  const imageInput = _.inputNames[0];
  const audioInput = _.inputNames[1];
  const outputName = _.outputNames[0];

  // Start from a clean full-image state.
  v = "";
  if (p) {
    p.close();
    p = null;
  }

  for (let frameIndex = 0; frameIndex < totalFrames && !T; frameIndex += 1) {
    let imageTensor = null;
    let audioTensor = null;
    let outputTensor = null;
    try {
      const entry = R.images[imageIndex];
      imageTensor = K(entry.tensor_file);

      // Decode the full frame only when it differs from the one in use.
      if (entry.full_image !== v) {
        if (p) {
          p.close();
        }
        p = await Y(entry.full_image);
        v = entry.full_image;
      }

      const audioWindow = ae(t.audioFeatures, t.audioDimensions, frameIndex);
      audioTensor = new W("float32", audioWindow, [1, F, G, V]);
      feeds[imageInput] = imageTensor;
      feeds[audioInput] = audioTensor;

      const results = await _.run(feeds);
      const output = results[outputName];
      if (!output) {
        throw new Error("Missing inference output tensor");
      }
      outputTensor = output;

      const frame = await te(output, entry, p);
      self.postMessage(
        { type: "frame", payload: { frame, index: frameIndex } },
        [frame]
      );

      const next = Q(imageIndex, R.images.length, direction);
      imageIndex = next.nextIndex;
      direction = next.nextDirection;
    } catch (f) {
      B(f?.message || "Run Error");
      break;
    } finally {
      imageTensor?.dispose();
      audioTensor?.dispose();
      outputTensor?.dispose();
      imageTensor = null;
      audioTensor = null;
      outputTensor = null;
    }
  }

  if (p) {
    p.close();
    p = null;
  }
  v = "";
  self.postMessage({ type: "done" });
};
175
/**
 * Worker message dispatcher: `init` loads assets and the model, `run` starts
 * frame generation, `stop` asks a running generation loop to end. Messages of
 * any other type are ignored.
 */
self.onmessage = (t) => {
  const message = t.data;
  switch (message.type) {
    case "init":
      se(message);
      break;
    case "run":
      oe(message);
      break;
    case "stop":
      T = true;
      break;
    default:
      break;
  }
};