@viji-dev/core 0.3.19 → 0.3.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -13
- package/dist/artist-dts-p5.js +1 -1
- package/dist/artist-dts.js +1 -1
- package/dist/artist-global.d.ts +65 -7
- package/dist/assets/cv-tasks.worker.js +302 -47
- package/dist/assets/viji.worker-bm-hvzXt.js +25975 -0
- package/dist/assets/viji.worker-bm-hvzXt.js.map +1 -0
- package/dist/{essentia-wasm.web-C7QoUtrj.js → essentia-wasm.web-C1URJxCY.js} +2 -2
- package/dist/{essentia-wasm.web-C7QoUtrj.js.map → essentia-wasm.web-C1URJxCY.js.map} +1 -1
- package/dist/{index-BKGarA3m.js → index-trkn0FNW.js} +699 -739
- package/dist/index-trkn0FNW.js.map +1 -0
- package/dist/index.d.ts +161 -206
- package/dist/index.js +1 -1
- package/dist/shader-uniforms.js +403 -23
- package/package.json +3 -2
- package/dist/assets/viji.worker-BnDb6mPh.js +0 -4325
- package/dist/assets/viji.worker-BnDb6mPh.js.map +0 -1
- package/dist/index-BKGarA3m.js.map +0 -1
|
@@ -5,24 +5,27 @@
|
|
|
5
5
|
* Uses importScripts() to load MediaPipe Tasks Vision UMD bundle.
|
|
6
6
|
*/
|
|
7
7
|
|
|
// Runtime-toggled debug flag — flipped by CVSystem via the 'debug' message.
let DEBUG = false;

/**
 * Console logger for the worker, active only while DEBUG is on.
 * Prefixes every entry so worker output is distinguishable in the console.
 *
 * @param {...*} args - values forwarded verbatim to console.log
 */
function log(...args) {
  if (!DEBUG) {
    return;
  }
  console.log('🔧 [CV Tasks Worker]', ...args);
}
|
|
15
|
+
|
|
8
16
|
// Define CommonJS environment for MediaPipe bundle.
// The vision_bundle.js UMD wrapper detects `module`/`exports` on the global
// and attaches its API to them (read back later via self.module.exports).
self.exports = {};
self.module = { exports: {} };

// Import MediaPipe Tasks Vision UMD bundle (synchronous classic-worker load).
log('Starting to load vision_bundle.js...');
try {
  importScripts('/dist/assets/vision_bundle.js');
  log('vision_bundle.js loaded successfully');
} catch (error) {
  // Error is logged but not rethrown: the worker keeps running, and later
  // initialization will surface the failure when FilesetResolver is missing.
  console.error('❌ [CV Tasks Worker] Failed to load vision_bundle.js:', error);
}
|
|
20
28
|
|
|
21
|
-
// Debug: Check what's available after import (disabled for production)
|
|
22
|
-
// console.log('🔧 [CV Tasks Worker] Available globals after import:', Object.keys(self));
|
|
23
|
-
// console.log('🔧 [CV Tasks Worker] module.exports:', self.module.exports);
|
|
24
|
-
// console.log('🔧 [CV Tasks Worker] exports:', self.exports);
|
|
25
|
-
|
|
26
29
|
// MediaPipe model instances
|
|
27
30
|
let faceDetector = null;
|
|
28
31
|
let faceLandmarker = null;
|
|
@@ -45,14 +48,236 @@ let processingConfig = false;
|
|
|
45
48
|
let workerHealthy = true;
|
|
46
49
|
let memoryPressureDetected = false;
|
|
47
50
|
|
|
48
|
-
//
|
|
51
|
+
// Safe zero-defaults for face data when features are inactive.
// Frozen because the same object instances are shared across all result
// frames — consumers must not be able to mutate them.
const EMPTY_EXPRESSIONS = Object.freeze({
  neutral: 0, happy: 0, sad: 0, angry: 0, surprised: 0, disgusted: 0, fearful: 0
});

// Neutral head pose (degrees).
const EMPTY_HEAD_POSE = Object.freeze({ pitch: 0, yaw: 0, roll: 0 });

// All ARKit-style blendshape keys at zero. Besides serving as the default,
// its key list (via Object.keys) defines the dimension set for the emotion
// prototype vector space (see PROTOTYPE_KEYS).
const EMPTY_BLENDSHAPES = Object.freeze({
  browDownLeft: 0, browDownRight: 0, browInnerUp: 0, browOuterUpLeft: 0, browOuterUpRight: 0,
  cheekPuff: 0, cheekSquintLeft: 0, cheekSquintRight: 0,
  eyeBlinkLeft: 0, eyeBlinkRight: 0,
  eyeLookDownLeft: 0, eyeLookDownRight: 0, eyeLookInLeft: 0, eyeLookInRight: 0,
  eyeLookOutLeft: 0, eyeLookOutRight: 0, eyeLookUpLeft: 0, eyeLookUpRight: 0,
  eyeSquintLeft: 0, eyeSquintRight: 0, eyeWideLeft: 0, eyeWideRight: 0,
  jawForward: 0, jawLeft: 0, jawOpen: 0, jawRight: 0,
  mouthClose: 0, mouthDimpleLeft: 0, mouthDimpleRight: 0,
  mouthFrownLeft: 0, mouthFrownRight: 0, mouthFunnel: 0, mouthLeft: 0,
  mouthLowerDownLeft: 0, mouthLowerDownRight: 0, mouthPressLeft: 0, mouthPressRight: 0,
  mouthPucker: 0, mouthRight: 0, mouthRollLower: 0, mouthRollUpper: 0,
  mouthShrugLower: 0, mouthShrugUpper: 0, mouthSmileLeft: 0, mouthSmileRight: 0,
  mouthStretchLeft: 0, mouthStretchRight: 0, mouthUpperUpLeft: 0, mouthUpperUpRight: 0,
  noseSneerLeft: 0, noseSneerRight: 0, tongueOut: 0
});
|
|
74
|
+
|
|
75
|
+
/**
 * Convert a MediaPipe faceBlendshapes categories array into a flat record
 * keyed by blendshape name.
 *
 * @param {{categoryName: string, score: number}[]} categories - one entry per blendshape
 * @returns {Object<string, number>} blendshape name → score
 */
function buildBlendshapesRecord(categories) {
  return Object.fromEntries(
    categories.map((category) => [category.categoryName, category.score])
  );
}
|
|
85
|
+
|
|
86
|
+
/**
 * EMFACS-based emotion prototype vectors (Ekman FACS → ARKit blendshapes).
 * Weights reflect each blendshape's reliability in MediaPipe's 2D web model.
 * Known near-zero blendshapes (cheekSquint*, noseSneer*, eyeWide*) are
 * down-weighted and supplemented by correlated signals that do activate.
 *
 * Each prototype is a sparse vector over PROTOTYPE_KEYS; keys that are
 * absent read as 0. Consumed by emotionCosineSimilarity() via
 * mapBlendshapesToEmotions(); magnitudes are pre-computed in
 * PROTOTYPE_MAGNITUDES. Weights are hand-tuned — change with care.
 *
 * Reference: Aldenhoven et al. (2026) "Real-Time Emotion Recognition
 * Performance of Mobile Devices" — EMFACS cosine similarity approach,
 * 68.3% accuracy on 7 emotions, exceeding human raters (58.9%).
 */
const EMOTION_PROTOTYPES = {
  // mouthSmile is unique to happiness — no other emotion uses it.
  // eyeSquint is a secondary "Duchenne smile" indicator.
  happy: {
    mouthSmileLeft: 1.0, mouthSmileRight: 1.0,
    eyeSquintLeft: 0.3, eyeSquintRight: 0.3
  },
  // Pouty/trembling lip: mouthShrugLower (chin raiser) is the primary signal,
  // mouthPucker (compressed lips) secondary. Compact prototype so it wins
  // over angry when the differentiating upper-face signals are absent.
  sad: {
    mouthShrugLower: 1.0,
    mouthPucker: 0.8
  },
  // Shares sad's mouth signals at lower weight, differentiated by upper-face
  // tension: eyeSquint + browDown. These extra dimensions shift the cosine
  // direction away from sad only when genuinely activated.
  angry: {
    mouthShrugLower: 0.6, mouthPucker: 0.5,
    eyeSquintLeft: 1.0, eyeSquintRight: 1.0,
    browDownLeft: 1.0, browDownRight: 1.0
  },
  // Brow raise only — the simplest, most distinctive prototype.
  // jawOpen removed to avoid overlap with fearful.
  surprised: {
    browInnerUp: 1.0,
    browOuterUpLeft: 1.0, browOuterUpRight: 1.0
  },
  // mouthUpperUp (upper lip raise) is the unique primary signal.
  // mouthFrown supports, browDown at low weight for wrinkled-brow disgust.
  disgusted: {
    mouthUpperUpLeft: 1.0, mouthUpperUpRight: 1.0,
    mouthFrownLeft: 0.8, mouthFrownRight: 0.8,
    browDownLeft: 0.3, browDownRight: 0.3
  },
  // Shares surprised's brow raise, differentiated by jawOpen (rare in other
  // emotions at even 10-20%). jawOpen is the primary differentiator.
  fearful: {
    browInnerUp: 0.8, browOuterUpLeft: 0.8, browOuterUpRight: 0.8,
    jawOpen: 1.0
  }
};
|
|
138
|
+
|
|
139
|
+
// Dimension list for the emotion vector space: every known blendshape key.
const PROTOTYPE_KEYS = Object.keys(EMPTY_BLENDSHAPES);

// Pre-compute each prototype's Euclidean magnitude so the per-frame cosine
// similarity only has to normalize the observed vector.
const PROTOTYPE_MAGNITUDES = {};
for (const [emotion, proto] of Object.entries(EMOTION_PROTOTYPES)) {
  const sumOfSquares = PROTOTYPE_KEYS.reduce((acc, key) => {
    const weight = proto[key] || 0;
    return acc + weight * weight;
  }, 0);
  PROTOTYPE_MAGNITUDES[emotion] = Math.sqrt(sumOfSquares);
}
|
|
151
|
+
|
|
152
|
+
// Noise floor: blendshape values at or below this are treated as zero to
// prevent resting-state activations from matching emotion prototypes.
const BLENDSHAPE_NOISE_FLOOR = 0.10;

/**
 * Cosine similarity between an observed blendshape vector and a prototype:
 *   cos(v, p) = (v · p) / (|v| * |p|)
 * Observed values at or below BLENDSHAPE_NOISE_FLOOR are clamped to zero to
 * suppress resting-state noise; prototype weights are used as-is.
 *
 * @param {Object<string, number>} observed - observed blendshape scores (missing keys read as 0)
 * @param {Object<string, number>} prototype - emotion prototype weights (missing keys read as 0)
 * @param {number} protoMag - pre-computed Euclidean magnitude of `prototype`
 * @param {string[]} [keys=PROTOTYPE_KEYS] - dimensions of the vector space;
 *   parameterized so the metric can be reused with other key sets
 * @returns {number} similarity (in [0, 1] for non-negative inputs); 0 when
 *   either vector is effectively zero
 */
function emotionCosineSimilarity(observed, prototype, protoMag, keys = PROTOTYPE_KEYS) {
  let dot = 0;
  let magO = 0;
  for (const key of keys) {
    const raw = observed[key] || 0;
    // Noise-floor clamp: strictly-greater test, so exactly 0.10 is zeroed.
    const o = raw > BLENDSHAPE_NOISE_FLOOR ? raw : 0;
    const p = prototype[key] || 0;
    dot += o * p;
    magO += o * o;
  }
  magO = Math.sqrt(magO);
  // Guard against division by ~zero for neutral faces or empty prototypes.
  if (magO < 1e-8 || protoMag < 1e-8) return 0;
  return dot / (magO * protoMag);
}
|
|
175
|
+
|
|
176
|
+
// Cross-suppression: when one emotion is confident, competing emotions are
// reduced. Uses raw (pre-suppression) scores so order doesn't matter.
// Shape: [suppressor] → { [target]: strength 0-1 }.
// Applied in mapBlendshapesToEmotions() stage 3 as
//   target *= (1 - suppressorScore * strength)
// whenever the suppressor's raw score exceeds 0.1. Hand-tuned values.
const EMOTION_INHIBITIONS = {
  happy: { angry: 0.7, sad: 0.5, disgusted: 0.4, fearful: 0.3 },
  sad: { happy: 0.3, angry: 0.2 },
  angry: { happy: 0.3, sad: 0.2 },
  surprised: { angry: 0.3, sad: 0.3 },
  disgusted: { happy: 0.4, surprised: 0.2 },
  fearful: { happy: 0.3, angry: 0.2 }
};
|
|
187
|
+
|
|
188
|
+
/**
 * Classify observed blendshapes into emotions using a 3-stage pipeline:
 *   1. Cosine similarity against EMFACS prototypes (base scores)
 *   2. Key-signal boosters for defining blendshapes (mouthPress → angry)
 *   3. Cross-emotion inhibition matrix (happy suppresses angry, etc.)
 * A synthetic `neutral` score dominates when no emotion is confident.
 *
 * @param {Object<string, number>} bs - flat blendshape record (name → score)
 * @returns {{neutral: number, happy: number, sad: number, angry: number,
 *            surprised: number, disgusted: number, fearful: number}}
 */
function mapBlendshapesToEmotions(bs) {
  const floor = BLENDSHAPE_NOISE_FLOOR;

  // --- Stage 1: base score per emotion via cosine similarity, clamped ≥ 0 ---
  const score = {};
  for (const [emotion, proto] of Object.entries(EMOTION_PROTOTYPES)) {
    score[emotion] = Math.max(
      0,
      emotionCosineSimilarity(bs, proto, PROTOTYPE_MAGNITUDES[emotion])
    );
  }

  // --- Stage 2: boosters ---
  // mouthPress is a defining angry signal kept out of the prototype (to avoid
  // resting-state contamination) but rewarded here when clearly present.
  const pressAvg = ((bs.mouthPressLeft || 0) + (bs.mouthPressRight || 0)) / 2;
  const mouthPress = Math.max(0, pressAvg - floor);
  if (mouthPress > 0) {
    score.angry = Math.min(1, score.angry + mouthPress * 0.3);
  }

  // --- Stage 3: cross-emotion inhibition ---
  // Drive suppression from a pre-suppression snapshot so the outcome does not
  // depend on iteration order.
  const raw = { ...score };
  for (const [suppressor, targets] of Object.entries(EMOTION_INHIBITIONS)) {
    const confidence = raw[suppressor] || 0;
    if (confidence <= 0.1) continue;
    for (const [target, strength] of Object.entries(targets)) {
      score[target] *= 1 - confidence * strength;
    }
  }

  // --- Neutral: dominant when no emotion is confident ---
  let peak = 0;
  for (const emotion of Object.keys(EMOTION_PROTOTYPES)) {
    if (score[emotion] > peak) peak = score[emotion];
  }
  const NEUTRAL_THRESHOLD = 0.35;
  const neutral = peak < NEUTRAL_THRESHOLD ? 1.0 : Math.max(0, 1.0 - peak * 1.5);

  return {
    neutral,
    happy: score.happy || 0,
    sad: score.sad || 0,
    angry: score.angry || 0,
    surprised: score.surprised || 0,
    disgusted: score.disgusted || 0,
    fearful: score.fearful || 0
  };
}
|
|
246
|
+
|
|
247
|
+
/**
 * Compute head pose (pitch, yaw, roll in degrees) from MediaPipe FaceMesh
 * landmarks. Orientation is derived from five stable keypoints: nose tip,
 * chin, the outer eye corners, and the forehead.
 *
 * @param {{x: number, y: number, z: number}[]} landmarks - FaceMesh points
 * @returns {{pitch: number, yaw: number, roll: number}} degrees, clamped to
 *   ±90 (pitch/yaw) and ±180 (roll); all zeros when any keypoint is missing
 */
function computeHeadPoseFromLandmarks(landmarks) {
  const RAD_TO_DEG = 180 / Math.PI;

  // Key landmark indices in the MediaPipe FaceMesh topology.
  const noseTip = landmarks[1];
  const chin = landmarks[152];
  const leftEye = landmarks[33];
  const rightEye = landmarks[263];
  const forehead = landmarks[10];

  // Neutral pose when any keypoint is absent (short/partial landmark array).
  if (!noseTip || !chin || !leftEye || !rightEye || !forehead) {
    return { pitch: 0, yaw: 0, roll: 0 };
  }

  // Yaw: horizontal offset of the nose tip from the eye midpoint, measured
  // against the nose-to-eye-plane depth (+0.001 avoids a zero denominator).
  const eyeMidX = (leftEye.x + rightEye.x) / 2;
  const eyeMidZ = (leftEye.z + rightEye.z) / 2 || 0; // NaN-safe when z missing
  const yaw =
    Math.atan2(noseTip.x - eyeMidX, Math.abs(noseTip.z - eyeMidZ) + 0.001) * RAD_TO_DEG;

  // Pitch: where the nose sits along the forehead→chin axis (0.5 = centered).
  const faceHeight =
    Math.sqrt((chin.x - forehead.x) ** 2 + (chin.y - forehead.y) ** 2) || 0.001;
  const relativeNoseY = (noseTip.y - forehead.y) / faceHeight;
  const pitch = (relativeNoseY - 0.5) * 180;

  // Roll: tilt of the line through the two eye corners.
  const roll = Math.atan2(rightEye.y - leftEye.y, rightEye.x - leftEye.x) * RAD_TO_DEG;

  // Clamp to sane ranges; NaN (e.g. missing z/degenerate geometry) maps to 0.
  const clamp = (value, lo, hi) => Math.max(lo, Math.min(hi, value));
  return {
    pitch: Number.isNaN(pitch) ? 0 : clamp(pitch, -90, 90),
    yaw: Number.isNaN(yaw) ? 0 : clamp(yaw, -90, 90),
    roll: Number.isNaN(roll) ? 0 : clamp(roll, -180, 180)
  };
}
|
|
57
282
|
|
|
58
283
|
/**
|
|
@@ -60,21 +285,18 @@ function log(...args) {
|
|
|
60
285
|
*/
|
|
61
286
|
async function initializeVision() {
|
|
62
287
|
if (isInitialized) {
|
|
63
|
-
|
|
288
|
+
log('Vision already initialized, skipping');
|
|
64
289
|
return;
|
|
65
290
|
}
|
|
66
291
|
|
|
67
292
|
try {
|
|
68
|
-
|
|
293
|
+
log('Starting MediaPipe Tasks Vision initialization...');
|
|
69
294
|
|
|
70
|
-
// Initialize the vision runtime with WASM files
|
|
71
|
-
// MediaPipe Tasks Vision expects the base path without trailing slash
|
|
72
295
|
const wasmBasePath = '/dist/assets/wasm';
|
|
73
296
|
log('WASM base path:', wasmBasePath);
|
|
74
297
|
|
|
75
|
-
// Try different ways to access FilesetResolver
|
|
76
298
|
const FilesetResolver = self.FilesetResolver || self.module.exports.FilesetResolver || self.exports.FilesetResolver;
|
|
77
|
-
|
|
299
|
+
log('FilesetResolver found:', !!FilesetResolver);
|
|
78
300
|
|
|
79
301
|
if (!FilesetResolver) {
|
|
80
302
|
throw new Error('FilesetResolver not found in any expected location');
|
|
@@ -139,12 +361,13 @@ async function initializeFaceLandmarks() {
|
|
|
139
361
|
delegate: 'GPU'
|
|
140
362
|
},
|
|
141
363
|
runningMode: 'VIDEO',
|
|
142
|
-
numFaces: 1
|
|
364
|
+
numFaces: 1,
|
|
365
|
+
outputFaceBlendshapes: true
|
|
143
366
|
};
|
|
144
367
|
|
|
145
368
|
const FaceLandmarker = self.FaceLandmarker || self.module.exports.FaceLandmarker || self.exports.FaceLandmarker;
|
|
146
369
|
faceLandmarker = await FaceLandmarker.createFromOptions(vision, options);
|
|
147
|
-
log('✅ Face Landmarker loaded');
|
|
370
|
+
log('✅ Face Landmarker loaded (blendshapes enabled)');
|
|
148
371
|
} catch (error) {
|
|
149
372
|
log('❌ Failed to load Face Landmarker:', error);
|
|
150
373
|
throw error;
|
|
@@ -256,42 +479,60 @@ async function processFrame(imageInput, timestamp, features) {
|
|
|
256
479
|
if (features.includes('faceDetection') && faceDetector) {
|
|
257
480
|
const detectionResult = faceDetector.detectForVideo(imageInput, timestamp);
|
|
258
481
|
results.faces = detectionResult.detections.map((detection) => ({
|
|
259
|
-
|
|
260
|
-
// Normalize coordinates to 0-1 range to match other CV features
|
|
482
|
+
bounds: {
|
|
261
483
|
x: detection.boundingBox.originX / imageInput.width,
|
|
262
484
|
y: detection.boundingBox.originY / imageInput.height,
|
|
263
485
|
width: detection.boundingBox.width / imageInput.width,
|
|
264
486
|
height: detection.boundingBox.height / imageInput.height
|
|
265
487
|
},
|
|
488
|
+
center: {
|
|
489
|
+
x: (detection.boundingBox.originX + detection.boundingBox.width / 2) / imageInput.width,
|
|
490
|
+
y: (detection.boundingBox.originY + detection.boundingBox.height / 2) / imageInput.height
|
|
491
|
+
},
|
|
266
492
|
landmarks: [],
|
|
267
|
-
expressions:
|
|
493
|
+
expressions: EMPTY_EXPRESSIONS,
|
|
494
|
+
headPose: EMPTY_HEAD_POSE,
|
|
495
|
+
blendshapes: EMPTY_BLENDSHAPES,
|
|
268
496
|
confidence: detection.categories[0]?.score || 0
|
|
269
497
|
}));
|
|
270
498
|
}
|
|
271
499
|
|
|
272
|
-
// Process face landmarks
|
|
273
|
-
if (features.includes('faceMesh') && faceLandmarker) {
|
|
500
|
+
// Process face landmarks (used by faceMesh and emotionDetection)
|
|
501
|
+
if ((features.includes('faceMesh') || features.includes('emotionDetection')) && faceLandmarker) {
|
|
274
502
|
const landmarkResult = faceLandmarker.detectForVideo(imageInput, timestamp);
|
|
275
503
|
if (landmarkResult.faceLandmarks.length > 0) {
|
|
276
504
|
const landmarks = landmarkResult.faceLandmarks[0];
|
|
277
505
|
|
|
278
|
-
// If no face detection results exist, create a basic face structure
|
|
279
506
|
if (!results.faces) {
|
|
280
507
|
results.faces = [{
|
|
281
|
-
|
|
508
|
+
bounds: null,
|
|
509
|
+
center: null,
|
|
282
510
|
landmarks: [],
|
|
283
|
-
expressions:
|
|
284
|
-
|
|
511
|
+
expressions: EMPTY_EXPRESSIONS,
|
|
512
|
+
headPose: EMPTY_HEAD_POSE,
|
|
513
|
+
blendshapes: EMPTY_BLENDSHAPES,
|
|
514
|
+
confidence: 0.8
|
|
285
515
|
}];
|
|
286
516
|
}
|
|
287
517
|
|
|
288
|
-
|
|
518
|
+
const mappedLandmarks = landmarks.map((landmark) => ({
|
|
519
|
+
x: landmark.x,
|
|
520
|
+
y: landmark.y,
|
|
521
|
+
z: landmark.z || 0
|
|
522
|
+
}));
|
|
523
|
+
|
|
289
524
|
if (results.faces[0]) {
|
|
290
|
-
results.faces[0].landmarks =
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
525
|
+
results.faces[0].landmarks = mappedLandmarks;
|
|
526
|
+
results.faces[0].headPose = computeHeadPoseFromLandmarks(landmarks);
|
|
527
|
+
|
|
528
|
+
// Populate emotion data when emotionDetection is active
|
|
529
|
+
if (features.includes('emotionDetection') &&
|
|
530
|
+
landmarkResult.faceBlendshapes &&
|
|
531
|
+
landmarkResult.faceBlendshapes.length > 0) {
|
|
532
|
+
const bs = buildBlendshapesRecord(landmarkResult.faceBlendshapes[0].categories);
|
|
533
|
+
results.faces[0].blendshapes = bs;
|
|
534
|
+
results.faces[0].expressions = mapBlendshapesToEmotions(bs);
|
|
535
|
+
}
|
|
295
536
|
}
|
|
296
537
|
}
|
|
297
538
|
}
|
|
@@ -343,14 +584,7 @@ async function processFrame(imageInput, timestamp, features) {
|
|
|
343
584
|
height: segmentResult.categoryMask.height
|
|
344
585
|
};
|
|
345
586
|
|
|
346
|
-
|
|
347
|
-
if (DEBUG) {
|
|
348
|
-
console.log('🔧 [CV Tasks Worker] Segmentation mask:', {
|
|
349
|
-
width: results.segmentation.width,
|
|
350
|
-
height: results.segmentation.height,
|
|
351
|
-
maskSize: results.segmentation.mask.length
|
|
352
|
-
});
|
|
353
|
-
}
|
|
587
|
+
log('Segmentation mask:', results.segmentation.width, 'x', results.segmentation.height);
|
|
354
588
|
} finally {
|
|
355
589
|
// CRITICAL: Close MPMask instance to prevent resource leaks
|
|
356
590
|
segmentResult.categoryMask.close();
|
|
@@ -449,8 +683,18 @@ async function handleConfigUpdateInternal(features) {
|
|
|
449
683
|
faceDetector = null;
|
|
450
684
|
break;
|
|
451
685
|
case 'faceMesh':
|
|
452
|
-
|
|
453
|
-
|
|
686
|
+
// Only teardown FaceLandmarker if emotionDetection also not active
|
|
687
|
+
if (!newFeatures.has('emotionDetection')) {
|
|
688
|
+
cleanupPromises.push(cleanupWasmInstance(faceLandmarker, 'FaceLandmarker'));
|
|
689
|
+
faceLandmarker = null;
|
|
690
|
+
}
|
|
691
|
+
break;
|
|
692
|
+
case 'emotionDetection':
|
|
693
|
+
// Only teardown FaceLandmarker if faceMesh also not active
|
|
694
|
+
if (!newFeatures.has('faceMesh')) {
|
|
695
|
+
cleanupPromises.push(cleanupWasmInstance(faceLandmarker, 'FaceLandmarker'));
|
|
696
|
+
faceLandmarker = null;
|
|
697
|
+
}
|
|
454
698
|
break;
|
|
455
699
|
case 'handTracking':
|
|
456
700
|
cleanupPromises.push(cleanupWasmInstance(handLandmarker, 'HandLandmarker'));
|
|
@@ -485,6 +729,8 @@ async function handleConfigUpdateInternal(features) {
|
|
|
485
729
|
await initializeFaceDetection();
|
|
486
730
|
break;
|
|
487
731
|
case 'faceMesh':
|
|
732
|
+
case 'emotionDetection':
|
|
733
|
+
// Both share the FaceLandmarker (with blendshapes enabled)
|
|
488
734
|
await initializeFaceLandmarks();
|
|
489
735
|
break;
|
|
490
736
|
case 'handTracking':
|
|
@@ -525,10 +771,19 @@ async function handleConfigUpdate(features) {
|
|
|
525
771
|
self.onmessage = async (event) => {
|
|
526
772
|
const message = event.data;
|
|
527
773
|
|
|
528
|
-
|
|
774
|
+
// Only log non-process messages to avoid per-frame spam
|
|
775
|
+
if (message.type !== 'process') {
|
|
776
|
+
log('Received message:', message.type);
|
|
777
|
+
}
|
|
529
778
|
|
|
530
779
|
try {
|
|
531
780
|
switch (message.type) {
|
|
781
|
+
case 'debug': {
|
|
782
|
+
DEBUG = !!message.enabled;
|
|
783
|
+
log('Debug mode', DEBUG ? 'enabled' : 'disabled');
|
|
784
|
+
break;
|
|
785
|
+
}
|
|
786
|
+
|
|
532
787
|
case 'init': {
|
|
533
788
|
log('Received init message');
|
|
534
789
|
|