@viji-dev/core 0.3.22 → 0.3.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,878 @@
1
+ /**
2
+ * MediaPipe Tasks Vision Classic Worker
3
+ *
4
+ * Classic worker for MediaPipe Tasks Vision processing.
5
+ * Uses importScripts() to load MediaPipe Tasks Vision UMD bundle.
6
+ */
7
+
8
// Debug logging — controlled by CVSystem via 'debug' message
let DEBUG = false;

/**
 * Emit a namespaced console message, but only while debug mode is on.
 * All worker-side diagnostics funnel through here so they can be
 * toggled at runtime without reloading the worker.
 */
function log(...args) {
  if (!DEBUG) return;
  console.log('🔧 [CV Tasks Worker]', ...args);
}
15
+
16
// Define CommonJS environment for MediaPipe bundle.
// The UMD bundle attaches its exports to `exports`/`module.exports` when they
// exist; later lookups (e.g. `self.module.exports.FilesetResolver`) read the
// classes back out of these objects.
self.exports = {};
self.module = { exports: {} };
19
+
20
// Import MediaPipe Tasks Vision UMD bundle
log('Starting to load vision_bundle.js...');
try {
  importScripts('/dist/assets/vision_bundle.js');
  log('vision_bundle.js loaded successfully');
} catch (error) {
  // Deliberately not rethrown: the worker stays alive so it can still answer
  // messages; initializeVision() will later fail with a clear
  // 'FilesetResolver not found' error if the bundle is missing.
  console.error('❌ [CV Tasks Worker] Failed to load vision_bundle.js:', error);
}
28
+
29
// MediaPipe model instances — lazily created per feature, null until loaded
let faceDetector = null;
let faceLandmarker = null;
let handLandmarker = null;
let poseLandmarker = null;
let imageSegmenter = null;

// Vision runtime — the FilesetResolver result shared by all task constructors
let vision = null;
let isInitialized = false;

// Active features tracking — feature-name strings currently enabled
const activeFeatures = new Set();

// Configuration queue to prevent race conditions.
// Entries are { features, resolve, reject }; drained FIFO by processConfigQueue.
const configQueue = [];
let processingConfig = false;

// Worker health tracking — flipped when an 'Out of memory' error occurs while
// enabling a feature; once unhealthy, config updates are refused until restart.
let workerHealthy = true;
let memoryPressureDetected = false;
50
+
51
// Safe zero-defaults for face data when features are inactive.
// Frozen so every per-frame face result can share the same objects
// without risk of cross-frame mutation.
const EMPTY_EXPRESSIONS = Object.freeze({
  neutral: 0, happy: 0, sad: 0, angry: 0, surprised: 0, disgusted: 0, fearful: 0
});

const EMPTY_HEAD_POSE = Object.freeze({ pitch: 0, yaw: 0, roll: 0 });

// Full set of blendshape keys (also serves as the key universe for the
// emotion-prototype vectors via PROTOTYPE_KEYS).
const EMPTY_BLENDSHAPES = Object.freeze({
  browDownLeft: 0, browDownRight: 0, browInnerUp: 0, browOuterUpLeft: 0, browOuterUpRight: 0,
  cheekPuff: 0, cheekSquintLeft: 0, cheekSquintRight: 0,
  eyeBlinkLeft: 0, eyeBlinkRight: 0,
  eyeLookDownLeft: 0, eyeLookDownRight: 0, eyeLookInLeft: 0, eyeLookInRight: 0,
  eyeLookOutLeft: 0, eyeLookOutRight: 0, eyeLookUpLeft: 0, eyeLookUpRight: 0,
  eyeSquintLeft: 0, eyeSquintRight: 0, eyeWideLeft: 0, eyeWideRight: 0,
  jawForward: 0, jawLeft: 0, jawOpen: 0, jawRight: 0,
  mouthClose: 0, mouthDimpleLeft: 0, mouthDimpleRight: 0,
  mouthFrownLeft: 0, mouthFrownRight: 0, mouthFunnel: 0, mouthLeft: 0,
  mouthLowerDownLeft: 0, mouthLowerDownRight: 0, mouthPressLeft: 0, mouthPressRight: 0,
  mouthPucker: 0, mouthRight: 0, mouthRollLower: 0, mouthRollUpper: 0,
  mouthShrugLower: 0, mouthShrugUpper: 0, mouthSmileLeft: 0, mouthSmileRight: 0,
  mouthStretchLeft: 0, mouthStretchRight: 0, mouthUpperUpLeft: 0, mouthUpperUpRight: 0,
  noseSneerLeft: 0, noseSneerRight: 0, tongueOut: 0
});
74
+
75
/**
 * Convert MediaPipe faceBlendshapes categories array to a flat record.
 *
 * @param {{categoryName: string, score: number}[]} categories - Categories
 *   from one face's `faceBlendshapes` result.
 * @returns {Object<string, number>} Map of blendshape name to score.
 */
function buildBlendshapesRecord(categories) {
  return Object.fromEntries(
    categories.map((cat) => [cat.categoryName, cat.score])
  );
}
85
+
86
/**
 * EMFACS-based emotion prototype vectors (Ekman FACS → ARKit blendshapes).
 * Weights reflect each blendshape's reliability in MediaPipe's 2D web model.
 * Known near-zero blendshapes (cheekSquint*, noseSneer*, eyeWide*) are
 * down-weighted and supplemented by correlated signals that do activate.
 *
 * Each prototype maps blendshape name → weight; keys not listed are
 * implicitly 0 (the cosine-similarity loop iterates over PROTOTYPE_KEYS).
 *
 * Reference: Aldenhoven et al. (2026) "Real-Time Emotion Recognition
 * Performance of Mobile Devices" — EMFACS cosine similarity approach,
 * 68.3% accuracy on 7 emotions, exceeding human raters (58.9%).
 */
const EMOTION_PROTOTYPES = {
  // mouthSmile is unique to happiness — no other emotion uses it.
  // eyeSquint is a secondary "Duchenne smile" indicator.
  happy: {
    mouthSmileLeft: 1.0, mouthSmileRight: 1.0,
    eyeSquintLeft: 0.3, eyeSquintRight: 0.3
  },
  // Pouty/trembling lip: mouthShrugLower (chin raiser) is the primary signal,
  // mouthPucker (compressed lips) secondary. Compact prototype so it wins
  // over angry when the differentiating upper-face signals are absent.
  sad: {
    mouthShrugLower: 1.0,
    mouthPucker: 0.8
  },
  // Shares sad's mouth signals at lower weight, differentiated by upper-face
  // tension: eyeSquint + browDown. These extra dimensions shift the cosine
  // direction away from sad only when genuinely activated.
  angry: {
    mouthShrugLower: 0.6, mouthPucker: 0.5,
    eyeSquintLeft: 1.0, eyeSquintRight: 1.0,
    browDownLeft: 1.0, browDownRight: 1.0
  },
  // Brow raise only — the simplest, most distinctive prototype.
  // jawOpen removed to avoid overlap with fearful.
  surprised: {
    browInnerUp: 1.0,
    browOuterUpLeft: 1.0, browOuterUpRight: 1.0
  },
  // mouthUpperUp (upper lip raise) is the unique primary signal.
  // mouthFrown supports, browDown at low weight for wrinkled-brow disgust.
  disgusted: {
    mouthUpperUpLeft: 1.0, mouthUpperUpRight: 1.0,
    mouthFrownLeft: 0.8, mouthFrownRight: 0.8,
    browDownLeft: 0.3, browDownRight: 0.3
  },
  // Shares surprised's brow raise, differentiated by jawOpen (rare in other
  // emotions at even 10-20%). jawOpen is the primary differentiator.
  fearful: {
    browInnerUp: 0.8, browOuterUpLeft: 0.8, browOuterUpRight: 0.8,
    jawOpen: 1.0
  }
};
138
+
139
// Key universe for all blendshape vectors (observed and prototype alike).
const PROTOTYPE_KEYS = Object.keys(EMPTY_BLENDSHAPES);

// Pre-compute each prototype's Euclidean magnitude once so the per-frame
// cosine similarity only has to normalize the observed vector.
const PROTOTYPE_MAGNITUDES = {};
for (const [emotion, proto] of Object.entries(EMOTION_PROTOTYPES)) {
  const sumOfSquares = PROTOTYPE_KEYS.reduce((acc, key) => {
    const weight = proto[key] || 0;
    return acc + weight * weight;
  }, 0);
  PROTOTYPE_MAGNITUDES[emotion] = Math.sqrt(sumOfSquares);
}

// Noise floor: blendshape values below this are treated as zero to
// prevent resting-state activations from matching emotion prototypes
const BLENDSHAPE_NOISE_FLOOR = 0.10;
155
+
156
/**
 * Cosine similarity between observed blendshape vector and a prototype.
 * cos(v, p) = (v · p) / (|v| * |p|)
 * Applies a noise floor to observed values to suppress resting-state noise.
 *
 * @param {Object<string, number>} observed - Observed blendshape scores.
 * @param {Object<string, number>} prototype - Emotion prototype weights.
 * @param {number} protoMag - Pre-computed magnitude of the prototype.
 * @returns {number} Cosine similarity, or 0 when either vector is ~zero.
 */
function emotionCosineSimilarity(observed, prototype, protoMag) {
  let dotProduct = 0;
  let observedSumSq = 0;
  for (const key of PROTOTYPE_KEYS) {
    const rawValue = observed[key] || 0;
    // Values at or below the noise floor are treated as fully inactive.
    const value = rawValue > BLENDSHAPE_NOISE_FLOOR ? rawValue : 0;
    const weight = prototype[key] || 0;
    dotProduct += value * weight;
    observedSumSq += value * value;
  }
  const observedMag = Math.sqrt(observedSumSq);
  // Guard against division by (near-)zero magnitudes.
  if (observedMag < 1e-8 || protoMag < 1e-8) return 0;
  return dotProduct / (observedMag * protoMag);
}
175
+
176
// Cross-suppression: when one emotion is confident, competing emotions are
// reduced. Uses raw (pre-suppression) scores so order doesn't matter.
// Shape: [suppressor] → { [target]: strength 0-1 }; applied in
// mapBlendshapesToEmotions stage 3 as scores[target] *= (1 - score * strength).
const EMOTION_INHIBITIONS = {
  happy: { angry: 0.7, sad: 0.5, disgusted: 0.4, fearful: 0.3 },
  sad: { happy: 0.3, angry: 0.2 },
  angry: { happy: 0.3, sad: 0.2 },
  surprised: { angry: 0.3, sad: 0.3 },
  disgusted: { happy: 0.4, surprised: 0.2 },
  fearful: { happy: 0.3, angry: 0.2 }
};
187
+
188
/**
 * Classify observed blendshapes into emotions using a 3-stage pipeline:
 * 1. Cosine similarity against EMFACS prototypes (base scores)
 * 2. Key-signal boosters for defining blendshapes (mouthPress → angry)
 * 3. Cross-emotion inhibition matrix (happy suppresses angry, etc.)
 *
 * @param {Object<string, number>} bs - Flat blendshape record
 *   (from buildBlendshapesRecord).
 * @returns {{neutral: number, happy: number, sad: number, angry: number,
 *   surprised: number, disgusted: number, fearful: number}} Per-emotion
 *   scores in [0, 1]; note they are not normalized to sum to 1.
 */
function mapBlendshapesToEmotions(bs) {
  const NF = BLENDSHAPE_NOISE_FLOOR;

  // --- Stage 1: Cosine similarity base scores ---
  // Clamped at 0; cosine over non-negative vectors is non-negative anyway,
  // so the Math.max is a defensive floor.
  const scores = {};
  for (const [emotion, proto] of Object.entries(EMOTION_PROTOTYPES)) {
    scores[emotion] = Math.max(0, emotionCosineSimilarity(bs, proto, PROTOTYPE_MAGNITUDES[emotion]));
  }

  // --- Stage 2: Key-signal boosters ---
  // mouthPress is a defining angry signal not in the prototype (to avoid
  // resting-state contamination) but boosts angry when clearly present
  const mouthPress = Math.max(0,
    ((bs.mouthPressLeft || 0) + (bs.mouthPressRight || 0)) / 2 - NF);
  if (mouthPress > 0) {
    scores.angry = Math.min(1, scores.angry + mouthPress * 0.3);
  }

  // --- Stage 3: Cross-emotion inhibition ---
  // Snapshot raw scores so suppression is non-circular
  const raw = {};
  for (const key in scores) raw[key] = scores[key];

  for (const [suppressor, targets] of Object.entries(EMOTION_INHIBITIONS)) {
    const suppressorScore = raw[suppressor] || 0;
    // 0.1 = minimum confidence before a suppressor affects its targets
    if (suppressorScore > 0.1) {
      for (const [target, strength] of Object.entries(targets)) {
        scores[target] *= (1 - suppressorScore * strength);
      }
    }
  }

  // --- Neutral: dominant when no emotion is confident ---
  // Computed from post-inhibition scores: full neutral below the threshold,
  // otherwise neutral fades out as the strongest emotion grows.
  let maxScore = 0;
  for (const emotion of Object.keys(EMOTION_PROTOTYPES)) {
    if (scores[emotion] > maxScore) maxScore = scores[emotion];
  }
  const neutralThreshold = 0.35;
  scores.neutral = maxScore < neutralThreshold ? 1.0 : Math.max(0, 1.0 - maxScore * 1.5);

  return {
    neutral: scores.neutral,
    happy: scores.happy || 0,
    sad: scores.sad || 0,
    angry: scores.angry || 0,
    surprised: scores.surprised || 0,
    disgusted: scores.disgusted || 0,
    fearful: scores.fearful || 0
  };
}
246
+
247
/**
 * Compute head pose (pitch, yaw, roll in degrees) from 468 face landmarks.
 * Uses nose tip, chin, eye corners, and forehead to derive 3D orientation.
 *
 * Fix: removed the unused `eyeMidY` local; switched to Number.isNaN
 * (identical behavior here since all operands are numbers).
 *
 * @param {{x: number, y: number, z?: number}[]} landmarks - MediaPipe
 *   FaceMesh landmark list (normalized coordinates).
 * @returns {{pitch: number, yaw: number, roll: number}} Angles in degrees,
 *   clamped to ±90 (pitch/yaw) and ±180 (roll); all zeros when any key
 *   landmark is missing.
 */
function computeHeadPoseFromLandmarks(landmarks) {
  // Key landmark indices (MediaPipe FaceMesh)
  const noseTip = landmarks[1];
  const chin = landmarks[152];
  const leftEye = landmarks[33];
  const rightEye = landmarks[263];
  const forehead = landmarks[10];

  if (!noseTip || !chin || !leftEye || !rightEye || !forehead) {
    return { pitch: 0, yaw: 0, roll: 0 };
  }

  // Yaw: horizontal offset of the nose tip from the eye midpoint, against
  // the nose-to-eye depth difference (+0.001 avoids atan2(x, 0)).
  // NOTE(review): this is a heuristic, not a true 3D solve — the second
  // atan2 argument uses |Δz|, so yaw sign comes only from the x offset.
  const eyeMidX = (leftEye.x + rightEye.x) / 2;
  const yaw = Math.atan2(noseTip.x - eyeMidX, Math.abs(noseTip.z - ((leftEye.z + rightEye.z) / 2 || 0)) + 0.001) * (180 / Math.PI);

  // Pitch: vertical angle from forehead to chin through nose
  // (|| 0.001 guards against a degenerate zero-length face axis)
  const faceVerticalLen = Math.sqrt((chin.x - forehead.x) ** 2 + (chin.y - forehead.y) ** 2) || 0.001;
  const noseRelY = (noseTip.y - forehead.y) / faceVerticalLen;
  const pitch = (noseRelY - 0.5) * 180;

  // Roll: tilt from horizontal eye line
  const roll = Math.atan2(rightEye.y - leftEye.y, rightEye.x - leftEye.x) * (180 / Math.PI);

  return {
    pitch: Number.isNaN(pitch) ? 0 : Math.max(-90, Math.min(90, pitch)),
    yaw: Number.isNaN(yaw) ? 0 : Math.max(-90, Math.min(90, yaw)),
    roll: Number.isNaN(roll) ? 0 : Math.max(-180, Math.min(180, roll))
  };
}
282
+
283
/**
 * Initialize MediaPipe Tasks Vision runtime.
 * Idempotent: subsequent calls return immediately once `isInitialized` is set.
 * Populates the module-level `vision` handle used by all task constructors.
 *
 * @throws {Error} When FilesetResolver is absent (bundle failed to load) or
 *   forVisionTasks rejects; the error is logged then rethrown to the caller.
 */
async function initializeVision() {
  if (isInitialized) {
    log('Vision already initialized, skipping');
    return;
  }

  try {
    log('Starting MediaPipe Tasks Vision initialization...');

    const wasmBasePath = '/dist/assets/wasm';
    log('WASM base path:', wasmBasePath);

    // The UMD bundle may attach exports in any of these three places
    // depending on how it detected the environment.
    const FilesetResolver = self.FilesetResolver || self.module.exports.FilesetResolver || self.exports.FilesetResolver;
    log('FilesetResolver found:', !!FilesetResolver);

    if (!FilesetResolver) {
      throw new Error('FilesetResolver not found in any expected location');
    }

    vision = await FilesetResolver.forVisionTasks(wasmBasePath);

    isInitialized = true;
    log('✅ MediaPipe Tasks Vision initialized successfully');
  } catch (error) {
    log('❌ Failed to initialize MediaPipe Tasks Vision:', error);
    throw error;
  }
}
314
+
315
/**
 * Lazily create the FaceDetector task (BlazeFace short-range model, GPU
 * delegate, video mode). No-op when an instance already exists.
 * Errors are logged and rethrown so config handling can react.
 */
async function initializeFaceDetection() {
  if (faceDetector) return;

  // The shared vision runtime must exist before any task can be created
  await initializeVision();

  // The UMD bundle may have attached the class in any of these locations
  const FaceDetectorClass =
    self.FaceDetector || self.module.exports.FaceDetector || self.exports.FaceDetector;

  try {
    log('Loading Face Detector...');
    faceDetector = await FaceDetectorClass.createFromOptions(vision, {
      baseOptions: {
        modelAssetPath: 'https://storage.googleapis.com/mediapipe-models/face_detector/blaze_face_short_range/float16/1/blaze_face_short_range.tflite',
        delegate: 'GPU'
      },
      runningMode: 'VIDEO',
      minDetectionConfidence: 0.5,
      minSuppressionThreshold: 0.3
    });
    log('✅ Face Detector loaded');
  } catch (error) {
    log('❌ Failed to load Face Detector:', error);
    throw error;
  }
}
345
+
346
/**
 * Lazily create the FaceLandmarker task (video mode, single face, with
 * blendshape output enabled — shared by faceMesh and emotionDetection).
 * No-op when an instance already exists. Errors are logged and rethrown.
 */
async function initializeFaceLandmarks() {
  if (faceLandmarker) return;

  // The shared vision runtime must exist before any task can be created
  await initializeVision();

  // The UMD bundle may have attached the class in any of these locations
  const FaceLandmarkerClass =
    self.FaceLandmarker || self.module.exports.FaceLandmarker || self.exports.FaceLandmarker;

  try {
    log('Loading Face Landmarker...');
    faceLandmarker = await FaceLandmarkerClass.createFromOptions(vision, {
      baseOptions: {
        modelAssetPath: 'https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task',
        delegate: 'GPU'
      },
      runningMode: 'VIDEO',
      numFaces: 1,
      outputFaceBlendshapes: true
    });
    log('✅ Face Landmarker loaded (blendshapes enabled)');
  } catch (error) {
    log('❌ Failed to load Face Landmarker:', error);
    throw error;
  }
}
376
+
377
/**
 * Lazily create the HandLandmarker task (video mode, up to two hands).
 * No-op when an instance already exists. Errors are logged and rethrown.
 */
async function initializeHandTracking() {
  if (handLandmarker) return;

  // The shared vision runtime must exist before any task can be created
  await initializeVision();

  // The UMD bundle may have attached the class in any of these locations
  const HandLandmarkerClass =
    self.HandLandmarker || self.module.exports.HandLandmarker || self.exports.HandLandmarker;

  try {
    log('Loading Hand Landmarker...');
    handLandmarker = await HandLandmarkerClass.createFromOptions(vision, {
      baseOptions: {
        modelAssetPath: 'https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task',
        delegate: 'GPU'
      },
      runningMode: 'VIDEO',
      numHands: 2
    });
    log('✅ Hand Landmarker loaded');
  } catch (error) {
    log('❌ Failed to load Hand Landmarker:', error);
    throw error;
  }
}
406
+
407
/**
 * Lazily create the PoseLandmarker task (lite model, video mode, one pose).
 * No-op when an instance already exists. Errors are logged and rethrown.
 */
async function initializePoseDetection() {
  if (poseLandmarker) return;

  // The shared vision runtime must exist before any task can be created
  await initializeVision();

  // The UMD bundle may have attached the class in any of these locations
  const PoseLandmarkerClass =
    self.PoseLandmarker || self.module.exports.PoseLandmarker || self.exports.PoseLandmarker;

  try {
    log('Loading Pose Landmarker...');
    poseLandmarker = await PoseLandmarkerClass.createFromOptions(vision, {
      baseOptions: {
        modelAssetPath: 'https://storage.googleapis.com/mediapipe-models/pose_landmarker/pose_landmarker_lite/float16/1/pose_landmarker_lite.task',
        delegate: 'GPU'
      },
      runningMode: 'VIDEO',
      numPoses: 1
    });
    log('✅ Pose Landmarker loaded');
  } catch (error) {
    log('❌ Failed to load Pose Landmarker:', error);
    throw error;
  }
}
436
+
437
/**
 * Lazily create the ImageSegmenter task (selfie segmenter, image mode,
 * category mask output only). No-op when an instance already exists.
 * Errors are logged and rethrown.
 */
async function initializeBodySegmentation() {
  if (imageSegmenter) return;

  // The shared vision runtime must exist before any task can be created
  await initializeVision();

  // The UMD bundle may have attached the class in any of these locations
  const ImageSegmenterClass =
    self.ImageSegmenter || self.module.exports.ImageSegmenter || self.exports.ImageSegmenter;

  try {
    log('Loading Image Segmenter...');
    imageSegmenter = await ImageSegmenterClass.createFromOptions(vision, {
      baseOptions: {
        modelAssetPath: 'https://storage.googleapis.com/mediapipe-models/image_segmenter/selfie_segmenter/float16/1/selfie_segmenter.tflite',
        delegate: 'GPU'
      },
      runningMode: 'IMAGE',
      outputCategoryMask: true,
      outputConfidenceMasks: false
    });
    log('✅ Image Segmenter loaded');
  } catch (error) {
    log('❌ Failed to load Image Segmenter:', error);
    throw error;
  }
}
467
+
468
/**
 * Process video frame with active CV features.
 * Runs only the detectors whose feature flag is requested AND whose model
 * instance is loaded; each detector contributes its own key to the result.
 *
 * @param {ImageData|ImageBitmap} imageInput - Image input (ImageData or ImageBitmap)
 * @param {number} timestamp - Frame timestamp
 * @param {string[]} features - Active CV features
 * @returns {Promise<Object>} Partial results object with any of: `faces`,
 *   `hands`, `pose`, `segmentation`. Coordinates are normalized to [0, 1].
 *   Returns `{}` on any processing error (error is swallowed — see NOTE at
 *   the catch below).
 */
async function processFrame(imageInput, timestamp, features) {
  const results = {};

  try {
    // Process face detection — bounding boxes normalized by frame size
    if (features.includes('faceDetection') && faceDetector) {
      const detectionResult = faceDetector.detectForVideo(imageInput, timestamp);
      results.faces = detectionResult.detections.map((detection) => ({
        bounds: {
          x: detection.boundingBox.originX / imageInput.width,
          y: detection.boundingBox.originY / imageInput.height,
          width: detection.boundingBox.width / imageInput.width,
          height: detection.boundingBox.height / imageInput.height
        },
        center: {
          x: (detection.boundingBox.originX + detection.boundingBox.width / 2) / imageInput.width,
          y: (detection.boundingBox.originY + detection.boundingBox.height / 2) / imageInput.height
        },
        // Frozen zero-defaults; overwritten below when faceMesh /
        // emotionDetection are also active
        landmarks: [],
        expressions: EMPTY_EXPRESSIONS,
        headPose: EMPTY_HEAD_POSE,
        blendshapes: EMPTY_BLENDSHAPES,
        confidence: detection.categories[0]?.score || 0
      }));
    }

    // Process face landmarks (used by faceMesh and emotionDetection)
    if ((features.includes('faceMesh') || features.includes('emotionDetection')) && faceLandmarker) {
      const landmarkResult = faceLandmarker.detectForVideo(imageInput, timestamp);
      if (landmarkResult.faceLandmarks.length > 0) {
        const landmarks = landmarkResult.faceLandmarks[0];

        // If faceDetection did not run, synthesize a face entry so the
        // landmark/emotion data has somewhere to live (no bounds available).
        if (!results.faces) {
          results.faces = [{
            bounds: null,
            center: null,
            landmarks: [],
            expressions: EMPTY_EXPRESSIONS,
            headPose: EMPTY_HEAD_POSE,
            blendshapes: EMPTY_BLENDSHAPES,
            confidence: 0.8 // fixed placeholder — landmarker gives no box score
          }];
        }

        const mappedLandmarks = landmarks.map((landmark) => ({
          x: landmark.x,
          y: landmark.y,
          z: landmark.z || 0
        }));

        // Landmarks/pose are only attached to the first face (numFaces: 1)
        if (results.faces[0]) {
          results.faces[0].landmarks = mappedLandmarks;
          results.faces[0].headPose = computeHeadPoseFromLandmarks(landmarks);

          // Populate emotion data when emotionDetection is active
          if (features.includes('emotionDetection') &&
              landmarkResult.faceBlendshapes &&
              landmarkResult.faceBlendshapes.length > 0) {
            const bs = buildBlendshapesRecord(landmarkResult.faceBlendshapes[0].categories);
            results.faces[0].blendshapes = bs;
            results.faces[0].expressions = mapBlendshapesToEmotions(bs);
          }
        }
      }
    }

    // Process hand tracking — one entry per detected hand
    if (features.includes('handTracking') && handLandmarker) {
      const handResult = handLandmarker.detectForVideo(imageInput, timestamp);
      results.hands = handResult.landmarks.map((landmarks, index) => ({
        landmarks: landmarks.map((landmark) => ({
          x: landmark.x,
          y: landmark.y,
          z: landmark.z || 0
        })),
        handedness: handResult.handednesses[index]?.[0]?.categoryName || 'Unknown',
        confidence: handResult.handednesses[index]?.[0]?.score || 0
      }));
    }

    // Process pose detection — first pose only (numPoses: 1)
    if (features.includes('poseDetection') && poseLandmarker) {
      const poseResult = poseLandmarker.detectForVideo(imageInput, timestamp);
      if (poseResult.landmarks.length > 0) {
        results.pose = {
          landmarks: poseResult.landmarks[0].map((landmark) => ({
            x: landmark.x,
            y: landmark.y,
            z: landmark.z || 0,
            visibility: landmark.visibility || 1
          })),
          worldLandmarks: poseResult.worldLandmarks?.[0]?.map((landmark) => ({
            x: landmark.x,
            y: landmark.y,
            z: landmark.z || 0,
            visibility: landmark.visibility || 1
          })) || []
        };
      }
    }

    // Process body segmentation (IMAGE mode — segment(), not detectForVideo)
    if (features.includes('bodySegmentation') && imageSegmenter) {
      const segmentResult = imageSegmenter.segment(imageInput);
      if (segmentResult.categoryMask) {
        try {
          // Extract data before closing the mask
          results.segmentation = {
            mask: segmentResult.categoryMask.getAsUint8Array(),
            width: segmentResult.categoryMask.width,
            height: segmentResult.categoryMask.height
          };

          log('Segmentation mask:', results.segmentation.width, 'x', results.segmentation.height);
        } finally {
          // CRITICAL: Close MPMask instance to prevent resource leaks
          segmentResult.categoryMask.close();
        }
      }
    }

    return results;
  } catch (error) {
    // NOTE(review): errors are logged only when DEBUG is on and an empty
    // result is returned — a persistent failure is silent in production.
    log('❌ Error processing frame:', error);
    return {};
  }
}
601
+
602
+ // Note: Removed reusable canvas functions - no longer needed with direct ImageBitmap processing!
603
+
604
/**
 * Clean up WASM instance with proper memory management.
 *
 * @param {{close: function}|null} instance - Task instance to dispose (may be null).
 * @param {string} featureName - Human-readable name used in log messages.
 * @returns {Promise<void>} Resolves after a short grace period when close()
 *   succeeded, or immediately when there is nothing to do / close() threw.
 */
function cleanupWasmInstance(instance, featureName) {
  if (!instance) return Promise.resolve();

  try {
    log(`🧹 Cleaning up ${featureName} WASM instance...`);
    instance.close();

    // Force garbage collection if available (Chrome DevTools)
    if (typeof gc === 'function') {
      gc();
    }

    // Give time for WASM cleanup
    return new Promise((resolve) => setTimeout(resolve, 100));
  } catch (error) {
    // close() failures are non-fatal: log and fall through to resolve
    log(`⚠️ Error cleaning up ${featureName}:`, error);
    return Promise.resolve();
  }
}
628
+
629
/**
 * Drain the configuration queue sequentially.
 * Re-entrant calls while a drain is in progress are no-ops; the active
 * drain picks up any entries pushed in the meantime.
 */
async function processConfigQueue() {
  if (processingConfig) return;
  if (configQueue.length === 0) return;

  processingConfig = true;

  try {
    let entry;
    while ((entry = configQueue.shift()) !== undefined) {
      try {
        await handleConfigUpdateInternal(entry.features);
        entry.resolve({ configured: true, activeFeatures: Array.from(activeFeatures) });
      } catch (error) {
        // Reject only this entry; keep draining the rest of the queue
        entry.reject(error);
      }
    }
  } finally {
    processingConfig = false;
  }
}
652
+
653
/**
 * Queue configuration update to prevent race conditions.
 *
 * @param {string[]} features - Full desired feature set.
 * @returns {Promise<{configured: boolean, activeFeatures: string[]}>}
 *   Resolves when this update has been applied by processConfigQueue;
 *   rejects with the underlying error if applying it fails.
 */
function queueConfigUpdate(features) {
  return new Promise((resolve, reject) => {
    configQueue.push({ features, resolve, reject });
    // Fire-and-forget: no-ops if a drain is already in progress
    processConfigQueue();
  });
}
662
+
663
/**
 * Handle feature configuration updates (internal).
 * Diffs the requested feature set against `activeFeatures`, tears down
 * instances for removed features first (awaiting their cleanup), then
 * initializes newly requested ones.
 *
 * @param {string[]} features - Full desired feature set (not a delta).
 * @throws {Error} When the worker is unhealthy, when a feature fails to
 *   initialize, or (with a restart-required message) on memory exhaustion.
 */
async function handleConfigUpdateInternal(features) {
  if (!workerHealthy) {
    throw new Error('Worker is in unhealthy state, restart required');
  }

  const newFeatures = new Set(features);
  const toEnable = features.filter(f => !activeFeatures.has(f));
  const toDisable = Array.from(activeFeatures).filter(f => !newFeatures.has(f));

  log(`🔄 Config update: enable [${toEnable.join(', ')}], disable [${toDisable.join(', ')}]`);

  // Disable unused features first (cleanup instances)
  const cleanupPromises = [];
  for (const feature of toDisable) {
    switch (feature) {
      case 'faceDetection':
        cleanupPromises.push(cleanupWasmInstance(faceDetector, 'FaceDetector'));
        faceDetector = null;
        break;
      case 'faceMesh':
        // Only teardown FaceLandmarker if emotionDetection also not active
        // (the two features share a single FaceLandmarker instance)
        if (!newFeatures.has('emotionDetection')) {
          cleanupPromises.push(cleanupWasmInstance(faceLandmarker, 'FaceLandmarker'));
          faceLandmarker = null;
        }
        break;
      case 'emotionDetection':
        // Only teardown FaceLandmarker if faceMesh also not active
        if (!newFeatures.has('faceMesh')) {
          cleanupPromises.push(cleanupWasmInstance(faceLandmarker, 'FaceLandmarker'));
          faceLandmarker = null;
        }
        break;
      case 'handTracking':
        cleanupPromises.push(cleanupWasmInstance(handLandmarker, 'HandLandmarker'));
        handLandmarker = null;
        break;
      case 'poseDetection':
        cleanupPromises.push(cleanupWasmInstance(poseLandmarker, 'PoseLandmarker'));
        poseLandmarker = null;
        break;
      case 'bodySegmentation':
        cleanupPromises.push(cleanupWasmInstance(imageSegmenter, 'ImageSegmenter'));
        imageSegmenter = null;
        break;
    }
    activeFeatures.delete(feature);
    log(`🗑️ Disabled feature: ${feature}`);
  }

  // Wait for all cleanup to complete
  if (cleanupPromises.length > 0) {
    await Promise.all(cleanupPromises);
    log('✅ All cleanup completed');
  }

  // Note: No canvas cleanup needed - using direct ImageBitmap processing!

  // Enable new features (sequentially; a failure aborts the remainder)
  for (const feature of toEnable) {
    try {
      switch (feature) {
        case 'faceDetection':
          await initializeFaceDetection();
          break;
        case 'faceMesh':
        case 'emotionDetection':
          // Both share the FaceLandmarker (with blendshapes enabled)
          await initializeFaceLandmarks();
          break;
        case 'handTracking':
          await initializeHandTracking();
          break;
        case 'poseDetection':
          await initializePoseDetection();
          break;
        case 'bodySegmentation':
          await initializeBodySegmentation();
          break;
      }
      activeFeatures.add(feature);
      log(`✅ Enabled feature: ${feature}`);
    } catch (error) {
      log(`❌ Failed to enable feature ${feature}:`, error);

      // Check if this is a memory error — if so, mark the worker unhealthy
      // so further config updates are refused until a restart
      if (error.message && error.message.includes('Out of memory')) {
        memoryPressureDetected = true;
        workerHealthy = false;
        throw new Error(`Memory exhausted while enabling ${feature}. Worker restart required.`);
      }

      throw error;
    }
  }
}
762
+
763
/**
 * Legacy function for backward compatibility.
 * Thin async wrapper that delegates to the serialized config queue.
 */
async function handleConfigUpdate(features) {
  return queueConfigUpdate(features);
}
769
+
770
// Message handler.
// Protocol: every incoming message has a `type`; every reply is posted as
// { type: 'result', success, ... }. Supported types:
//   'debug'   — { enabled }: toggle DEBUG logging (no reply)
//   'init'    — initialize the vision runtime, reply { initialized: true }
//   'config'  — { features }: reconcile active features, reply with result
//   'process' — { bitmap, timestamp, features }: run detectors on one frame
self.onmessage = async (event) => {
  const message = event.data;

  // Only log non-process messages to avoid per-frame spam
  if (message.type !== 'process') {
    log('Received message:', message.type);
  }

  try {
    switch (message.type) {
      case 'debug': {
        DEBUG = !!message.enabled;
        log('Debug mode', DEBUG ? 'enabled' : 'disabled');
        break;
      }

      case 'init': {
        log('Received init message');

        try {
          await initializeVision();
          log('Vision runtime ready for feature loading');
        } catch (error) {
          // Rethrown so the outer catch posts the failure to the main thread
          log('❌ Vision runtime initialization failed:', error);
          throw error;
        }

        const response = {
          type: 'result',
          success: true,
          data: { initialized: true }
        };
        self.postMessage(response);
        break;
      }

      case 'config': {
        log('Received config message:', message.features);

        try {
          const result = await handleConfigUpdate(message.features);

          const response = {
            type: 'result',
            success: true,
            data: result
          };
          self.postMessage(response);
        } catch (error) {
          log('❌ Config update failed:', error);

          // Check if worker needs restart — unhealthy/memory-pressure states
          // get a dedicated response so the host can recreate the worker
          if (!workerHealthy || memoryPressureDetected) {
            const errorResponse = {
              type: 'result',
              success: false,
              error: error.message,
              restartRequired: true
            };
            self.postMessage(errorResponse);
          } else {
            throw error; // Re-throw for normal error handling
          }
        }
        break;
      }

      case 'process': {
        try {
          // 🚀 OPTIMIZED: Pass ImageBitmap directly to MediaPipe (no conversion!)
          const results = await processFrame(message.bitmap, message.timestamp, message.features);

          const response = {
            type: 'result',
            success: true,
            data: results
          };
          self.postMessage(response);
        } finally {
          // Clean up ImageBitmap after processing — this worker owns the
          // transferred bitmap, so it must be released every frame
          if (message.bitmap && typeof message.bitmap.close === 'function') {
            message.bitmap.close();
          }
        }
        break;
      }

      default:
        log('❌ Unknown message type:', message.type);
        const errorResponse = {
          type: 'result',
          success: false,
          error: `Unknown message type: ${message.type}`
        };
        self.postMessage(errorResponse);
    }
  } catch (error) {
    // Last-resort handler: any error not handled above still produces a
    // result message so the main thread never waits forever
    log('❌ Error handling message:', error);
    const errorResponse = {
      type: 'result',
      success: false,
      error: error instanceof Error ? error.message : String(error)
    };
    self.postMessage(errorResponse);
  }
};
877
+
878
+ log('CV Tasks Worker initialized and ready');