@viji-dev/core 0.3.36 → 0.3.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,974 +0,0 @@
1
- /**
2
- * MediaPipe Tasks Vision Classic Worker
3
- *
4
- * Classic worker for MediaPipe Tasks Vision processing.
5
- * Asset URLs (vision bundle, WASM) are received from CVSystem via the 'init'
6
- * message — no hardcoded paths.
7
- */
8
-
9
// Debug logging — switched on and off by CVSystem through the 'debug' message
let DEBUG = false;

/**
 * Print a prefixed diagnostic line, but only while DEBUG is enabled.
 * @param {...*} parts - Values forwarded unchanged to console.log.
 */
function log(...parts) {
  if (!DEBUG) return;
  console.log('🔧 [CV Tasks Worker]', ...parts);
}
16
-
17
// Locations of the vision bundle and WASM files; filled in by the 'init' message
let assetConfig = { visionBundleUrl: null, wasmBasePath: null };

// Minimal CommonJS globals so the MediaPipe bundle has somewhere to export to
self.exports = {};
self.module = { exports: {} };

// One handle per MediaPipe task; null while the task is not loaded
let faceDetector = null;
let faceLandmarker = null;
let gestureRecognizer = null;
let poseLandmarker = null;
let imageSegmenter = null;

// Fileset returned by FilesetResolver, plus a flag recording that it was resolved
let vision = null;
let isInitialized = false;

// Names of the features that are currently enabled
const activeFeatures = new Set();

// Pending config requests are queued and drained one at a time to avoid races
const configQueue = [];
let processingConfig = false;

// Health flags; once the worker is marked unhealthy, config updates are refused
let workerHealthy = true;
let memoryPressureDetected = false;
45
-
46
// Zero-valued placeholders attached to face results while the matching feature is inactive.
// All three are frozen because the same instances are shared across every result.
const EMPTY_EXPRESSIONS = Object.freeze(
  Object.fromEntries(
    ['neutral', 'happy', 'sad', 'angry', 'surprised', 'disgusted', 'fearful']
      .map((emotion) => [emotion, 0])
  )
);

const EMPTY_HEAD_POSE = Object.freeze({ pitch: 0, yaw: 0, roll: 0 });

// Key order matters: PROTOTYPE_KEYS is derived from it elsewhere in this file
const EMPTY_BLENDSHAPES = Object.freeze(
  Object.fromEntries(
    [
      'browDownLeft', 'browDownRight', 'browInnerUp', 'browOuterUpLeft', 'browOuterUpRight',
      'cheekPuff', 'cheekSquintLeft', 'cheekSquintRight',
      'eyeBlinkLeft', 'eyeBlinkRight',
      'eyeLookDownLeft', 'eyeLookDownRight', 'eyeLookInLeft', 'eyeLookInRight',
      'eyeLookOutLeft', 'eyeLookOutRight', 'eyeLookUpLeft', 'eyeLookUpRight',
      'eyeSquintLeft', 'eyeSquintRight', 'eyeWideLeft', 'eyeWideRight',
      'jawForward', 'jawLeft', 'jawOpen', 'jawRight',
      'mouthClose', 'mouthDimpleLeft', 'mouthDimpleRight',
      'mouthFrownLeft', 'mouthFrownRight', 'mouthFunnel', 'mouthLeft',
      'mouthLowerDownLeft', 'mouthLowerDownRight', 'mouthPressLeft', 'mouthPressRight',
      'mouthPucker', 'mouthRight', 'mouthRollLower', 'mouthRollUpper',
      'mouthShrugLower', 'mouthShrugUpper', 'mouthSmileLeft', 'mouthSmileRight',
      'mouthStretchLeft', 'mouthStretchRight', 'mouthUpperUpLeft', 'mouthUpperUpRight',
      'noseSneerLeft', 'noseSneerRight', 'tongueOut'
    ].map((shape) => [shape, 0])
  )
);
69
-
70
/**
 * Convert a MediaPipe faceBlendshapes categories array into a flat record
 * mapping each `categoryName` to its `score`.
 *
 * A missing or non-iterable `categories` value yields an empty record rather
 * than throwing, so one malformed blendshape result cannot abort the
 * processing of the whole frame.
 *
 * @param {Iterable<{categoryName: string, score: number}>|null|undefined} categories
 * @returns {Object<string, number>} Record keyed by category name.
 */
function buildBlendshapesRecord(categories) {
  const record = {};
  if (categories == null || typeof categories[Symbol.iterator] !== 'function') {
    return record;
  }
  for (const cat of categories) {
    record[cat.categoryName] = cat.score;
  }
  return record;
}
80
-
81
/**
 * EMFACS-based emotion prototype vectors (Ekman FACS → ARKit blendshapes).
 * Each weight reflects how reliable that blendshape is in MediaPipe's 2D web
 * model. Blendshapes known to stay near zero (cheekSquint*, noseSneer*,
 * eyeWide*) are down-weighted and supplemented by correlated signals that do
 * activate.
 *
 * Reference: Aldenhoven et al. (2026) "Real-Time Emotion Recognition
 * Performance of Mobile Devices" — EMFACS cosine similarity approach,
 * 68.3% accuracy on 7 emotions, exceeding human raters (58.9%).
 */
const EMOTION_PROTOTYPES = {
  // mouthSmile appears in no other prototype, so it identifies happiness;
  // eyeSquint adds the secondary "Duchenne smile" cue.
  happy: {
    mouthSmileLeft: 1.0,
    mouthSmileRight: 1.0,
    eyeSquintLeft: 0.3,
    eyeSquintRight: 0.3
  },
  // Pouty/trembling lip: mouthShrugLower (chin raiser) leads, mouthPucker
  // (compressed lips) follows. Kept compact so it beats angry when the
  // upper-face signals that separate the two are absent.
  sad: {
    mouthShrugLower: 1.0,
    mouthPucker: 0.8
  },
  // Reuses sad's mouth signals at lower weight; eyeSquint + browDown tension
  // pulls the cosine direction away from sad only when truly activated.
  angry: {
    mouthShrugLower: 0.6,
    mouthPucker: 0.5,
    eyeSquintLeft: 1.0,
    eyeSquintRight: 1.0,
    browDownLeft: 1.0,
    browDownRight: 1.0
  },
  // Brow raise only. jawOpen is deliberately left out to avoid overlapping
  // with fearful.
  surprised: {
    browInnerUp: 1.0,
    browOuterUpLeft: 1.0,
    browOuterUpRight: 1.0
  },
  // mouthUpperUp (upper lip raise) is the primary signal; mouthFrown supports
  // it and a lightly weighted browDown covers wrinkled-brow disgust.
  disgusted: {
    mouthUpperUpLeft: 1.0,
    mouthUpperUpRight: 1.0,
    mouthFrownLeft: 0.8,
    mouthFrownRight: 0.8,
    browDownLeft: 0.3,
    browDownRight: 0.3
  },
  // Same brow raise as surprised, set apart by jawOpen, which is the primary
  // differentiator.
  fearful: {
    browInnerUp: 0.8,
    browOuterUpLeft: 0.8,
    browOuterUpRight: 0.8,
    jawOpen: 1.0
  }
};

// Dimensions of the similarity space: every known blendshape name, in order
const PROTOTYPE_KEYS = Object.keys(EMPTY_BLENDSHAPES);

// Euclidean length of each prototype, computed once for the cosine similarity
const PROTOTYPE_MAGNITUDES = Object.fromEntries(
  Object.entries(EMOTION_PROTOTYPES).map(([emotion, weights]) => {
    const sumOfSquares = PROTOTYPE_KEYS.reduce((acc, key) => {
      const weight = weights[key] || 0;
      return acc + weight * weight;
    }, 0);
    return [emotion, Math.sqrt(sumOfSquares)];
  })
);

// Noise floor: observed blendshape values at or below this count as zero so
// that resting-state activations do not match emotion prototypes
const BLENDSHAPE_NOISE_FLOOR = 0.10;
150
-
151
/**
 * Cosine similarity between an observed blendshape record and one prototype:
 * cos(v, p) = (v · p) / (|v| * |p|).
 *
 * Observed values that do not exceed BLENDSHAPE_NOISE_FLOOR are treated as
 * zero before they enter the dot product or the magnitude.
 *
 * @param {Object<string, number>} observed - Blendshape scores for one face.
 * @param {Object<string, number>} prototype - Weights of one emotion prototype.
 * @param {number} protoMag - Precomputed magnitude of `prototype`.
 * @returns {number} The similarity, or 0 when either vector is (nearly) zero.
 */
function emotionCosineSimilarity(observed, prototype, protoMag) {
  let dotProduct = 0;
  let observedSquares = 0;

  PROTOTYPE_KEYS.forEach((name) => {
    const measured = observed[name] || 0;
    const value = measured > BLENDSHAPE_NOISE_FLOOR ? measured : 0;
    const weight = prototype[name] || 0;
    dotProduct += value * weight;
    observedSquares += value * value;
  });

  const observedMag = Math.sqrt(observedSquares);
  const degenerate = observedMag < 1e-8 || protoMag < 1e-8;
  return degenerate ? 0 : dotProduct / (observedMag * protoMag);
}
170
-
171
// Cross-suppression table: a confident emotion scales down its competitors.
// Suppression reads the raw (pre-suppression) scores, so the order in which
// entries are applied does not matter.
// [suppressor] → { [target]: strength 0-1 }
const EMOTION_INHIBITIONS = {
  happy: { angry: 0.7, sad: 0.5, disgusted: 0.4, fearful: 0.3 },
  sad: { happy: 0.3, angry: 0.2 },
  angry: { happy: 0.3, sad: 0.2 },
  surprised: { angry: 0.3, sad: 0.3 },
  disgusted: { happy: 0.4, surprised: 0.2 },
  fearful: { happy: 0.3, angry: 0.2 }
};

/**
 * Turn a blendshape record into per-emotion scores in three stages:
 *   1. cosine similarity against each EMFACS prototype (negative values clamp to 0),
 *   2. a booster that raises `angry` when mouthPress is clearly present,
 *   3. cross-emotion inhibition driven by EMOTION_INHIBITIONS.
 * `neutral` is then derived from the strongest remaining emotion.
 *
 * @param {Object<string, number>} bs - Blendshape scores for one face.
 * @returns {{neutral: number, happy: number, sad: number, angry: number,
 *            surprised: number, disgusted: number, fearful: number}}
 */
function mapBlendshapesToEmotions(bs) {
  const emotionNames = Object.keys(EMOTION_PROTOTYPES);

  // Stage 1 — base scores from cosine similarity
  const scores = Object.fromEntries(
    emotionNames.map((name) => [
      name,
      Math.max(0, emotionCosineSimilarity(bs, EMOTION_PROTOTYPES[name], PROTOTYPE_MAGNITUDES[name]))
    ])
  );

  // Stage 2 — mouthPress is kept out of the angry prototype (it would pick up
  // resting-state noise) but still nudges angry upward when clearly present
  const averagePress = ((bs.mouthPressLeft || 0) + (bs.mouthPressRight || 0)) / 2;
  const mouthPress = Math.max(0, averagePress - BLENDSHAPE_NOISE_FLOOR);
  if (mouthPress > 0) {
    scores.angry = Math.min(1, scores.angry + mouthPress * 0.3);
  }

  // Stage 3 — inhibition, computed from a snapshot so it is non-circular
  const snapshot = { ...scores };
  Object.entries(EMOTION_INHIBITIONS).forEach(([suppressor, targets]) => {
    const suppressorScore = snapshot[suppressor] || 0;
    if (!(suppressorScore > 0.1)) return;
    Object.entries(targets).forEach(([target, strength]) => {
      scores[target] *= (1 - suppressorScore * strength);
    });
  });

  // Neutral dominates whenever no emotion reaches the confidence threshold
  const strongest = emotionNames.reduce(
    (best, name) => (scores[name] > best ? scores[name] : best),
    0
  );
  const neutralThreshold = 0.35;
  if (strongest < neutralThreshold) {
    scores.neutral = 1.0;
  } else {
    scores.neutral = Math.max(0, 1.0 - strongest * 1.5);
  }

  const { neutral, happy, sad, angry, surprised, disgusted, fearful } = scores;
  return {
    neutral,
    happy: happy || 0,
    sad: sad || 0,
    angry: angry || 0,
    surprised: surprised || 0,
    disgusted: disgusted || 0,
    fearful: fearful || 0
  };
}
241
-
242
/**
 * Estimate head pose (pitch, yaw, roll in degrees) from face landmarks.
 *
 * Reads five points by index — 1 (nose tip), 152 (chin), 33 and 263 (eye
 * corners) and 10 (forehead). If the landmark list itself or any of those
 * points is missing, a zero pose is returned instead of throwing.
 *
 * @param {Array<{x: number, y: number, z?: number}>|null|undefined} landmarks
 * @returns {{pitch: number, yaw: number, roll: number}} Pitch and yaw clamped
 *   to [-90, 90], roll clamped to [-180, 180]; any NaN component becomes 0.
 */
function computeHeadPoseFromLandmarks(landmarks) {
  if (!landmarks) {
    return { pitch: 0, yaw: 0, roll: 0 };
  }

  // Key landmark indices (MediaPipe FaceMesh)
  const noseTip = landmarks[1];
  const chin = landmarks[152];
  const leftEye = landmarks[33];
  const rightEye = landmarks[263];
  const forehead = landmarks[10];

  if (!noseTip || !chin || !leftEye || !rightEye || !forehead) {
    return { pitch: 0, yaw: 0, roll: 0 };
  }

  const RAD_TO_DEG = 180 / Math.PI;
  const clampOrZero = (value, limit) =>
    (Number.isNaN(value) ? 0 : Math.max(-limit, Math.min(limit, value)));

  // Yaw: horizontal offset of the nose tip from the eye midpoint, measured
  // against the depth gap between them. The +0.001 keeps the depth term non-zero.
  const eyeMidX = (leftEye.x + rightEye.x) / 2;
  const eyeMidZ = (leftEye.z + rightEye.z) / 2 || 0;
  const yaw = Math.atan2(noseTip.x - eyeMidX, Math.abs(noseTip.z - eyeMidZ) + 0.001) * RAD_TO_DEG;

  // Pitch: where the nose tip sits along the forehead→chin span; the halfway
  // point maps to 0 degrees.
  const faceVerticalLen = Math.sqrt((chin.x - forehead.x) ** 2 + (chin.y - forehead.y) ** 2) || 0.001;
  const noseRelY = (noseTip.y - forehead.y) / faceVerticalLen;
  const pitch = (noseRelY - 0.5) * 180;

  // Roll: tilt of the line joining the two eye corners
  const roll = Math.atan2(rightEye.y - leftEye.y, rightEye.x - leftEye.x) * RAD_TO_DEG;

  return {
    pitch: clampOrZero(pitch, 90),
    yaw: clampOrZero(yaw, 90),
    roll: clampOrZero(roll, 180)
  };
}
277
-
278
/**
 * Find a class exported by the MediaPipe vision bundle.
 *
 * The lookup checks the worker global first, then the CommonJS shims
 * (`self.module.exports`, `self.exports`) that this worker defines.
 *
 * @param {string} name - Exported class name, e.g. 'FaceDetector'.
 * @returns {*} The export, or undefined when none of the three places has it.
 */
function resolveVisionExport(name) {
  return self[name] || self.module.exports[name] || self.exports[name];
}

/**
 * Shared loading routine for all MediaPipe tasks: ensure the vision runtime
 * is ready, resolve the task class, and create an instance from `options`.
 *
 * @param {string} className - Name of the task class in the vision bundle.
 * @param {string} label - Human-readable task name used in log lines.
 * @param {Object} options - Options passed to `createFromOptions`.
 * @param {string} [loadedSuffix=''] - Extra text appended to the success log line.
 * @returns {Promise<Object>} The created task instance.
 * @throws {Error} When the class is missing from the bundle, or when
 *   initialization or creation fails (the error is logged, then rethrown).
 */
async function createVisionTask(className, label, options, loadedSuffix = '') {
  // Ensure vision runtime is initialized first
  await initializeVision();

  try {
    log(`Loading ${label}...`);

    const TaskClass = resolveVisionExport(className);
    if (!TaskClass) {
      // Fail with a clear message instead of a TypeError on `undefined.createFromOptions`
      throw new Error(`${className} not found — vision bundle may have failed to load`);
    }

    const task = await TaskClass.createFromOptions(vision, options);
    log(`✅ ${label} loaded${loadedSuffix}`);
    return task;
  } catch (error) {
    log(`❌ Failed to load ${label}:`, error);
    throw error;
  }
}

/**
 * Initialize MediaPipe Tasks Vision runtime.
 *
 * Resolves the WASM fileset from `assetConfig.wasmBasePath` and stores it in
 * `vision`. Does nothing when the runtime has already been initialized.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the WASM base path has not been set, when
 *   FilesetResolver is not available, or when resolving the fileset fails.
 */
async function initializeVision() {
  if (isInitialized) {
    log('Vision already initialized, skipping');
    return;
  }

  if (!assetConfig.wasmBasePath) {
    throw new Error('WASM base path not set — init message must be received before initializeVision()');
  }

  try {
    log('Starting MediaPipe Tasks Vision initialization...');
    log('WASM base path:', assetConfig.wasmBasePath);

    const FilesetResolver = resolveVisionExport('FilesetResolver');
    log('FilesetResolver found:', !!FilesetResolver);

    if (!FilesetResolver) {
      throw new Error('FilesetResolver not found — vision bundle may have failed to load');
    }

    vision = await FilesetResolver.forVisionTasks(assetConfig.wasmBasePath);

    isInitialized = true;
    log('✅ MediaPipe Tasks Vision initialized successfully');
  } catch (error) {
    log('❌ Failed to initialize MediaPipe Tasks Vision:', error);
    throw error;
  }
}

/**
 * Load and initialize Face Detection. No-op when the detector already exists.
 * @returns {Promise<void>}
 */
async function initializeFaceDetection() {
  if (faceDetector) return;

  faceDetector = await createVisionTask('FaceDetector', 'Face Detector', {
    baseOptions: {
      modelAssetPath: 'https://storage.googleapis.com/mediapipe-models/face_detector/blaze_face_short_range/float16/1/blaze_face_short_range.tflite',
      delegate: 'GPU'
    },
    runningMode: 'VIDEO',
    minDetectionConfidence: 0.5,
    minSuppressionThreshold: 0.3
  });
}

/**
 * Load and initialize Face Landmarks, with blendshape output enabled.
 * No-op when the landmarker already exists.
 * @returns {Promise<void>}
 */
async function initializeFaceLandmarks() {
  if (faceLandmarker) return;

  faceLandmarker = await createVisionTask('FaceLandmarker', 'Face Landmarker', {
    baseOptions: {
      modelAssetPath: 'https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task',
      delegate: 'GPU'
    },
    runningMode: 'VIDEO',
    numFaces: 1,
    outputFaceBlendshapes: true
  }, ' (blendshapes enabled)');
}

/**
 * Load and initialize Hand Tracking via GestureRecognizer
 * (provides landmarks + handedness + ML-based gesture classification in one call).
 * No-op when the recognizer already exists.
 * @returns {Promise<void>}
 */
async function initializeHandTracking() {
  if (gestureRecognizer) return;

  gestureRecognizer = await createVisionTask('GestureRecognizer', 'Gesture Recognizer', {
    baseOptions: {
      modelAssetPath: 'https://storage.googleapis.com/mediapipe-models/gesture_recognizer/gesture_recognizer/float16/1/gesture_recognizer.task',
      delegate: 'GPU'
    },
    runningMode: 'VIDEO',
    numHands: 2
  });
}

/**
 * Load and initialize Pose Detection. No-op when the landmarker already exists.
 * @returns {Promise<void>}
 */
async function initializePoseDetection() {
  if (poseLandmarker) return;

  poseLandmarker = await createVisionTask('PoseLandmarker', 'Pose Landmarker', {
    baseOptions: {
      modelAssetPath: 'https://storage.googleapis.com/mediapipe-models/pose_landmarker/pose_landmarker_lite/float16/1/pose_landmarker_lite.task',
      delegate: 'GPU'
    },
    runningMode: 'VIDEO',
    numPoses: 1
  });
}

/**
 * Load and initialize Body Segmentation. No-op when the segmenter already
 * exists. Unlike the other tasks this one is created in 'IMAGE' running mode
 * and outputs only the category mask.
 * @returns {Promise<void>}
 */
async function initializeBodySegmentation() {
  if (imageSegmenter) return;

  imageSegmenter = await createVisionTask('ImageSegmenter', 'Image Segmenter', {
    baseOptions: {
      modelAssetPath: 'https://storage.googleapis.com/mediapipe-models/image_segmenter/selfie_segmenter/float16/1/selfie_segmenter.tflite',
      delegate: 'GPU'
    },
    runningMode: 'IMAGE',
    outputCategoryMask: true,
    outputConfidenceMasks: false
  });
}
464
-
465
/**
 * Axis-aligned bounding box of a set of hand landmarks.
 *
 * The extremes start at ±Infinity, so the result is correct even when
 * landmarks fall outside the [0, 1] range (a hand partly outside the frame).
 * An empty landmark list produces a zero-sized box at the origin.
 *
 * @param {Array<{x: number, y: number}>} landmarks
 * @returns {{x: number, y: number, width: number, height: number}}
 */
function computeHandBounds(landmarks) {
  if (!landmarks || landmarks.length === 0) {
    return { x: 0, y: 0, width: 0, height: 0 };
  }

  let minX = Infinity;
  let minY = Infinity;
  let maxX = -Infinity;
  let maxY = -Infinity;
  for (const p of landmarks) {
    if (p.x < minX) minX = p.x;
    if (p.y < minY) minY = p.y;
    if (p.x > maxX) maxX = p.x;
    if (p.y > maxY) maxY = p.y;
  }
  return { x: minX, y: minY, width: maxX - minX, height: maxY - minY };
}
475
-
476
// Map MediaPipe GestureRecognizer category names to our API field names
const GESTURE_NAME_MAP = {
  'Closed_Fist': 'fist',
  'Open_Palm': 'openPalm',
  'Victory': 'peace',
  'Thumb_Up': 'thumbsUp',
  'Pointing_Up': 'pointing',
  'Thumb_Down': 'thumbsDown',
  'ILoveYou': 'iLoveYou'
};

/**
 * Convert one hand's gesture categories into a fixed-shape score record.
 *
 * Every API field starts at 0. Categories whose name is not an own key of
 * GESTURE_NAME_MAP are ignored; the own-property check stops inherited names
 * such as 'constructor' from adding stray keys to the result.
 *
 * @param {Array<{categoryName: string, score: number}>|null|undefined} gestureCategories
 * @returns {{fist: number, openPalm: number, peace: number, thumbsUp: number,
 *            pointing: number, thumbsDown: number, iLoveYou: number}}
 */
function mapGestures(gestureCategories) {
  const result = { fist: 0, openPalm: 0, peace: 0, thumbsUp: 0, pointing: 0, thumbsDown: 0, iLoveYou: 0 };
  if (!gestureCategories) return result;
  for (const cat of gestureCategories) {
    if (Object.prototype.hasOwnProperty.call(GESTURE_NAME_MAP, cat.categoryName)) {
      result[GESTURE_NAME_MAP[cat.categoryName]] = cat.score;
    }
  }
  return result;
}
495
- }
496
-
497
// Landmark indices (BlazePose numbering) that make up each named body part
const POSE_FACE_INDICES = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
const POSE_TORSO_INDICES = [11, 12, 23, 24];
const POSE_LEFT_ARM_INDICES = [11, 13, 15];
const POSE_RIGHT_ARM_INDICES = [12, 14, 16];
const POSE_LEFT_LEG_INDICES = [23, 25, 27, 29, 31];
const POSE_RIGHT_LEG_INDICES = [24, 26, 28, 30, 32];

/**
 * Pick the landmarks at the given indices and reduce each to its 2D position.
 * An index with no landmark yields the origin { x: 0, y: 0 }.
 *
 * @param {Array<{x: number, y: number}>} landmarks - Full pose landmark list.
 * @param {number[]} indices - Positions to extract, in output order.
 * @returns {Array<{x: number, y: number}>} One point per requested index.
 */
function extractPoseGroup(landmarks, indices) {
  const group = [];
  for (const index of indices) {
    const point = landmarks[index];
    if (point) {
      group.push({ x: point.x, y: point.y });
    } else {
      group.push({ x: 0, y: 0 });
    }
  }
  return group;
}
511
-
512
/**
 * Mean `visibility` across all pose landmarks; a landmark without a
 * visibility value counts as 0.
 *
 * @param {Array<{visibility?: number}>} landmarks
 * @returns {number} The average visibility, or 0 for an empty list.
 */
function computePoseConfidence(landmarks) {
  if (!(landmarks.length > 0)) {
    // Still walk the input so a non-iterable argument fails exactly as before
    for (const unused of landmarks) void unused;
    return 0;
  }
  let total = 0;
  for (const point of landmarks) {
    total += (point.visibility || 0);
  }
  return total / landmarks.length;
}
517
-
518
/**
 * Process one video frame with every requested CV feature whose MediaPipe
 * task is currently loaded.
 *
 * Result keys are only set for features that ran:
 *   - `faces`        from faceDetection and/or faceMesh / emotionDetection
 *   - `hands`        from handTracking
 *   - `pose`         from poseDetection (null when no pose was found)
 *   - `segmentation` from bodySegmentation (null when no category mask was produced)
 *
 * Any error thrown while processing is logged and an empty object is returned.
 *
 * @param {ImageData|ImageBitmap} imageInput - Image input (ImageData or ImageBitmap)
 * @param {number} timestamp - Frame timestamp
 * @param {string[]} features - Active CV features
 * @returns {Promise<Object>} Results for this frame, or {} on error.
 */
async function processFrame(imageInput, timestamp, features) {
  const results = {};

  // Pose landmarks keep their reported visibility. Only a missing value
  // defaults to 1; a genuine 0 must stay 0 so that it lowers the pose confidence.
  const toPoseLandmark = (landmark) => ({
    x: landmark.x,
    y: landmark.y,
    z: landmark.z || 0,
    visibility: landmark.visibility ?? 1
  });

  try {
    // Process face detection
    if (features.includes('faceDetection') && faceDetector) {
      const detectionResult = faceDetector.detectForVideo(imageInput, timestamp);
      results.faces = detectionResult.detections.map((detection, index) => ({
        id: index,
        // Bounding box values are divided by the input size to normalize them
        bounds: {
          x: detection.boundingBox.originX / imageInput.width,
          y: detection.boundingBox.originY / imageInput.height,
          width: detection.boundingBox.width / imageInput.width,
          height: detection.boundingBox.height / imageInput.height
        },
        center: {
          x: (detection.boundingBox.originX + detection.boundingBox.width / 2) / imageInput.width,
          y: (detection.boundingBox.originY + detection.boundingBox.height / 2) / imageInput.height
        },
        landmarks: [],
        expressions: EMPTY_EXPRESSIONS,
        headPose: EMPTY_HEAD_POSE,
        blendshapes: EMPTY_BLENDSHAPES,
        confidence: detection.categories[0]?.score || 0
      }));
    }

    // Process face landmarks (used by faceMesh and emotionDetection)
    if ((features.includes('faceMesh') || features.includes('emotionDetection')) && faceLandmarker) {
      const landmarkResult = faceLandmarker.detectForVideo(imageInput, timestamp);
      if (landmarkResult.faceLandmarks.length > 0) {
        const landmarks = landmarkResult.faceLandmarks[0];

        // Without face detection there is no face entry yet: create a
        // placeholder with no bounds to carry the landmark data.
        if (!results.faces) {
          results.faces = [{
            id: 0,
            bounds: null,
            center: null,
            landmarks: [],
            expressions: EMPTY_EXPRESSIONS,
            headPose: EMPTY_HEAD_POSE,
            blendshapes: EMPTY_BLENDSHAPES,
            confidence: 0.8
          }];
        }

        const mappedLandmarks = landmarks.map((landmark) => ({
          x: landmark.x,
          y: landmark.y,
          z: landmark.z || 0
        }));

        // Landmark data is attached to the first face only
        if (results.faces[0]) {
          results.faces[0].landmarks = mappedLandmarks;
          results.faces[0].headPose = computeHeadPoseFromLandmarks(landmarks);

          if (features.includes('emotionDetection') &&
              landmarkResult.faceBlendshapes &&
              landmarkResult.faceBlendshapes.length > 0) {
            const bs = buildBlendshapesRecord(landmarkResult.faceBlendshapes[0].categories);
            results.faces[0].blendshapes = bs;
            results.faces[0].expressions = mapBlendshapesToEmotions(bs);
          }
        }
      } else if (!results.faces) {
        results.faces = [];
      }
    }

    // Process hand tracking (GestureRecognizer provides landmarks + gestures in one call)
    if (features.includes('handTracking') && gestureRecognizer) {
      const handResult = gestureRecognizer.recognizeForVideo(imageInput, timestamp);
      results.hands = handResult.landmarks.map((landmarks, index) => {
        const mapped = landmarks.map((landmark) => ({
          x: landmark.x,
          y: landmark.y,
          z: landmark.z || 0
        }));

        const rawHandedness = handResult.handednesses[index]?.[0]?.categoryName || 'unknown';

        return {
          id: index,
          handedness: rawHandedness.toLowerCase(),
          confidence: handResult.handednesses[index]?.[0]?.score || 0,
          bounds: computeHandBounds(mapped),
          landmarks: mapped,
          palm: mapped[9],
          gestures: mapGestures(handResult.gestures[index])
        };
      });
    }

    // Process pose detection
    if (features.includes('poseDetection') && poseLandmarker) {
      const poseResult = poseLandmarker.detectForVideo(imageInput, timestamp);
      if (poseResult.landmarks.length > 0) {
        const poseLandmarks = poseResult.landmarks[0].map(toPoseLandmark);

        results.pose = {
          confidence: computePoseConfidence(poseLandmarks),
          landmarks: poseLandmarks,
          face: extractPoseGroup(poseLandmarks, POSE_FACE_INDICES),
          torso: extractPoseGroup(poseLandmarks, POSE_TORSO_INDICES),
          leftArm: extractPoseGroup(poseLandmarks, POSE_LEFT_ARM_INDICES),
          rightArm: extractPoseGroup(poseLandmarks, POSE_RIGHT_ARM_INDICES),
          leftLeg: extractPoseGroup(poseLandmarks, POSE_LEFT_LEG_INDICES),
          rightLeg: extractPoseGroup(poseLandmarks, POSE_RIGHT_LEG_INDICES),
          worldLandmarks: poseResult.worldLandmarks?.[0]?.map(toPoseLandmark) || []
        };
      } else {
        results.pose = null;
      }
    }

    // Process body segmentation
    if (features.includes('bodySegmentation') && imageSegmenter) {
      const segmentResult = imageSegmenter.segment(imageInput);
      if (segmentResult.categoryMask) {
        try {
          results.segmentation = {
            mask: segmentResult.categoryMask.getAsUint8Array(),
            width: segmentResult.categoryMask.width,
            height: segmentResult.categoryMask.height
          };
          log('Segmentation mask:', results.segmentation.width, 'x', results.segmentation.height);
        } finally {
          // Always close the mask, even if reading it failed
          segmentResult.categoryMask.close();
        }
      } else {
        results.segmentation = null;
      }
    }

    return results;
  } catch (error) {
    log('❌ Error processing frame:', error);
    return {};
  }
}
674
-
675
- // Note: Removed reusable canvas functions - no longer needed with direct ImageBitmap processing!
676
-
677
/**
 * Close a MediaPipe task instance and allow a short pause for WASM cleanup.
 *
 * @param {{close: Function}|null} instance - Task to close; a falsy value is a no-op.
 * @param {string} featureName - Name used in log output.
 * @returns {Promise<void>} Resolves 100 ms after a successful close, or
 *   immediately when there was nothing to close or closing threw (the error
 *   is logged, not propagated).
 */
function cleanupWasmInstance(instance, featureName) {
  if (!instance) {
    return Promise.resolve();
  }

  try {
    log(`🧹 Cleaning up ${featureName} WASM instance...`);
    instance.close();

    // Trigger a collection when the runtime exposes gc() (Chrome DevTools)
    if (typeof gc === 'function') {
      gc();
    }

    // Give time for WASM cleanup
    return new Promise((done) => {
      setTimeout(done, 100);
    });
  } catch (error) {
    log(`⚠️ Error cleaning up ${featureName}:`, error);
    return Promise.resolve();
  }
}
701
-
702
/**
 * Drain the configuration queue one request at a time.
 *
 * Returns immediately when a drain is already in progress or the queue is
 * empty. Each queued request is settled through its own resolve/reject pair,
 * so one failing request does not stop the ones queued behind it.
 *
 * @returns {Promise<void>}
 */
async function processConfigQueue() {
  const nothingToDo = processingConfig || configQueue.length === 0;
  if (nothingToDo) return;

  processingConfig = true;

  try {
    while (configQueue.length > 0) {
      const { features, resolve, reject } = configQueue.shift();

      try {
        await handleConfigUpdateInternal(features);
        const summary = { configured: true, activeFeatures: Array.from(activeFeatures) };
        resolve(summary);
      } catch (failure) {
        reject(failure);
      }
    }
  } finally {
    // Release the guard even if something unexpected escaped the loop
    processingConfig = false;
  }
}
725
-
726
/**
 * Enqueue a configuration request and start draining the queue.
 * Queuing keeps overlapping requests from racing each other.
 *
 * @param {string[]} features - Feature names that should be active afterwards.
 * @returns {Promise<{configured: boolean, activeFeatures: string[]}>}
 *   Settles once this particular request has been applied or has failed.
 */
function queueConfigUpdate(features) {
  const enqueue = (resolve, reject) => {
    const request = { features, resolve, reject };
    configQueue.push(request);
    processConfigQueue();
  };
  return new Promise(enqueue);
}
735
-
736
/**
 * Apply a feature configuration: tear down tasks that are no longer wanted,
 * wait for their cleanup, then load the tasks for newly requested features.
 *
 * faceMesh and emotionDetection share the FaceLandmarker, so it is only torn
 * down when the other one is not part of the new configuration.
 *
 * @param {string[]} features - Complete list of features that should be active.
 * @returns {Promise<void>}
 * @throws {Error} When the worker is unhealthy, or when enabling a feature
 *   fails. An "Out of memory" failure also marks the worker unhealthy.
 */
async function handleConfigUpdateInternal(features) {
  if (!workerHealthy) {
    throw new Error('Worker is in unhealthy state, restart required');
  }

  const requested = new Set(features);
  const toEnable = features.filter((name) => !activeFeatures.has(name));
  const toDisable = [...activeFeatures].filter((name) => !requested.has(name));

  log(`🔄 Config update: enable [${toEnable.join(', ')}], disable [${toDisable.join(', ')}]`);

  // --- Disable phase: start cleanup for every dropped feature ---
  const pendingCleanups = [];

  const releaseFaceLandmarkerUnlessNeededBy = (otherFeature) => {
    if (requested.has(otherFeature)) return;
    pendingCleanups.push(cleanupWasmInstance(faceLandmarker, 'FaceLandmarker'));
    faceLandmarker = null;
  };

  const teardownByFeature = new Map([
    ['faceDetection', () => {
      pendingCleanups.push(cleanupWasmInstance(faceDetector, 'FaceDetector'));
      faceDetector = null;
    }],
    ['faceMesh', () => releaseFaceLandmarkerUnlessNeededBy('emotionDetection')],
    ['emotionDetection', () => releaseFaceLandmarkerUnlessNeededBy('faceMesh')],
    ['handTracking', () => {
      pendingCleanups.push(cleanupWasmInstance(gestureRecognizer, 'GestureRecognizer'));
      gestureRecognizer = null;
    }],
    ['poseDetection', () => {
      pendingCleanups.push(cleanupWasmInstance(poseLandmarker, 'PoseLandmarker'));
      poseLandmarker = null;
    }],
    ['bodySegmentation', () => {
      pendingCleanups.push(cleanupWasmInstance(imageSegmenter, 'ImageSegmenter'));
      imageSegmenter = null;
    }]
  ]);

  for (const feature of toDisable) {
    const teardown = teardownByFeature.get(feature);
    if (teardown) teardown();
    activeFeatures.delete(feature);
    log(`🗑️ Disabled feature: ${feature}`);
  }

  // All cleanups must finish before anything new is loaded
  if (pendingCleanups.length > 0) {
    await Promise.all(pendingCleanups);
    log('✅ All cleanup completed');
  }

  // Note: No canvas cleanup needed - using direct ImageBitmap processing!

  // --- Enable phase: load tasks one after another ---
  const loaderByFeature = new Map([
    ['faceDetection', initializeFaceDetection],
    // Both share the FaceLandmarker (with blendshapes enabled)
    ['faceMesh', initializeFaceLandmarks],
    ['emotionDetection', initializeFaceLandmarks],
    ['handTracking', initializeHandTracking],
    ['poseDetection', initializePoseDetection],
    ['bodySegmentation', initializeBodySegmentation]
  ]);

  for (const feature of toEnable) {
    try {
      const load = loaderByFeature.get(feature);
      if (load) {
        await load();
      }
      activeFeatures.add(feature);
      log(`✅ Enabled feature: ${feature}`);
    } catch (error) {
      log(`❌ Failed to enable feature ${feature}:`, error);

      // A memory failure leaves the worker unusable until it is restarted
      const outOfMemory = error.message && error.message.includes('Out of memory');
      if (outOfMemory) {
        memoryPressureDetected = true;
        workerHealthy = false;
        throw new Error(`Memory exhausted while enabling ${feature}. Worker restart required.`);
      }

      throw error;
    }
  }
}
835
-
836
/**
 * Backward-compatible entry point; simply forwards to queueConfigUpdate.
 *
 * @param {string[]} features - Feature names that should be active afterwards.
 * @returns {Promise<{configured: boolean, activeFeatures: string[]}>}
 */
async function handleConfigUpdate(features) {
  const outcome = await queueConfigUpdate(features);
  return outcome;
}
842
-
843
/**
 * Post a 'result' message back to CVSystem.
 * @param {Object} payload - Fields placed after `type` in the message.
 */
function postResult(payload) {
  self.postMessage({ type: 'result', ...payload });
}

/**
 * Handle 'init': record the asset URLs, load the vision bundle and bring up
 * the vision runtime. A bundle load failure is reported directly with
 * `restartRequired`; other failures are thrown to the caller.
 * @param {Object} message - The init message (visionBundleUrl, wasmBasePath).
 */
async function handleInitMessage(message) {
  log('Received init message');

  assetConfig.visionBundleUrl = message.visionBundleUrl || null;
  assetConfig.wasmBasePath = message.wasmBasePath || null;

  if (!(assetConfig.visionBundleUrl && assetConfig.wasmBasePath)) {
    throw new Error('init message must include visionBundleUrl and wasmBasePath');
  }

  log('Asset URLs:', assetConfig);

  try {
    importScripts(assetConfig.visionBundleUrl);
    log('vision_bundle.js loaded successfully from:', assetConfig.visionBundleUrl);
  } catch (error) {
    log('❌ Failed to load vision_bundle.js:', error);
    postResult({
      success: false,
      error: `Failed to load vision bundle from ${assetConfig.visionBundleUrl}: ${error.message}`,
      restartRequired: true
    });
    return;
  }

  try {
    await initializeVision();
    log('Vision runtime ready for feature loading');
  } catch (error) {
    log('❌ Vision runtime initialization failed:', error);
    throw error;
  }

  postResult({ success: true, data: { initialized: true } });
}

/**
 * Handle 'config': apply the requested feature set and report the outcome.
 * Failures while the worker is unhealthy are reported with `restartRequired`;
 * any other failure is thrown to the caller.
 * @param {Object} message - The config message (features).
 */
async function handleConfigMessage(message) {
  log('Received config message:', message.features);

  try {
    const result = await handleConfigUpdate(message.features);
    postResult({ success: true, data: result });
  } catch (error) {
    log('❌ Config update failed:', error);

    // Healthy worker: leave it to the normal error handling
    if (workerHealthy && !memoryPressureDetected) {
      throw error;
    }

    postResult({ success: false, error: error.message, restartRequired: true });
  }
}

/**
 * Handle 'process': run the frame through processFrame, post the results and
 * always release the bitmap afterwards.
 * @param {Object} message - The process message (bitmap, timestamp, features).
 */
async function handleProcessMessage(message) {
  try {
    // The ImageBitmap goes to MediaPipe directly, without any conversion
    const results = await processFrame(message.bitmap, message.timestamp, message.features);
    postResult({ success: true, data: results });
  } finally {
    // Clean up ImageBitmap after processing
    if (message.bitmap && typeof message.bitmap.close === 'function') {
      message.bitmap.close();
    }
  }
}

// Message handler: dispatches on message.type; anything thrown by a handler
// is reported back as an unsuccessful result.
self.onmessage = async (event) => {
  const message = event.data;

  // Per-frame 'process' messages are not logged, to avoid flooding the console
  if (message.type !== 'process') {
    log('Received message:', message.type);
  }

  try {
    switch (message.type) {
      case 'debug':
        DEBUG = Boolean(message.enabled);
        log('Debug mode', DEBUG ? 'enabled' : 'disabled');
        break;

      case 'init':
        await handleInitMessage(message);
        break;

      case 'config':
        await handleConfigMessage(message);
        break;

      case 'process':
        await handleProcessMessage(message);
        break;

      default:
        log('❌ Unknown message type:', message.type);
        postResult({ success: false, error: `Unknown message type: ${message.type}` });
    }
  } catch (error) {
    log('❌ Error handling message:', error);
    postResult({
      success: false,
      error: error instanceof Error ? error.message : String(error)
    });
  }
};

log('CV Tasks Worker loaded — waiting for init message with asset URLs');