@viji-dev/core 0.3.35 → 0.3.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,954 +0,0 @@
1
- /**
2
- * MediaPipe Tasks Vision Classic Worker
3
- *
4
- * Classic worker for MediaPipe Tasks Vision processing.
5
- * Uses importScripts() to load MediaPipe Tasks Vision UMD bundle.
6
- */
7
-
8
// Debug logging switch — controlled by CVSystem via the 'debug' message
let DEBUG = false;

/**
 * Print a prefixed diagnostic line, but only while DEBUG is switched on.
 * @param {...*} args - Values forwarded to console.log after the worker prefix.
 */
function log(...args) {
  if (!DEBUG) return;
  console.log('🔧 [CV Tasks Worker]', ...args);
}
15
-
16
// Give the MediaPipe UMD bundle CommonJS-style globals to attach its exports to
Object.assign(self, { exports: {}, module: { exports: {} } });

// Load the MediaPipe Tasks Vision UMD bundle; a failure is reported on the
// console and the worker keeps running.
log('Starting to load vision_bundle.js...');
try {
  importScripts('/dist/assets/vision_bundle.js');
  log('vision_bundle.js loaded successfully');
} catch (loadError) {
  console.error('❌ [CV Tasks Worker] Failed to load vision_bundle.js:', loadError);
}
28
-
29
- // MediaPipe model instances
30
- let faceDetector = null;
31
- let faceLandmarker = null;
32
- let gestureRecognizer = null;
33
- let poseLandmarker = null;
34
- let imageSegmenter = null;
35
-
36
- // Vision runtime
37
- let vision = null;
38
- let isInitialized = false;
39
-
40
- // Active features tracking
41
- const activeFeatures = new Set();
42
-
43
- // Configuration queue to prevent race conditions
44
- const configQueue = [];
45
- let processingConfig = false;
46
-
47
- // Worker health tracking
48
- let workerHealthy = true;
49
- let memoryPressureDetected = false;
50
-
51
- // Safe zero-defaults for face data when features are inactive
52
- const EMPTY_EXPRESSIONS = Object.freeze({
53
- neutral: 0, happy: 0, sad: 0, angry: 0, surprised: 0, disgusted: 0, fearful: 0
54
- });
55
-
56
- const EMPTY_HEAD_POSE = Object.freeze({ pitch: 0, yaw: 0, roll: 0 });
57
-
58
- const EMPTY_BLENDSHAPES = Object.freeze({
59
- browDownLeft: 0, browDownRight: 0, browInnerUp: 0, browOuterUpLeft: 0, browOuterUpRight: 0,
60
- cheekPuff: 0, cheekSquintLeft: 0, cheekSquintRight: 0,
61
- eyeBlinkLeft: 0, eyeBlinkRight: 0,
62
- eyeLookDownLeft: 0, eyeLookDownRight: 0, eyeLookInLeft: 0, eyeLookInRight: 0,
63
- eyeLookOutLeft: 0, eyeLookOutRight: 0, eyeLookUpLeft: 0, eyeLookUpRight: 0,
64
- eyeSquintLeft: 0, eyeSquintRight: 0, eyeWideLeft: 0, eyeWideRight: 0,
65
- jawForward: 0, jawLeft: 0, jawOpen: 0, jawRight: 0,
66
- mouthClose: 0, mouthDimpleLeft: 0, mouthDimpleRight: 0,
67
- mouthFrownLeft: 0, mouthFrownRight: 0, mouthFunnel: 0, mouthLeft: 0,
68
- mouthLowerDownLeft: 0, mouthLowerDownRight: 0, mouthPressLeft: 0, mouthPressRight: 0,
69
- mouthPucker: 0, mouthRight: 0, mouthRollLower: 0, mouthRollUpper: 0,
70
- mouthShrugLower: 0, mouthShrugUpper: 0, mouthSmileLeft: 0, mouthSmileRight: 0,
71
- mouthStretchLeft: 0, mouthStretchRight: 0, mouthUpperUpLeft: 0, mouthUpperUpRight: 0,
72
- noseSneerLeft: 0, noseSneerRight: 0, tongueOut: 0
73
- });
74
-
75
/**
 * Flatten a MediaPipe faceBlendshapes categories list into a plain
 * `{ categoryName: score }` record. Later duplicates overwrite earlier ones.
 * @param {Iterable<{categoryName: string, score: number}>} categories
 * @returns {Object<string, number>}
 */
function buildBlendshapesRecord(categories) {
  const scoresByName = {};
  for (const { categoryName, score } of categories) {
    scoresByName[categoryName] = score;
  }
  return scoresByName;
}
85
-
86
- /**
87
- * EMFACS-based emotion prototype vectors (Ekman FACS → ARKit blendshapes).
88
- * Weights reflect each blendshape's reliability in MediaPipe's 2D web model.
89
- * Known near-zero blendshapes (cheekSquint*, noseSneer*, eyeWide*) are
90
- * down-weighted and supplemented by correlated signals that do activate.
91
- *
92
- * Reference: Aldenhoven et al. (2026) "Real-Time Emotion Recognition
93
- * Performance of Mobile Devices" — EMFACS cosine similarity approach,
94
- * 68.3% accuracy on 7 emotions, exceeding human raters (58.9%).
95
- */
96
- const EMOTION_PROTOTYPES = {
97
- // mouthSmile is unique to happiness — no other emotion uses it.
98
- // eyeSquint is a secondary "Duchenne smile" indicator.
99
- happy: {
100
- mouthSmileLeft: 1.0, mouthSmileRight: 1.0,
101
- eyeSquintLeft: 0.3, eyeSquintRight: 0.3
102
- },
103
- // Pouty/trembling lip: mouthShrugLower (chin raiser) is the primary signal,
104
- // mouthPucker (compressed lips) secondary. Compact prototype so it wins
105
- // over angry when the differentiating upper-face signals are absent.
106
- sad: {
107
- mouthShrugLower: 1.0,
108
- mouthPucker: 0.8
109
- },
110
- // Shares sad's mouth signals at lower weight, differentiated by upper-face
111
- // tension: eyeSquint + browDown. These extra dimensions shift the cosine
112
- // direction away from sad only when genuinely activated.
113
- angry: {
114
- mouthShrugLower: 0.6, mouthPucker: 0.5,
115
- eyeSquintLeft: 1.0, eyeSquintRight: 1.0,
116
- browDownLeft: 1.0, browDownRight: 1.0
117
- },
118
- // Brow raise only — the simplest, most distinctive prototype.
119
- // jawOpen removed to avoid overlap with fearful.
120
- surprised: {
121
- browInnerUp: 1.0,
122
- browOuterUpLeft: 1.0, browOuterUpRight: 1.0
123
- },
124
- // mouthUpperUp (upper lip raise) is the unique primary signal.
125
- // mouthFrown supports, browDown at low weight for wrinkled-brow disgust.
126
- disgusted: {
127
- mouthUpperUpLeft: 1.0, mouthUpperUpRight: 1.0,
128
- mouthFrownLeft: 0.8, mouthFrownRight: 0.8,
129
- browDownLeft: 0.3, browDownRight: 0.3
130
- },
131
- // Shares surprised's brow raise, differentiated by jawOpen (rare in other
132
- // emotions at even 10-20%). jawOpen is the primary differentiator.
133
- fearful: {
134
- browInnerUp: 0.8, browOuterUpLeft: 0.8, browOuterUpRight: 0.8,
135
- jawOpen: 1.0
136
- }
137
- };
138
-
139
- const PROTOTYPE_KEYS = Object.keys(EMPTY_BLENDSHAPES);
140
-
141
// Cache the Euclidean norm of every emotion prototype once, so the cosine
// similarity does not have to recompute it on each call
const PROTOTYPE_MAGNITUDES = {};
Object.entries(EMOTION_PROTOTYPES).forEach(([emotionName, weights]) => {
  const squaredNorm = PROTOTYPE_KEYS.reduce((total, key) => {
    const weight = weights[key] || 0;
    return total + weight * weight;
  }, 0);
  PROTOTYPE_MAGNITUDES[emotionName] = Math.sqrt(squaredNorm);
});
151
-
152
- // Noise floor: blendshape values below this are treated as zero to
153
- // prevent resting-state activations from matching emotion prototypes
154
- const BLENDSHAPE_NOISE_FLOOR = 0.10;
155
-
156
/**
 * Cosine similarity between an observed blendshape record and one prototype:
 *   cos(v, p) = (v · p) / (|v| * |p|)
 * Observed values at or below BLENDSHAPE_NOISE_FLOOR count as zero, which
 * suppresses resting-state noise.
 *
 * @param {Object<string, number>} observed - Blendshape scores keyed by name.
 * @param {Object<string, number>} prototype - Prototype weights keyed by name.
 * @param {number} protoMag - Pre-computed magnitude of `prototype`.
 * @returns {number} The similarity, or 0 when either vector is (near) zero.
 */
function emotionCosineSimilarity(observed, prototype, protoMag) {
  let dotProduct = 0;
  let observedSquared = 0;

  PROTOTYPE_KEYS.forEach((name) => {
    const value = observed[name] || 0;
    const gated = value > BLENDSHAPE_NOISE_FLOOR ? value : 0;
    dotProduct += gated * (prototype[name] || 0);
    observedSquared += gated * gated;
  });

  const observedMag = Math.sqrt(observedSquared);
  const degenerate = observedMag < 1e-8 || protoMag < 1e-8;
  return degenerate ? 0 : dotProduct / (observedMag * protoMag);
}
175
-
176
- // Cross-suppression: when one emotion is confident, competing emotions are
177
- // reduced. Uses raw (pre-suppression) scores so order doesn't matter.
178
- // [suppressor] → { [target]: strength 0-1 }
179
- const EMOTION_INHIBITIONS = {
180
- happy: { angry: 0.7, sad: 0.5, disgusted: 0.4, fearful: 0.3 },
181
- sad: { happy: 0.3, angry: 0.2 },
182
- angry: { happy: 0.3, sad: 0.2 },
183
- surprised: { angry: 0.3, sad: 0.3 },
184
- disgusted: { happy: 0.4, surprised: 0.2 },
185
- fearful: { happy: 0.3, angry: 0.2 }
186
- };
187
-
188
/**
 * Turn a blendshape record into emotion scores with a 3-stage pipeline:
 *   1. Cosine similarity against the EMFACS prototypes (base scores)
 *   2. Key-signal booster for a defining blendshape (mouthPress → angry)
 *   3. Cross-emotion inhibition (happy suppresses angry, etc.)
 * A neutral score is derived last from the strongest remaining emotion.
 *
 * @param {Object<string, number>} bs - Blendshape scores keyed by name.
 * @returns {{neutral: number, happy: number, sad: number, angry: number,
 *            surprised: number, disgusted: number, fearful: number}}
 */
function mapBlendshapesToEmotions(bs) {
  const emotionNames = Object.keys(EMOTION_PROTOTYPES);

  // --- Stage 1: base scores, negative similarities clamped to zero ---
  const scores = {};
  for (const name of emotionNames) {
    const similarity = emotionCosineSimilarity(bs, EMOTION_PROTOTYPES[name], PROTOTYPE_MAGNITUDES[name]);
    scores[name] = Math.max(0, similarity);
  }

  // --- Stage 2: mouthPress booster ---
  // mouthPress is kept out of the angry prototype (to avoid resting-state
  // contamination) but lifts angry when it clearly exceeds the noise floor.
  const pressAverage = ((bs.mouthPressLeft || 0) + (bs.mouthPressRight || 0)) / 2;
  const pressAboveFloor = Math.max(0, pressAverage - BLENDSHAPE_NOISE_FLOOR);
  if (pressAboveFloor > 0) {
    scores.angry = Math.min(1, scores.angry + pressAboveFloor * 0.3);
  }

  // --- Stage 3: inhibition ---
  // Suppressor strengths come from a snapshot taken before any suppression,
  // so the outcome does not depend on iteration order.
  const preInhibition = { ...scores };
  for (const suppressor of Object.keys(EMOTION_INHIBITIONS)) {
    const suppressorScore = preInhibition[suppressor] || 0;
    if (!(suppressorScore > 0.1)) continue;
    for (const [target, strength] of Object.entries(EMOTION_INHIBITIONS[suppressor])) {
      scores[target] *= 1 - suppressorScore * strength;
    }
  }

  // --- Neutral: dominant when no emotion is confident ---
  const strongest = emotionNames.reduce(
    (best, name) => (scores[name] > best ? scores[name] : best),
    0,
  );
  const neutralThreshold = 0.35;
  if (strongest < neutralThreshold) {
    scores.neutral = 1.0;
  } else {
    scores.neutral = Math.max(0, 1.0 - strongest * 1.5);
  }

  const { neutral, happy, sad, angry, surprised, disgusted, fearful } = scores;
  return {
    neutral,
    happy: happy || 0,
    sad: sad || 0,
    angry: angry || 0,
    surprised: surprised || 0,
    disgusted: disgusted || 0,
    fearful: fearful || 0
  };
}
246
-
247
/**
 * Estimate head pose (pitch, yaw, roll in degrees) from face landmarks.
 * Reads the nose tip, chin, two eye landmarks and forehead at the MediaPipe
 * FaceMesh indices listed below.
 *
 * @param {Array<{x: number, y: number, z: number}>} landmarks - Face landmarks
 *   indexed as in MediaPipe FaceMesh.
 * @returns {{pitch: number, yaw: number, roll: number}} Pitch and yaw clamped
 *   to [-90, 90], roll to [-180, 180]. All zeros when the landmark list is
 *   missing or any required landmark is absent; an individual NaN angle is
 *   reported as 0.
 */
function computeHeadPoseFromLandmarks(landmarks) {
  // Key landmark indices (MediaPipe FaceMesh); `?.` tolerates a missing list
  const noseTip = landmarks?.[1];
  const chin = landmarks?.[152];
  const leftEye = landmarks?.[33];
  const rightEye = landmarks?.[263];
  const forehead = landmarks?.[10];

  if (!noseTip || !chin || !leftEye || !rightEye || !forehead) {
    return { pitch: 0, yaw: 0, roll: 0 };
  }

  const RAD_TO_DEG = 180 / Math.PI;

  // Yaw: horizontal offset of the nose tip from the eye midpoint, measured
  // against their depth difference (+0.001 keeps the denominator non-zero)
  const eyeMidX = (leftEye.x + rightEye.x) / 2;
  const eyeMidZ = (leftEye.z + rightEye.z) / 2 || 0;
  const yaw = Math.atan2(noseTip.x - eyeMidX, Math.abs(noseTip.z - eyeMidZ) + 0.001) * RAD_TO_DEG;

  // Pitch: where the nose sits along the forehead→chin span (0.5 = centred)
  const faceVerticalLen = Math.sqrt((chin.x - forehead.x) ** 2 + (chin.y - forehead.y) ** 2) || 0.001;
  const noseRelY = (noseTip.y - forehead.y) / faceVerticalLen;
  const pitch = (noseRelY - 0.5) * 180;

  // Roll: tilt of the line joining the two eye landmarks
  const roll = Math.atan2(rightEye.y - leftEye.y, rightEye.x - leftEye.x) * RAD_TO_DEG;

  // NaN (e.g. from a missing coordinate) becomes 0, otherwise clamp to ±limit
  const clampDegrees = (value, limit) =>
    (Number.isNaN(value) ? 0 : Math.max(-limit, Math.min(limit, value)));

  return {
    pitch: clampDegrees(pitch, 90),
    yaw: clampDegrees(yaw, 90),
    roll: clampDegrees(roll, 180)
  };
}
282
-
283
/**
 * Set up the MediaPipe Tasks Vision runtime once.
 * Resolves the vision fileset from '/dist/assets/wasm' and stores it in
 * `vision`; later calls return immediately once `isInitialized` is set.
 *
 * @returns {Promise<void>}
 * @throws {Error} When FilesetResolver cannot be found on `self`,
 *   `self.module.exports` or `self.exports`, or when resolving the fileset fails.
 */
async function initializeVision() {
  if (isInitialized) {
    log('Vision already initialized, skipping');
    return;
  }

  try {
    log('Starting MediaPipe Tasks Vision initialization...');

    const wasmBasePath = '/dist/assets/wasm';
    log('WASM base path:', wasmBasePath);

    // Look in each place the UMD bundle may have put its exports, in order
    let resolver = self.FilesetResolver;
    if (!resolver) resolver = self.module.exports.FilesetResolver;
    if (!resolver) resolver = self.exports.FilesetResolver;
    log('FilesetResolver found:', Boolean(resolver));

    if (!resolver) {
      throw new Error('FilesetResolver not found in any expected location');
    }

    vision = await resolver.forVisionTasks(wasmBasePath);
    isInitialized = true;
    log('✅ MediaPipe Tasks Vision initialized successfully');
  } catch (initError) {
    log('❌ Failed to initialize MediaPipe Tasks Vision:', initError);
    throw initError;
  }
}
314
-
315
/**
 * Load and initialize Face Detection.
 * Creates the shared `faceDetector` (BlazeFace short-range model, VIDEO
 * running mode) unless one already exists.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the vision runtime cannot be initialized, when the
 *   FaceDetector class is missing from the loaded bundle, or when model
 *   creation fails. Failures are logged (debug only) and re-thrown.
 */
async function initializeFaceDetection() {
  if (faceDetector) return;

  // Ensure vision runtime is initialized first
  await initializeVision();

  try {
    log('Loading Face Detector...');

    const options = {
      baseOptions: {
        modelAssetPath: 'https://storage.googleapis.com/mediapipe-models/face_detector/blaze_face_short_range/float16/1/blaze_face_short_range.tflite',
        delegate: 'GPU'
      },
      runningMode: 'VIDEO',
      minDetectionConfidence: 0.5,
      minSuppressionThreshold: 0.3
    };

    const FaceDetector = self.FaceDetector || self.module.exports.FaceDetector || self.exports.FaceDetector;
    if (!FaceDetector) {
      // Explicit message (same style as the FilesetResolver check) instead of
      // a TypeError from calling a method on undefined
      throw new Error('FaceDetector not found in any expected location');
    }

    faceDetector = await FaceDetector.createFromOptions(vision, options);
    log('✅ Face Detector loaded');
  } catch (error) {
    log('❌ Failed to load Face Detector:', error);
    throw error;
  }
}
345
-
346
/**
 * Load and initialize Face Landmarks.
 * Creates the shared `faceLandmarker` (one face, blendshape output enabled,
 * VIDEO running mode) unless one already exists.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the vision runtime cannot be initialized, when the
 *   FaceLandmarker class is missing from the loaded bundle, or when model
 *   creation fails. Failures are logged (debug only) and re-thrown.
 */
async function initializeFaceLandmarks() {
  if (faceLandmarker) return;

  // Ensure vision runtime is initialized first
  await initializeVision();

  try {
    log('Loading Face Landmarker...');

    const options = {
      baseOptions: {
        modelAssetPath: 'https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task',
        delegate: 'GPU'
      },
      runningMode: 'VIDEO',
      numFaces: 1,
      outputFaceBlendshapes: true
    };

    const FaceLandmarker = self.FaceLandmarker || self.module.exports.FaceLandmarker || self.exports.FaceLandmarker;
    if (!FaceLandmarker) {
      // Explicit message (same style as the FilesetResolver check) instead of
      // a TypeError from calling a method on undefined
      throw new Error('FaceLandmarker not found in any expected location');
    }

    faceLandmarker = await FaceLandmarker.createFromOptions(vision, options);
    log('✅ Face Landmarker loaded (blendshapes enabled)');
  } catch (error) {
    log('❌ Failed to load Face Landmarker:', error);
    throw error;
  }
}
376
-
377
/**
 * Load and initialize Hand Tracking via GestureRecognizer
 * (provides landmarks + handedness + ML-based gesture classification in one call).
 * Creates the shared `gestureRecognizer` (up to two hands, VIDEO running
 * mode) unless one already exists.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the vision runtime cannot be initialized, when the
 *   GestureRecognizer class is missing from the loaded bundle, or when model
 *   creation fails. Failures are logged (debug only) and re-thrown.
 */
async function initializeHandTracking() {
  if (gestureRecognizer) return;

  // Ensure vision runtime is initialized first
  await initializeVision();

  try {
    log('Loading Gesture Recognizer...');

    const options = {
      baseOptions: {
        modelAssetPath: 'https://storage.googleapis.com/mediapipe-models/gesture_recognizer/gesture_recognizer/float16/1/gesture_recognizer.task',
        delegate: 'GPU'
      },
      runningMode: 'VIDEO',
      numHands: 2
    };

    const GestureRecognizer = self.GestureRecognizer || self.module.exports.GestureRecognizer || self.exports.GestureRecognizer;
    if (!GestureRecognizer) {
      // Explicit message (same style as the FilesetResolver check) instead of
      // a TypeError from calling a method on undefined
      throw new Error('GestureRecognizer not found in any expected location');
    }

    gestureRecognizer = await GestureRecognizer.createFromOptions(vision, options);
    log('✅ Gesture Recognizer loaded');
  } catch (error) {
    log('❌ Failed to load Gesture Recognizer:', error);
    throw error;
  }
}
406
-
407
/**
 * Load and initialize Pose Detection.
 * Creates the shared `poseLandmarker` (lite model, one pose, VIDEO running
 * mode) unless one already exists.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the vision runtime cannot be initialized, when the
 *   PoseLandmarker class is missing from the loaded bundle, or when model
 *   creation fails. Failures are logged (debug only) and re-thrown.
 */
async function initializePoseDetection() {
  if (poseLandmarker) return;

  // Ensure vision runtime is initialized first
  await initializeVision();

  try {
    log('Loading Pose Landmarker...');

    const options = {
      baseOptions: {
        modelAssetPath: 'https://storage.googleapis.com/mediapipe-models/pose_landmarker/pose_landmarker_lite/float16/1/pose_landmarker_lite.task',
        delegate: 'GPU'
      },
      runningMode: 'VIDEO',
      numPoses: 1
    };

    const PoseLandmarker = self.PoseLandmarker || self.module.exports.PoseLandmarker || self.exports.PoseLandmarker;
    if (!PoseLandmarker) {
      // Explicit message (same style as the FilesetResolver check) instead of
      // a TypeError from calling a method on undefined
      throw new Error('PoseLandmarker not found in any expected location');
    }

    poseLandmarker = await PoseLandmarker.createFromOptions(vision, options);
    log('✅ Pose Landmarker loaded');
  } catch (error) {
    log('❌ Failed to load Pose Landmarker:', error);
    throw error;
  }
}
436
-
437
/**
 * Load and initialize Body Segmentation.
 * Creates the shared `imageSegmenter` (selfie segmenter, IMAGE running mode,
 * category mask only) unless one already exists.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the vision runtime cannot be initialized, when the
 *   ImageSegmenter class is missing from the loaded bundle, or when model
 *   creation fails. Failures are logged (debug only) and re-thrown.
 */
async function initializeBodySegmentation() {
  if (imageSegmenter) return;

  // Ensure vision runtime is initialized first
  await initializeVision();

  try {
    log('Loading Image Segmenter...');

    const options = {
      baseOptions: {
        modelAssetPath: 'https://storage.googleapis.com/mediapipe-models/image_segmenter/selfie_segmenter/float16/1/selfie_segmenter.tflite',
        delegate: 'GPU'
      },
      runningMode: 'IMAGE',
      outputCategoryMask: true,
      outputConfidenceMasks: false
    };

    const ImageSegmenter = self.ImageSegmenter || self.module.exports.ImageSegmenter || self.exports.ImageSegmenter;
    if (!ImageSegmenter) {
      // Explicit message (same style as the FilesetResolver check) instead of
      // a TypeError from calling a method on undefined
      throw new Error('ImageSegmenter not found in any expected location');
    }

    imageSegmenter = await ImageSegmenter.createFromOptions(vision, options);
    log('✅ Image Segmenter loaded');
  } catch (error) {
    log('❌ Failed to load Image Segmenter:', error);
    throw error;
  }
}
467
-
468
/**
 * Axis-aligned bounding box of a set of hand landmarks.
 *
 * The extremes start at ±Infinity rather than at 1/0, so the box is exact even
 * when some landmarks lie outside the [0, 1] range.
 *
 * @param {Array<{x: number, y: number}>} landmarks - Landmark points.
 * @returns {{x: number, y: number, width: number, height: number}} Top-left
 *   corner and size of the box; all zeros when there are no landmarks.
 */
function computeHandBounds(landmarks) {
  if (!landmarks || landmarks.length === 0) {
    return { x: 0, y: 0, width: 0, height: 0 };
  }

  let minX = Infinity;
  let minY = Infinity;
  let maxX = -Infinity;
  let maxY = -Infinity;
  for (const p of landmarks) {
    if (p.x < minX) minX = p.x;
    if (p.y < minY) minY = p.y;
    if (p.x > maxX) maxX = p.x;
    if (p.y > maxY) maxY = p.y;
  }
  return { x: minX, y: minY, width: maxX - minX, height: maxY - minY };
}
478
-
479
// Translation table: MediaPipe GestureRecognizer category name → our API field name
const GESTURE_NAME_MAP = {
  Closed_Fist: 'fist',
  Open_Palm: 'openPalm',
  Victory: 'peace',
  Thumb_Up: 'thumbsUp',
  Pointing_Up: 'pointing',
  Thumb_Down: 'thumbsDown',
  ILoveYou: 'iLoveYou'
};

/**
 * Convert one hand's gesture categories into a fixed-shape score record.
 * Categories without an entry in GESTURE_NAME_MAP are ignored.
 *
 * @param {Iterable<{categoryName: string, score: number}>|null|undefined} gestureCategories
 * @returns {{fist: number, openPalm: number, peace: number, thumbsUp: number,
 *            pointing: number, thumbsDown: number, iLoveYou: number}}
 *   Every field is 0 unless a matching category supplied a score.
 */
function mapGestures(gestureCategories) {
  const gestureScores = {
    fist: 0,
    openPalm: 0,
    peace: 0,
    thumbsUp: 0,
    pointing: 0,
    thumbsDown: 0,
    iLoveYou: 0
  };

  if (gestureCategories) {
    for (const { categoryName, score } of gestureCategories) {
      const field = GESTURE_NAME_MAP[categoryName];
      if (field) {
        gestureScores[field] = score;
      }
    }
  }

  return gestureScores;
}
499
-
500
- // BlazePose landmark index groups for named body parts
501
- const POSE_FACE_INDICES = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
502
- const POSE_TORSO_INDICES = [11, 12, 23, 24];
503
- const POSE_LEFT_ARM_INDICES = [11, 13, 15];
504
- const POSE_RIGHT_ARM_INDICES = [12, 14, 16];
505
- const POSE_LEFT_LEG_INDICES = [23, 25, 27, 29, 31];
506
- const POSE_RIGHT_LEG_INDICES = [24, 26, 28, 30, 32];
507
-
508
/**
 * Pick the landmarks at the given indices and reduce each to `{ x, y }`.
 * An index with no landmark yields `{ x: 0, y: 0 }`.
 *
 * @param {Array<{x: number, y: number}>} landmarks - Full landmark list.
 * @param {number[]} indices - Positions to extract, in output order.
 * @returns {Array<{x: number, y: number}>}
 */
function extractPoseGroup(landmarks, indices) {
  const group = [];
  for (const index of indices) {
    const point = landmarks[index];
    group.push(point ? { x: point.x, y: point.y } : { x: 0, y: 0 });
  }
  return group;
}
514
-
515
/**
 * Mean landmark visibility, used as the overall pose confidence.
 * A landmark without a (truthy) visibility contributes 0.
 *
 * @param {Array<{visibility?: number}>} landmarks
 * @returns {number} The average visibility, or 0 for an empty list.
 */
function computePoseConfidence(landmarks) {
  const totalVisibility = landmarks.reduce(
    (running, landmark) => running + (landmark.visibility || 0),
    0,
  );
  if (landmarks.length > 0) {
    return totalVisibility / landmarks.length;
  }
  return 0;
}
520
-
521
/**
 * Process video frame with active CV features.
 *
 * Each requested feature runs only if its model instance is currently loaded.
 * Any error thrown while processing is logged (debug only) and an empty
 * object is returned instead, so one bad frame does not fail the caller.
 *
 * @param {ImageData|ImageBitmap} imageInput - Image input (ImageData or ImageBitmap)
 * @param {number} timestamp - Frame timestamp
 * @param {string[]} features - Active CV features
 * @returns {Promise<Object>} Result record with `faces`, `hands`, `pose` and
 *   `segmentation` keys present only for the features that ran.
 */
async function processFrame(imageInput, timestamp, features) {
  const results = {};

  // Copy a MediaPipe landmark into a plain point; a missing z becomes 0
  const toPoint = (landmark) => ({
    x: landmark.x,
    y: landmark.y,
    z: landmark.z || 0
  });

  // Pose landmarks also carry visibility. `??` (not `||`) so that a reported
  // visibility of 0 is kept and only a missing value defaults to 1.
  const toPosePoint = (landmark) => ({
    ...toPoint(landmark),
    visibility: landmark.visibility ?? 1
  });

  try {
    // Process face detection
    if (features.includes('faceDetection') && faceDetector) {
      const detectionResult = faceDetector.detectForVideo(imageInput, timestamp);
      results.faces = detectionResult.detections.map((detection, index) => {
        const box = detection.boundingBox;
        return {
          id: index,
          // Pixel box divided by the frame size → normalized coordinates
          bounds: {
            x: box.originX / imageInput.width,
            y: box.originY / imageInput.height,
            width: box.width / imageInput.width,
            height: box.height / imageInput.height
          },
          center: {
            x: (box.originX + box.width / 2) / imageInput.width,
            y: (box.originY + box.height / 2) / imageInput.height
          },
          landmarks: [],
          expressions: EMPTY_EXPRESSIONS,
          headPose: EMPTY_HEAD_POSE,
          blendshapes: EMPTY_BLENDSHAPES,
          confidence: detection.categories[0]?.score || 0
        };
      });
    }

    // Process face landmarks (used by faceMesh and emotionDetection)
    if ((features.includes('faceMesh') || features.includes('emotionDetection')) && faceLandmarker) {
      const landmarkResult = faceLandmarker.detectForVideo(imageInput, timestamp);
      if (landmarkResult.faceLandmarks.length > 0) {
        const landmarks = landmarkResult.faceLandmarks[0];

        // Face detection did not run: create a placeholder face to attach to
        if (!results.faces) {
          results.faces = [{
            id: 0,
            bounds: null,
            center: null,
            landmarks: [],
            expressions: EMPTY_EXPRESSIONS,
            headPose: EMPTY_HEAD_POSE,
            blendshapes: EMPTY_BLENDSHAPES,
            confidence: 0.8
          }];
        }

        const mappedLandmarks = landmarks.map(toPoint);

        // NOTE(review): if face detection ran but found nothing, results.faces
        // is an empty array and the landmark data is dropped here — confirm
        // this is intended.
        if (results.faces[0]) {
          results.faces[0].landmarks = mappedLandmarks;
          results.faces[0].headPose = computeHeadPoseFromLandmarks(landmarks);

          if (features.includes('emotionDetection') &&
              landmarkResult.faceBlendshapes &&
              landmarkResult.faceBlendshapes.length > 0) {
            const bs = buildBlendshapesRecord(landmarkResult.faceBlendshapes[0].categories);
            results.faces[0].blendshapes = bs;
            results.faces[0].expressions = mapBlendshapesToEmotions(bs);
          }
        }
      } else if (!results.faces) {
        results.faces = [];
      }
    }

    // Process hand tracking (GestureRecognizer provides landmarks + gestures in one call)
    if (features.includes('handTracking') && gestureRecognizer) {
      const handResult = gestureRecognizer.recognizeForVideo(imageInput, timestamp);
      results.hands = handResult.landmarks.map((landmarks, index) => {
        const mapped = landmarks.map(toPoint);
        const handednessEntry = handResult.handednesses[index]?.[0];
        const rawHandedness = handednessEntry?.categoryName || 'unknown';

        return {
          id: index,
          handedness: rawHandedness.toLowerCase(),
          confidence: handednessEntry?.score || 0,
          bounds: computeHandBounds(mapped),
          landmarks: mapped,
          palm: mapped[9],
          gestures: mapGestures(handResult.gestures[index])
        };
      });
    }

    // Process pose detection
    if (features.includes('poseDetection') && poseLandmarker) {
      const poseResult = poseLandmarker.detectForVideo(imageInput, timestamp);
      if (poseResult.landmarks.length > 0) {
        const poseLandmarks = poseResult.landmarks[0].map(toPosePoint);

        results.pose = {
          confidence: computePoseConfidence(poseLandmarks),
          landmarks: poseLandmarks,
          face: extractPoseGroup(poseLandmarks, POSE_FACE_INDICES),
          torso: extractPoseGroup(poseLandmarks, POSE_TORSO_INDICES),
          leftArm: extractPoseGroup(poseLandmarks, POSE_LEFT_ARM_INDICES),
          rightArm: extractPoseGroup(poseLandmarks, POSE_RIGHT_ARM_INDICES),
          leftLeg: extractPoseGroup(poseLandmarks, POSE_LEFT_LEG_INDICES),
          rightLeg: extractPoseGroup(poseLandmarks, POSE_RIGHT_LEG_INDICES),
          worldLandmarks: poseResult.worldLandmarks?.[0]?.map(toPosePoint) || []
        };
      } else {
        results.pose = null;
      }
    }

    // Process body segmentation
    if (features.includes('bodySegmentation') && imageSegmenter) {
      const segmentResult = imageSegmenter.segment(imageInput);
      if (segmentResult.categoryMask) {
        try {
          results.segmentation = {
            mask: segmentResult.categoryMask.getAsUint8Array(),
            width: segmentResult.categoryMask.width,
            height: segmentResult.categoryMask.height
          };
          log('Segmentation mask:', results.segmentation.width, 'x', results.segmentation.height);
        } finally {
          // Always release the mask, even if reading it failed
          segmentResult.categoryMask.close();
        }
      } else {
        results.segmentation = null;
      }
    }

    return results;
  } catch (error) {
    log('❌ Error processing frame:', error);
    return {};
  }
}
677
-
678
- // Note: Removed reusable canvas functions - no longer needed with direct ImageBitmap processing!
679
-
680
/**
 * Close a model instance and give the runtime a moment to release it.
 *
 * @param {{close: Function}|null} instance - Instance to close; falsy means nothing to do.
 * @param {string} featureName - Label used in the log lines.
 * @returns {Promise<void>} Resolves about 100 ms after a successful close, or
 *   immediately when there was no instance or closing threw (the error is
 *   logged, not propagated).
 */
function cleanupWasmInstance(instance, featureName) {
  if (!instance) {
    return Promise.resolve();
  }

  try {
    log(`🧹 Cleaning up ${featureName} WASM instance...`);
    instance.close();

    // Trigger a collection when the host exposes gc()
    if (typeof gc === 'function') {
      gc();
    }

    // Short pause so the WASM side has time to finish cleaning up
    return new Promise((done) => {
      setTimeout(done, 100);
    });
  } catch (cleanupError) {
    log(`⚠️ Error cleaning up ${featureName}:`, cleanupError);
    return Promise.resolve();
  }
}
704
-
705
/**
 * Drain the configuration queue, applying one update at a time.
 * Does nothing when a drain is already running or the queue is empty. Each
 * queued entry is settled through its own resolve/reject callbacks, so a
 * failing update does not stop the ones behind it.
 *
 * @returns {Promise<void>}
 */
async function processConfigQueue() {
  const nothingToDo = processingConfig || configQueue.length === 0;
  if (nothingToDo) return;

  processingConfig = true;

  try {
    while (configQueue.length !== 0) {
      const pending = configQueue.shift();

      try {
        await handleConfigUpdateInternal(pending.features);
        pending.resolve({ configured: true, activeFeatures: [...activeFeatures] });
      } catch (updateError) {
        pending.reject(updateError);
      }
    }
  } finally {
    // Always release the flag so later updates can start a new drain
    processingConfig = false;
  }
}
728
-
729
/**
 * Enqueue a configuration update and start draining the queue.
 * Updates are applied one at a time to prevent race conditions.
 *
 * @param {string[]} features - Feature list for this update.
 * @returns {Promise<Object>} Settles when the queue processor resolves or
 *   rejects this entry.
 */
function queueConfigUpdate(features) {
  const enqueue = (resolve, reject) => {
    const entry = { features, resolve, reject };
    configQueue.push(entry);
    processConfigQueue();
  };
  return new Promise(enqueue);
}
738
-
739
/**
 * Apply a feature configuration (internal; callers go through the queue).
 *
 * Features no longer requested are torn down first and all cleanups are
 * awaited; newly requested features are then initialized one after another.
 * faceMesh and emotionDetection share the FaceLandmarker, which is only
 * released when neither of them remains requested.
 *
 * @param {string[]} features - Complete list of features that should be active.
 * @returns {Promise<void>}
 * @throws {Error} When the worker is flagged unhealthy, or when enabling a
 *   feature fails. An error whose message contains 'Out of memory' also marks
 *   the worker unhealthy and is replaced by a restart-required error.
 */
async function handleConfigUpdateInternal(features) {
  if (!workerHealthy) {
    throw new Error('Worker is in unhealthy state, restart required');
  }

  const requested = new Set(features);
  const toEnable = features.filter((name) => !activeFeatures.has(name));
  const toDisable = [...activeFeatures].filter((name) => !requested.has(name));

  log(`🔄 Config update: enable [${toEnable.join(', ')}], disable [${toDisable.join(', ')}]`);

  // Each teardown starts the cleanup, clears the module-level handle right
  // away, and returns the cleanup promise (or undefined when nothing is released).
  const releaseFaceLandmarker = () => {
    const pending = cleanupWasmInstance(faceLandmarker, 'FaceLandmarker');
    faceLandmarker = null;
    return pending;
  };
  const teardownByFeature = new Map([
    ['faceDetection', () => {
      const pending = cleanupWasmInstance(faceDetector, 'FaceDetector');
      faceDetector = null;
      return pending;
    }],
    // Only release the FaceLandmarker if emotionDetection is not staying active
    ['faceMesh', () => (requested.has('emotionDetection') ? undefined : releaseFaceLandmarker())],
    // Only release the FaceLandmarker if faceMesh is not staying active
    ['emotionDetection', () => (requested.has('faceMesh') ? undefined : releaseFaceLandmarker())],
    ['handTracking', () => {
      const pending = cleanupWasmInstance(gestureRecognizer, 'GestureRecognizer');
      gestureRecognizer = null;
      return pending;
    }],
    ['poseDetection', () => {
      const pending = cleanupWasmInstance(poseLandmarker, 'PoseLandmarker');
      poseLandmarker = null;
      return pending;
    }],
    ['bodySegmentation', () => {
      const pending = cleanupWasmInstance(imageSegmenter, 'ImageSegmenter');
      imageSegmenter = null;
      return pending;
    }]
  ]);

  // Disable unused features first (cleanup instances)
  const cleanupPromises = [];
  for (const feature of toDisable) {
    const teardown = teardownByFeature.get(feature);
    const pending = teardown ? teardown() : undefined;
    if (pending) {
      cleanupPromises.push(pending);
    }
    activeFeatures.delete(feature);
    log(`🗑️ Disabled feature: ${feature}`);
  }

  // Wait for all cleanup to complete
  if (cleanupPromises.length > 0) {
    await Promise.all(cleanupPromises);
    log('✅ All cleanup completed');
  }

  // faceMesh and emotionDetection both map to the FaceLandmarker (blendshapes enabled)
  const initializerByFeature = new Map([
    ['faceDetection', initializeFaceDetection],
    ['faceMesh', initializeFaceLandmarks],
    ['emotionDetection', initializeFaceLandmarks],
    ['handTracking', initializeHandTracking],
    ['poseDetection', initializePoseDetection],
    ['bodySegmentation', initializeBodySegmentation]
  ]);

  // Enable new features
  for (const feature of toEnable) {
    try {
      const initialize = initializerByFeature.get(feature);
      if (initialize) {
        await initialize();
      }
      activeFeatures.add(feature);
      log(`✅ Enabled feature: ${feature}`);
    } catch (error) {
      log(`❌ Failed to enable feature ${feature}:`, error);

      // A memory error leaves the worker unusable until it is restarted
      if (error.message && error.message.includes('Out of memory')) {
        memoryPressureDetected = true;
        workerHealthy = false;
        throw new Error(`Memory exhausted while enabling ${feature}. Worker restart required.`);
      }

      throw error;
    }
  }
}
838
-
839
/**
 * Legacy entry point kept for backward compatibility; it simply forwards to
 * the queued update.
 *
 * @param {string[]} features - Feature list for this update.
 * @returns {Promise<Object>} Whatever the queued update settles with.
 */
async function handleConfigUpdate(features) {
  const outcome = await queueConfigUpdate(features);
  return outcome;
}
845
-
846
// Message handler: dispatches 'debug', 'init', 'config' and 'process' messages.
// Every reply is posted as { type: 'result', success, ... }; 'debug' posts nothing.
self.onmessage = async (event) => {
  const { data: message } = event;
  const { type } = message;
  const reply = (fields) => self.postMessage({ type: 'result', ...fields });

  // Only log non-process messages to avoid per-frame spam
  if (type !== 'process') {
    log('Received message:', type);
  }

  try {
    if (type === 'debug') {
      DEBUG = Boolean(message.enabled);
      log('Debug mode', DEBUG ? 'enabled' : 'disabled');
    } else if (type === 'init') {
      log('Received init message');

      try {
        await initializeVision();
        log('Vision runtime ready for feature loading');
      } catch (initError) {
        log('❌ Vision runtime initialization failed:', initError);
        throw initError;
      }

      reply({ success: true, data: { initialized: true } });
    } else if (type === 'config') {
      log('Received config message:', message.features);

      try {
        const result = await handleConfigUpdate(message.features);
        reply({ success: true, data: result });
      } catch (configError) {
        log('❌ Config update failed:', configError);

        // An unhealthy worker tells the caller to restart it; any other
        // failure goes through the generic error reply below
        const needsRestart = !workerHealthy || memoryPressureDetected;
        if (!needsRestart) {
          throw configError;
        }
        reply({ success: false, error: configError.message, restartRequired: true });
      }
    } else if (type === 'process') {
      try {
        // The ImageBitmap is handed to MediaPipe directly (no conversion)
        const results = await processFrame(message.bitmap, message.timestamp, message.features);
        reply({ success: true, data: results });
      } finally {
        // Clean up ImageBitmap after processing
        if (message.bitmap && typeof message.bitmap.close === 'function') {
          message.bitmap.close();
        }
      }
    } else {
      log('❌ Unknown message type:', type);
      reply({ success: false, error: `Unknown message type: ${type}` });
    }
  } catch (error) {
    log('❌ Error handling message:', error);
    reply({
      success: false,
      error: error instanceof Error ? error.message : String(error)
    });
  }
};
953
-
954
- log('CV Tasks Worker initialized and ready');