@viji-dev/core 0.3.19 → 0.3.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -13
- package/dist/artist-dts-p5.js +1 -1
- package/dist/artist-dts.js +1 -1
- package/dist/artist-global.d.ts +65 -7
- package/dist/assets/cv-tasks.worker.js +302 -47
- package/dist/assets/viji.worker-bm-hvzXt.js +25975 -0
- package/dist/assets/viji.worker-bm-hvzXt.js.map +1 -0
- package/dist/{essentia-wasm.web-C7QoUtrj.js → essentia-wasm.web-C1URJxCY.js} +2 -2
- package/dist/{essentia-wasm.web-C7QoUtrj.js.map → essentia-wasm.web-C1URJxCY.js.map} +1 -1
- package/dist/{index-BKGarA3m.js → index-trkn0FNW.js} +699 -739
- package/dist/index-trkn0FNW.js.map +1 -0
- package/dist/index.d.ts +161 -206
- package/dist/index.js +1 -1
- package/dist/shader-uniforms.js +403 -23
- package/package.json +3 -2
- package/dist/assets/viji.worker-BnDb6mPh.js +0 -4325
- package/dist/assets/viji.worker-BnDb6mPh.js.map +0 -1
- package/dist/index-BKGarA3m.js.map +0 -1
|
@@ -5,24 +5,27 @@
|
|
|
5
5
|
* Uses importScripts() to load MediaPipe Tasks Vision UMD bundle.
|
|
6
6
|
*/
|
|
7
7
|
|
|
// Runtime-toggled debug flag — flipped by CVSystem via the 'debug' message.
let DEBUG = false;

/**
 * Console logger for the worker, active only while DEBUG is on.
 * Prefixes every entry so worker output is distinguishable in the console.
 *
 * @param {...*} args - values forwarded verbatim to console.log
 */
function log(...args) {
  if (!DEBUG) {
    return;
  }
  console.log('🔧 [CV Tasks Worker]', ...args);
}
|
|
15
|
+
|
|
8
16
|
// Define CommonJS environment for MediaPipe bundle.
// The vision_bundle.js UMD wrapper detects `module`/`exports` on the global
// and attaches its API to them (read back later via self.module.exports).
self.exports = {};
self.module = { exports: {} };

// Import MediaPipe Tasks Vision UMD bundle (synchronous classic-worker load).
log('Starting to load vision_bundle.js...');
try {
  importScripts('/dist/assets/vision_bundle.js');
  log('vision_bundle.js loaded successfully');
} catch (error) {
  // Error is logged but not rethrown: the worker keeps running, and later
  // initialization will surface the failure when FilesetResolver is missing.
  console.error('❌ [CV Tasks Worker] Failed to load vision_bundle.js:', error);
}
|
|
20
28
|
|
|
21
|
-
// Debug: Check what's available after import (disabled for production)
|
|
22
|
-
// console.log('🔧 [CV Tasks Worker] Available globals after import:', Object.keys(self));
|
|
23
|
-
// console.log('🔧 [CV Tasks Worker] module.exports:', self.module.exports);
|
|
24
|
-
// console.log('🔧 [CV Tasks Worker] exports:', self.exports);
|
|
25
|
-
|
|
26
29
|
// MediaPipe model instances
|
|
27
30
|
let faceDetector = null;
|
|
28
31
|
let faceLandmarker = null;
|
|
@@ -45,14 +48,236 @@ let processingConfig = false;
|
|
|
45
48
|
let workerHealthy = true;
|
|
46
49
|
let memoryPressureDetected = false;
|
|
47
50
|
|
|
48
|
-
//
|
|
51
|
+
// Safe zero-defaults for face data when features are inactive.
// Frozen because the same object instances are shared across all result
// frames — consumers must not be able to mutate them.
const EMPTY_EXPRESSIONS = Object.freeze({
  neutral: 0, happy: 0, sad: 0, angry: 0, surprised: 0, disgusted: 0, fearful: 0
});

// Neutral head pose (degrees).
const EMPTY_HEAD_POSE = Object.freeze({ pitch: 0, yaw: 0, roll: 0 });

// All ARKit-style blendshape keys at zero. Besides serving as the default,
// its key list (via Object.keys) defines the dimension set for the emotion
// prototype vector space (see PROTOTYPE_KEYS).
const EMPTY_BLENDSHAPES = Object.freeze({
  browDownLeft: 0, browDownRight: 0, browInnerUp: 0, browOuterUpLeft: 0, browOuterUpRight: 0,
  cheekPuff: 0, cheekSquintLeft: 0, cheekSquintRight: 0,
  eyeBlinkLeft: 0, eyeBlinkRight: 0,
  eyeLookDownLeft: 0, eyeLookDownRight: 0, eyeLookInLeft: 0, eyeLookInRight: 0,
  eyeLookOutLeft: 0, eyeLookOutRight: 0, eyeLookUpLeft: 0, eyeLookUpRight: 0,
  eyeSquintLeft: 0, eyeSquintRight: 0, eyeWideLeft: 0, eyeWideRight: 0,
  jawForward: 0, jawLeft: 0, jawOpen: 0, jawRight: 0,
  mouthClose: 0, mouthDimpleLeft: 0, mouthDimpleRight: 0,
  mouthFrownLeft: 0, mouthFrownRight: 0, mouthFunnel: 0, mouthLeft: 0,
  mouthLowerDownLeft: 0, mouthLowerDownRight: 0, mouthPressLeft: 0, mouthPressRight: 0,
  mouthPucker: 0, mouthRight: 0, mouthRollLower: 0, mouthRollUpper: 0,
  mouthShrugLower: 0, mouthShrugUpper: 0, mouthSmileLeft: 0, mouthSmileRight: 0,
  mouthStretchLeft: 0, mouthStretchRight: 0, mouthUpperUpLeft: 0, mouthUpperUpRight: 0,
  noseSneerLeft: 0, noseSneerRight: 0, tongueOut: 0
});
|
|
74
|
+
|
|
75
|
+
/**
 * Convert a MediaPipe faceBlendshapes categories array into a flat record
 * keyed by blendshape name.
 *
 * @param {{categoryName: string, score: number}[]} categories - one entry per blendshape
 * @returns {Object<string, number>} blendshape name → score
 */
function buildBlendshapesRecord(categories) {
  return Object.fromEntries(
    categories.map((category) => [category.categoryName, category.score])
  );
}
|
|
85
|
+
|
|
86
|
+
/**
 * EMFACS-based emotion prototype vectors (Ekman FACS → ARKit blendshapes).
 * Weights reflect each blendshape's reliability in MediaPipe's 2D web model.
 * Known near-zero blendshapes (cheekSquint*, noseSneer*, eyeWide*) are
 * down-weighted and supplemented by correlated signals that do activate.
 *
 * Each prototype is a sparse vector over PROTOTYPE_KEYS; keys that are
 * absent read as 0. Consumed by emotionCosineSimilarity() via
 * mapBlendshapesToEmotions(); magnitudes are pre-computed in
 * PROTOTYPE_MAGNITUDES. Weights are hand-tuned — change with care.
 *
 * Reference: Aldenhoven et al. (2026) "Real-Time Emotion Recognition
 * Performance of Mobile Devices" — EMFACS cosine similarity approach,
 * 68.3% accuracy on 7 emotions, exceeding human raters (58.9%).
 */
const EMOTION_PROTOTYPES = {
  // mouthSmile is unique to happiness — no other emotion uses it.
  // eyeSquint is a secondary "Duchenne smile" indicator.
  happy: {
    mouthSmileLeft: 1.0, mouthSmileRight: 1.0,
    eyeSquintLeft: 0.3, eyeSquintRight: 0.3
  },
  // Pouty/trembling lip: mouthShrugLower (chin raiser) is the primary signal,
  // mouthPucker (compressed lips) secondary. Compact prototype so it wins
  // over angry when the differentiating upper-face signals are absent.
  sad: {
    mouthShrugLower: 1.0,
    mouthPucker: 0.8
  },
  // Shares sad's mouth signals at lower weight, differentiated by upper-face
  // tension: eyeSquint + browDown. These extra dimensions shift the cosine
  // direction away from sad only when genuinely activated.
  angry: {
    mouthShrugLower: 0.6, mouthPucker: 0.5,
    eyeSquintLeft: 1.0, eyeSquintRight: 1.0,
    browDownLeft: 1.0, browDownRight: 1.0
  },
  // Brow raise only — the simplest, most distinctive prototype.
  // jawOpen removed to avoid overlap with fearful.
  surprised: {
    browInnerUp: 1.0,
    browOuterUpLeft: 1.0, browOuterUpRight: 1.0
  },
  // mouthUpperUp (upper lip raise) is the unique primary signal.
  // mouthFrown supports, browDown at low weight for wrinkled-brow disgust.
  disgusted: {
    mouthUpperUpLeft: 1.0, mouthUpperUpRight: 1.0,
    mouthFrownLeft: 0.8, mouthFrownRight: 0.8,
    browDownLeft: 0.3, browDownRight: 0.3
  },
  // Shares surprised's brow raise, differentiated by jawOpen (rare in other
  // emotions at even 10-20%). jawOpen is the primary differentiator.
  fearful: {
    browInnerUp: 0.8, browOuterUpLeft: 0.8, browOuterUpRight: 0.8,
    jawOpen: 1.0
  }
};
|
|
138
|
+
|
|
139
|
+
// Dimension list for the emotion vector space: every known blendshape key.
const PROTOTYPE_KEYS = Object.keys(EMPTY_BLENDSHAPES);

// Pre-compute each prototype's Euclidean magnitude so the per-frame cosine
// similarity only has to normalize the observed vector.
const PROTOTYPE_MAGNITUDES = {};
for (const [emotion, proto] of Object.entries(EMOTION_PROTOTYPES)) {
  const sumOfSquares = PROTOTYPE_KEYS.reduce((acc, key) => {
    const weight = proto[key] || 0;
    return acc + weight * weight;
  }, 0);
  PROTOTYPE_MAGNITUDES[emotion] = Math.sqrt(sumOfSquares);
}
|
|
151
|
+
|
|
152
|
+
// Noise floor: blendshape values at or below this are treated as zero to
// prevent resting-state activations from matching emotion prototypes.
const BLENDSHAPE_NOISE_FLOOR = 0.10;

/**
 * Cosine similarity between an observed blendshape vector and a prototype:
 *   cos(v, p) = (v · p) / (|v| * |p|)
 * Observed values at or below BLENDSHAPE_NOISE_FLOOR are clamped to zero to
 * suppress resting-state noise; prototype weights are used as-is.
 *
 * @param {Object<string, number>} observed - observed blendshape scores (missing keys read as 0)
 * @param {Object<string, number>} prototype - emotion prototype weights (missing keys read as 0)
 * @param {number} protoMag - pre-computed Euclidean magnitude of `prototype`
 * @param {string[]} [keys=PROTOTYPE_KEYS] - dimensions of the vector space;
 *   parameterized so the metric can be reused with other key sets
 * @returns {number} similarity (in [0, 1] for non-negative inputs); 0 when
 *   either vector is effectively zero
 */
function emotionCosineSimilarity(observed, prototype, protoMag, keys = PROTOTYPE_KEYS) {
  let dot = 0;
  let magO = 0;
  for (const key of keys) {
    const raw = observed[key] || 0;
    // Noise-floor clamp: strictly-greater test, so exactly 0.10 is zeroed.
    const o = raw > BLENDSHAPE_NOISE_FLOOR ? raw : 0;
    const p = prototype[key] || 0;
    dot += o * p;
    magO += o * o;
  }
  magO = Math.sqrt(magO);
  // Guard against division by ~zero for neutral faces or empty prototypes.
  if (magO < 1e-8 || protoMag < 1e-8) return 0;
  return dot / (magO * protoMag);
}
|
|
175
|
+
|
|
176
|
+
// Cross-suppression: when one emotion is confident, competing emotions are
// reduced. Uses raw (pre-suppression) scores so order doesn't matter.
// Shape: [suppressor] → { [target]: strength 0-1 }.
// Applied in mapBlendshapesToEmotions() stage 3 as
//   target *= (1 - suppressorScore * strength)
// whenever the suppressor's raw score exceeds 0.1. Hand-tuned values.
const EMOTION_INHIBITIONS = {
  happy: { angry: 0.7, sad: 0.5, disgusted: 0.4, fearful: 0.3 },
  sad: { happy: 0.3, angry: 0.2 },
  angry: { happy: 0.3, sad: 0.2 },
  surprised: { angry: 0.3, sad: 0.3 },
  disgusted: { happy: 0.4, surprised: 0.2 },
  fearful: { happy: 0.3, angry: 0.2 }
};
|
|
187
|
+
|
|
188
|
+
/**
 * Classify observed blendshapes into emotions using a 3-stage pipeline:
 *   1. Cosine similarity against EMFACS prototypes (base scores)
 *   2. Key-signal boosters for defining blendshapes (mouthPress → angry)
 *   3. Cross-emotion inhibition matrix (happy suppresses angry, etc.)
 * A synthetic `neutral` score dominates when no emotion is confident.
 *
 * @param {Object<string, number>} bs - flat blendshape record (name → score)
 * @returns {{neutral: number, happy: number, sad: number, angry: number,
 *            surprised: number, disgusted: number, fearful: number}}
 */
function mapBlendshapesToEmotions(bs) {
  const floor = BLENDSHAPE_NOISE_FLOOR;

  // --- Stage 1: base score per emotion via cosine similarity, clamped ≥ 0 ---
  const score = {};
  for (const [emotion, proto] of Object.entries(EMOTION_PROTOTYPES)) {
    score[emotion] = Math.max(
      0,
      emotionCosineSimilarity(bs, proto, PROTOTYPE_MAGNITUDES[emotion])
    );
  }

  // --- Stage 2: boosters ---
  // mouthPress is a defining angry signal kept out of the prototype (to avoid
  // resting-state contamination) but rewarded here when clearly present.
  const pressAvg = ((bs.mouthPressLeft || 0) + (bs.mouthPressRight || 0)) / 2;
  const mouthPress = Math.max(0, pressAvg - floor);
  if (mouthPress > 0) {
    score.angry = Math.min(1, score.angry + mouthPress * 0.3);
  }

  // --- Stage 3: cross-emotion inhibition ---
  // Drive suppression from a pre-suppression snapshot so the outcome does not
  // depend on iteration order.
  const raw = { ...score };
  for (const [suppressor, targets] of Object.entries(EMOTION_INHIBITIONS)) {
    const confidence = raw[suppressor] || 0;
    if (confidence <= 0.1) continue;
    for (const [target, strength] of Object.entries(targets)) {
      score[target] *= 1 - confidence * strength;
    }
  }

  // --- Neutral: dominant when no emotion is confident ---
  let peak = 0;
  for (const emotion of Object.keys(EMOTION_PROTOTYPES)) {
    if (score[emotion] > peak) peak = score[emotion];
  }
  const NEUTRAL_THRESHOLD = 0.35;
  const neutral = peak < NEUTRAL_THRESHOLD ? 1.0 : Math.max(0, 1.0 - peak * 1.5);

  return {
    neutral,
    happy: score.happy || 0,
    sad: score.sad || 0,
    angry: score.angry || 0,
    surprised: score.surprised || 0,
    disgusted: score.disgusted || 0,
    fearful: score.fearful || 0
  };
}
|
|
246
|
+
|
|
247
|
+
/**
 * Compute head pose (pitch, yaw, roll in degrees) from MediaPipe FaceMesh
 * landmarks. Orientation is derived from five stable keypoints: nose tip,
 * chin, the outer eye corners, and the forehead.
 *
 * @param {{x: number, y: number, z: number}[]} landmarks - FaceMesh points
 * @returns {{pitch: number, yaw: number, roll: number}} degrees, clamped to
 *   ±90 (pitch/yaw) and ±180 (roll); all zeros when any keypoint is missing
 */
function computeHeadPoseFromLandmarks(landmarks) {
  const RAD_TO_DEG = 180 / Math.PI;

  // Key landmark indices in the MediaPipe FaceMesh topology.
  const noseTip = landmarks[1];
  const chin = landmarks[152];
  const leftEye = landmarks[33];
  const rightEye = landmarks[263];
  const forehead = landmarks[10];

  // Neutral pose when any keypoint is absent (short/partial landmark array).
  if (!noseTip || !chin || !leftEye || !rightEye || !forehead) {
    return { pitch: 0, yaw: 0, roll: 0 };
  }

  // Yaw: horizontal offset of the nose tip from the eye midpoint, measured
  // against the nose-to-eye-plane depth (+0.001 avoids a zero denominator).
  const eyeMidX = (leftEye.x + rightEye.x) / 2;
  const eyeMidZ = (leftEye.z + rightEye.z) / 2 || 0; // NaN-safe when z missing
  const yaw =
    Math.atan2(noseTip.x - eyeMidX, Math.abs(noseTip.z - eyeMidZ) + 0.001) * RAD_TO_DEG;

  // Pitch: where the nose sits along the forehead→chin axis (0.5 = centered).
  const faceHeight =
    Math.sqrt((chin.x - forehead.x) ** 2 + (chin.y - forehead.y) ** 2) || 0.001;
  const relativeNoseY = (noseTip.y - forehead.y) / faceHeight;
  const pitch = (relativeNoseY - 0.5) * 180;

  // Roll: tilt of the line through the two eye corners.
  const roll = Math.atan2(rightEye.y - leftEye.y, rightEye.x - leftEye.x) * RAD_TO_DEG;

  // Clamp to sane ranges; NaN (e.g. missing z/degenerate geometry) maps to 0.
  const clamp = (value, lo, hi) => Math.max(lo, Math.min(hi, value));
  return {
    pitch: Number.isNaN(pitch) ? 0 : clamp(pitch, -90, 90),
    yaw: Number.isNaN(yaw) ? 0 : clamp(yaw, -90, 90),
    roll: Number.isNaN(roll) ? 0 : clamp(roll, -180, 180)
  };
}
|
|
57
282
|
|
|
58
283
|
/**
|
|
@@ -60,21 +285,18 @@ function log(...args) {
|
|
|
60
285
|
*/
|
|
61
286
|
async function initializeVision() {
|
|
62
287
|
if (isInitialized) {
|
|
63
|
-
|
|
288
|
+
log('Vision already initialized, skipping');
|
|
64
289
|
return;
|
|
65
290
|
}
|
|
66
291
|
|
|
67
292
|
try {
|
|
68
|
-
|
|
293
|
+
log('Starting MediaPipe Tasks Vision initialization...');
|
|
69
294
|
|
|
70
|
-
// Initialize the vision runtime with WASM files
|
|
71
|
-
// MediaPipe Tasks Vision expects the base path without trailing slash
|
|
72
295
|
const wasmBasePath = '/dist/assets/wasm';
|
|
73
296
|
log('WASM base path:', wasmBasePath);
|
|
74
297
|
|
|
75
|
-
// Try different ways to access FilesetResolver
|
|
76
298
|
const FilesetResolver = self.FilesetResolver || self.module.exports.FilesetResolver || self.exports.FilesetResolver;
|
|
77
|
-
|
|
299
|
+
log('FilesetResolver found:', !!FilesetResolver);
|
|
78
300
|
|
|
79
301
|
if (!FilesetResolver) {
|
|
80
302
|
throw new Error('FilesetResolver not found in any expected location');
|
|
@@ -139,12 +361,13 @@ async function initializeFaceLandmarks() {
|
|
|
139
361
|
delegate: 'GPU'
|
|
140
362
|
},
|
|
141
363
|
runningMode: 'VIDEO',
|
|
142
|
-
numFaces: 1
|
|
364
|
+
numFaces: 1,
|
|
365
|
+
outputFaceBlendshapes: true
|
|
143
366
|
};
|
|
144
367
|
|
|
145
368
|
const FaceLandmarker = self.FaceLandmarker || self.module.exports.FaceLandmarker || self.exports.FaceLandmarker;
|
|
146
369
|
faceLandmarker = await FaceLandmarker.createFromOptions(vision, options);
|
|
147
|
-
log('✅ Face Landmarker loaded');
|
|
370
|
+
log('✅ Face Landmarker loaded (blendshapes enabled)');
|
|
148
371
|
} catch (error) {
|
|
149
372
|
log('❌ Failed to load Face Landmarker:', error);
|
|
150
373
|
throw error;
|
|
@@ -256,42 +479,60 @@ async function processFrame(imageInput, timestamp, features) {
|
|
|
256
479
|
if (features.includes('faceDetection') && faceDetector) {
|
|
257
480
|
const detectionResult = faceDetector.detectForVideo(imageInput, timestamp);
|
|
258
481
|
results.faces = detectionResult.detections.map((detection) => ({
|
|
259
|
-
|
|
260
|
-
// Normalize coordinates to 0-1 range to match other CV features
|
|
482
|
+
bounds: {
|
|
261
483
|
x: detection.boundingBox.originX / imageInput.width,
|
|
262
484
|
y: detection.boundingBox.originY / imageInput.height,
|
|
263
485
|
width: detection.boundingBox.width / imageInput.width,
|
|
264
486
|
height: detection.boundingBox.height / imageInput.height
|
|
265
487
|
},
|
|
488
|
+
center: {
|
|
489
|
+
x: (detection.boundingBox.originX + detection.boundingBox.width / 2) / imageInput.width,
|
|
490
|
+
y: (detection.boundingBox.originY + detection.boundingBox.height / 2) / imageInput.height
|
|
491
|
+
},
|
|
266
492
|
landmarks: [],
|
|
267
|
-
expressions:
|
|
493
|
+
expressions: EMPTY_EXPRESSIONS,
|
|
494
|
+
headPose: EMPTY_HEAD_POSE,
|
|
495
|
+
blendshapes: EMPTY_BLENDSHAPES,
|
|
268
496
|
confidence: detection.categories[0]?.score || 0
|
|
269
497
|
}));
|
|
270
498
|
}
|
|
271
499
|
|
|
272
|
-
// Process face landmarks
|
|
273
|
-
if (features.includes('faceMesh') && faceLandmarker) {
|
|
500
|
+
// Process face landmarks (used by faceMesh and emotionDetection)
|
|
501
|
+
if ((features.includes('faceMesh') || features.includes('emotionDetection')) && faceLandmarker) {
|
|
274
502
|
const landmarkResult = faceLandmarker.detectForVideo(imageInput, timestamp);
|
|
275
503
|
if (landmarkResult.faceLandmarks.length > 0) {
|
|
276
504
|
const landmarks = landmarkResult.faceLandmarks[0];
|
|
277
505
|
|
|
278
|
-
// If no face detection results exist, create a basic face structure
|
|
279
506
|
if (!results.faces) {
|
|
280
507
|
results.faces = [{
|
|
281
|
-
|
|
508
|
+
bounds: null,
|
|
509
|
+
center: null,
|
|
282
510
|
landmarks: [],
|
|
283
|
-
expressions:
|
|
284
|
-
|
|
511
|
+
expressions: EMPTY_EXPRESSIONS,
|
|
512
|
+
headPose: EMPTY_HEAD_POSE,
|
|
513
|
+
blendshapes: EMPTY_BLENDSHAPES,
|
|
514
|
+
confidence: 0.8
|
|
285
515
|
}];
|
|
286
516
|
}
|
|
287
517
|
|
|
288
|
-
|
|
518
|
+
const mappedLandmarks = landmarks.map((landmark) => ({
|
|
519
|
+
x: landmark.x,
|
|
520
|
+
y: landmark.y,
|
|
521
|
+
z: landmark.z || 0
|
|
522
|
+
}));
|
|
523
|
+
|
|
289
524
|
if (results.faces[0]) {
|
|
290
|
-
results.faces[0].landmarks =
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
525
|
+
results.faces[0].landmarks = mappedLandmarks;
|
|
526
|
+
results.faces[0].headPose = computeHeadPoseFromLandmarks(landmarks);
|
|
527
|
+
|
|
528
|
+
// Populate emotion data when emotionDetection is active
|
|
529
|
+
if (features.includes('emotionDetection') &&
|
|
530
|
+
landmarkResult.faceBlendshapes &&
|
|
531
|
+
landmarkResult.faceBlendshapes.length > 0) {
|
|
532
|
+
const bs = buildBlendshapesRecord(landmarkResult.faceBlendshapes[0].categories);
|
|
533
|
+
results.faces[0].blendshapes = bs;
|
|
534
|
+
results.faces[0].expressions = mapBlendshapesToEmotions(bs);
|
|
535
|
+
}
|
|
295
536
|
}
|
|
296
537
|
}
|
|
297
538
|
}
|
|
@@ -343,14 +584,7 @@ async function processFrame(imageInput, timestamp, features) {
|
|
|
343
584
|
height: segmentResult.categoryMask.height
|
|
344
585
|
};
|
|
345
586
|
|
|
346
|
-
|
|
347
|
-
if (DEBUG) {
|
|
348
|
-
console.log('🔧 [CV Tasks Worker] Segmentation mask:', {
|
|
349
|
-
width: results.segmentation.width,
|
|
350
|
-
height: results.segmentation.height,
|
|
351
|
-
maskSize: results.segmentation.mask.length
|
|
352
|
-
});
|
|
353
|
-
}
|
|
587
|
+
log('Segmentation mask:', results.segmentation.width, 'x', results.segmentation.height);
|
|
354
588
|
} finally {
|
|
355
589
|
// CRITICAL: Close MPMask instance to prevent resource leaks
|
|
356
590
|
segmentResult.categoryMask.close();
|
|
@@ -449,8 +683,18 @@ async function handleConfigUpdateInternal(features) {
|
|
|
449
683
|
faceDetector = null;
|
|
450
684
|
break;
|
|
451
685
|
case 'faceMesh':
|
|
452
|
-
|
|
453
|
-
|
|
686
|
+
// Only teardown FaceLandmarker if emotionDetection also not active
|
|
687
|
+
if (!newFeatures.has('emotionDetection')) {
|
|
688
|
+
cleanupPromises.push(cleanupWasmInstance(faceLandmarker, 'FaceLandmarker'));
|
|
689
|
+
faceLandmarker = null;
|
|
690
|
+
}
|
|
691
|
+
break;
|
|
692
|
+
case 'emotionDetection':
|
|
693
|
+
// Only teardown FaceLandmarker if faceMesh also not active
|
|
694
|
+
if (!newFeatures.has('faceMesh')) {
|
|
695
|
+
cleanupPromises.push(cleanupWasmInstance(faceLandmarker, 'FaceLandmarker'));
|
|
696
|
+
faceLandmarker = null;
|
|
697
|
+
}
|
|
454
698
|
break;
|
|
455
699
|
case 'handTracking':
|
|
456
700
|
cleanupPromises.push(cleanupWasmInstance(handLandmarker, 'HandLandmarker'));
|
|
@@ -485,6 +729,8 @@ async function handleConfigUpdateInternal(features) {
|
|
|
485
729
|
await initializeFaceDetection();
|
|
486
730
|
break;
|
|
487
731
|
case 'faceMesh':
|
|
732
|
+
case 'emotionDetection':
|
|
733
|
+
// Both share the FaceLandmarker (with blendshapes enabled)
|
|
488
734
|
await initializeFaceLandmarks();
|
|
489
735
|
break;
|
|
490
736
|
case 'handTracking':
|
|
@@ -525,10 +771,19 @@ async function handleConfigUpdate(features) {
|
|
|
525
771
|
self.onmessage = async (event) => {
|
|
526
772
|
const message = event.data;
|
|
527
773
|
|
|
528
|
-
|
|
774
|
+
// Only log non-process messages to avoid per-frame spam
|
|
775
|
+
if (message.type !== 'process') {
|
|
776
|
+
log('Received message:', message.type);
|
|
777
|
+
}
|
|
529
778
|
|
|
530
779
|
try {
|
|
531
780
|
switch (message.type) {
|
|
781
|
+
case 'debug': {
|
|
782
|
+
DEBUG = !!message.enabled;
|
|
783
|
+
log('Debug mode', DEBUG ? 'enabled' : 'disabled');
|
|
784
|
+
break;
|
|
785
|
+
}
|
|
786
|
+
|
|
532
787
|
case 'init': {
|
|
533
788
|
log('Received init message');
|
|
534
789
|
|