npm - @mediapipe/tasks-vision - Versions diffs - 0.10.0 → 0.10.2-rc1 - Mend

@mediapipe/tasks-vision 0.10.0 → 0.10.2-rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/README.md +58 -32
package/package.json +5 -2
package/vision.d.ts +98 -32
package/vision_bundle.cjs +2 -0
package/vision_bundle.cjs.map +1 -0
package/vision_bundle.mjs +2 -0
package/vision_bundle.mjs.map +1 -0
package/wasm/vision_wasm_internal.wasm +0 -0
package/wasm/vision_wasm_nosimd_internal.wasm +0 -0
package/vision_bundle.js +0 -1
package/wasm/vision_wasm_internal.js +0 -8495
package/wasm/vision_wasm_nosimd_internal.js +0 -8485

package/README.md CHANGED Viewed

@@ -2,23 +2,25 @@
 This package contains the vision tasks for MediaPipe.
-## Face Detection
+## Face Detector
 The MediaPipe Face Detector task lets you detect the presence and location of
 faces within images or videos.
 ```
 const vision = await FilesetResolver.forVisionTasks(
-    "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
+    "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision/wasm"
 );
 const faceDetector = await FaceDetector.createFromModelPath(vision,
-    "https://storage.googleapis.com/mediapipe-tasks/face_detector/face_detection_short_range.tflite"
+    "https://storage.googleapis.com/mediapipe-models/face_detector/blaze_face_short_range/float16/1/blaze_face_short_range.tflite"
 );
 const image = document.getElementById("image") as HTMLImageElement;
 const detections = faceDetector.detect(image);
 ```
-## Face Landmark Detection
+For more information, refer to the [Face Detector](https://developers.google.com/mediapipe/solutions/vision/face_detector/web_js) documentation.
+## Face Landmarker
 The MediaPipe Face Landmarker task lets you detect the landmarks of faces in
 an image. You can use this Task to localize key points of a face and render
@@ -26,31 +28,33 @@ visual effects over the faces.
 ```
 const vision = await FilesetResolver.forVisionTasks(
-    "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
+    "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision/wasm"
 );
 const faceLandmarker = await FaceLandmarker.createFromModelPath(vision,
-    "https://storage.googleapis.com/mediapipe-tasks/face_landmarker/face_landmarker.task"
+    "https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task"
 );
 const image = document.getElementById("image") as HTMLImageElement;
 const landmarks = faceLandmarker.detect(image);
 ```
+For more information, refer to the [Face Landmarker](https://developers.google.com/mediapipe/solutions/vision/face_landmarker/web_js) documentation.
 ## Face Stylizer
 The MediaPipe Face Stylizer lets you perform face stylization on images.
 ```
 const vision = await FilesetResolver.forVisionTasks(
-    "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
+    "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision/wasm"
 );
 const faceStylizer = await FaceStylizer.createFromModelPath(vision,
-    "https://storage.googleapis.com/mediapipe-tasks/face_stylizer/face_stylizer_with_metadata.tflite"
+    "https://storage.googleapis.com/mediapipe-models/face_stylizer/blaze_face_stylizer/float32/1/blaze_face_stylizer.task"
 );
 const image = document.getElementById("image") as HTMLImageElement;
 const stylizedImage = faceStylizer.stylize(image);
 ```
-## Gesture Recognition
+## Gesture Recognizer
 The MediaPipe Gesture Recognizer task lets you recognize hand gestures in real
 time, and provides the recognized hand gesture results along with the landmarks
@@ -59,16 +63,18 @@ from a user, and invoke application features that correspond to those gestures.
 ```
 const vision = await FilesetResolver.forVisionTasks(
-    "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
+    "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision/wasm"
 );
 const gestureRecognizer = await GestureRecognizer.createFromModelPath(vision,
-    "https://storage.googleapis.com/mediapipe-tasks/gesture_recognizer/gesture_recognizer.task"
+    "https://storage.googleapis.com/mediapipe-models/gesture_recognizer/gesture_recognizer/float16/1/gesture_recognizer.task"
 );
 const image = document.getElementById("image") as HTMLImageElement;
 const recognitions = gestureRecognizer.recognize(image);
 ```
-## Hand Landmark Detection
+For more information, refer to the [Gesture Recognizer](https://developers.google.com/mediapipe/solutions/vision/gesture_recognizer/web_js) documentation.
+## Hand Landmarker
 The MediaPipe Hand Landmarker task lets you detect the landmarks of the hands in
 an image. You can use this Task to localize key points of the hands and render
@@ -76,18 +82,18 @@ visual effects over the hands.
 ```
 const vision = await FilesetResolver.forVisionTasks(
-    "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
+    "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision/wasm"
 );
 const handLandmarker = await HandLandmarker.createFromModelPath(vision,
-    "https://storage.googleapis.com/mediapipe-tasks/hand_landmarker/hand_landmarker.task"
+    "https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task"
 );
 const image = document.getElementById("image") as HTMLImageElement;
 const landmarks = handLandmarker.detect(image);
 ```
-For more information, refer to the [Handlandmark Detection](https://developers.google.com/mediapipe/solutions/vision/hand_landmarker/web_js) documentation.
+For more information, refer to the [Hand Landmarker](https://developers.google.com/mediapipe/solutions/vision/hand_landmarker/web_js) documentation.
-## Image Classification
+## Image Classifier
 The MediaPipe Image Classifier task lets you perform classification on images.
 You can use this task to identify what an image represents among a set of
@@ -95,27 +101,42 @@ categories defined at training time.
 ```
 const vision = await FilesetResolver.forVisionTasks(
-    "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
+    "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision/wasm"
 );
 const imageClassifier = await ImageClassifier.createFromModelPath(vision,
-    "https://storage.googleapis.com/mediapipe-tasks/image_classifier/efficientnet_lite0_uint8.tflite"
+    "https://storage.googleapis.com/mediapipe-models/image_classifier/efficientnet_lite0/float32/1/efficientnet_lite0.tflite"
 );
 const image = document.getElementById("image") as HTMLImageElement;
 const classifications = imageClassifier.classify(image);
 ```
-For more information, refer to the [Image Classification](https://developers.google.com/mediapipe/solutions/vision/image_classifier/web_js) documentation.
+For more information, refer to the [Image Classifier](https://developers.google.com/mediapipe/solutions/vision/image_classifier/web_js) documentation.
+## Image Embedder
+The MediaPipe Image Embedder extracts embeddings from an image.
+```
+const vision = await FilesetResolver.forVisionTasks(
+    "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision/wasm"
+);
+const imageEmbedder = await ImageEmbedder.createFromModelPath(vision,
+    "https://storage.googleapis.com/mediapipe-models/image_embedder/mobilenet_v3_small/float32/1/mobilenet_v3_small.tflite"
+);
+const image = document.getElementById("image") as HTMLImageElement;
+const embeddings = imageEmbedder.embed(image);
+```
-## Image Segmentation
+## Image Segmenter
 The MediaPipe Image Segmenter lets you segment an image into categories.
 ```
 const vision = await FilesetResolver.forVisionTasks(
-    "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
+    "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision/wasm"
 );
 const imageSegmenter = await ImageSegmenter.createFromModelPath(vision,
-    "https://storage.googleapis.com/mediapipe-tasks/image_segmenter/selfie_segmentation.tflite"
+    "https://storage.googleapis.com/mediapipe-models/image_segmenter/deeplab_v3/float32/1/deeplab_v3.tflite"
 );
 const image = document.getElementById("image") as HTMLImageElement;
 imageSegmenter.segment(image, (masks, width, height) => {
@@ -123,18 +144,20 @@ imageSegmenter.segment(image, (masks, width, height) => {
 });
 ```
-## Interactive Segmentation
+For more information, refer to the [Image Segmenter](https://developers.google.com/mediapipe/solutions/vision/image_segmenter/web_js) documentation.
+## Interactive Segmenter
 The MediaPipe Interactive Segmenter lets you select a region of interest to
 segment an image by.
 ```
 const vision = await FilesetResolver.forVisionTasks(
-    "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
+    "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision/wasm"
 );
 const interactiveSegmenter = await InteractiveSegmenter.createFromModelPath(
     vision,
-    "https://storage.googleapis.com/mediapipe-tasks/interactive_segmenter/ptm_512_hdt_ptm_woid.tflite
+    "https://storage.googleapis.com/mediapipe-models/interactive_segmenter/magic_touch/float32/1/magic_touch.tflite"
 );
 const image = document.getElementById("image") as HTMLImageElement;
 interactiveSegmenter.segment(image, { keypoint: { x: 0.1, y: 0.2 } },
@@ -142,17 +165,19 @@ interactiveSegmenter.segment(image, { keypoint: { x: 0.1, y: 0.2 } },
 );
 ```
-## Object Detection
+For more information, refer to the [Interactive Segmenter](https://developers.google.com/mediapipe/solutions/vision/interactive_segmenter/web_js) documentation.
+## Object Detector
 The MediaPipe Object Detector task lets you detect the presence and location of
 multiple classes of objects within images or videos.
 ```
 const vision = await FilesetResolver.forVisionTasks(
-    "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
+    "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision/wasm"
 );
 const objectDetector = await ObjectDetector.createFromModelPath(vision,
-    "https://storage.googleapis.com/mediapipe-tasks/object_detector/efficientdet_lite0_uint8.tflite"
+    "https://storage.googleapis.com/mediapipe-models/object_detector/efficientdet_lite0/float16/1/efficientdet_lite0.tflite"
 );
 const image = document.getElementById("image") as HTMLImageElement;
 const detections = objectDetector.detect(image);
@@ -160,8 +185,7 @@ const detections = objectDetector.detect(image);
 For more information, refer to the [Object Detector](https://developers.google.com/mediapipe/solutions/vision/object_detector/web_js) documentation.
-## Pose Landmark Detection
+## Pose Landmarker
 The MediaPipe Pose Landmarker task lets you detect the landmarks of body poses
 in an image. You can use this Task to localize key points of a pose and render
@@ -169,11 +193,13 @@ visual effects over the body.
 ```
 const vision = await FilesetResolver.forVisionTasks(
-    "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
+    "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision/wasm"
 );
 const poseLandmarker = await PoseLandmarker.createFromModelPath(vision,
-    "model.task"
+    "https://storage.googleapis.com/mediapipe-models/pose_landmarker/pose_landmarker_lite/float16/1/pose_landmarker_lite.task"
 );
 const image = document.getElementById("image") as HTMLImageElement;
 const landmarks = poseLandmarker.detect(image);
 ```
+For more information, refer to the [Pose Landmarker](https://developers.google.com/mediapipe/solutions/vision/pose_landmarker/web_js) documentation.

package/package.json CHANGED Viewed

@@ -1,10 +1,13 @@
 {
   "name": "@mediapipe/tasks-vision",
-  "version": "0.10.0",
+  "version": "0.10.2-rc1",
   "description": "MediaPipe Vision Tasks",
-  "main": "vision_bundle.js",
+  "main": "vision_bundle.cjs",
+  "browser": "vision_bundle.mjs",
+  "module": "vision_bundle.mjs",
   "author": "mediapipe@google.com",
   "license": "Apache-2.0",
+  "type": "module",
   "types": "vision.d.ts",
   "homepage": "http://mediapipe.dev",
   "keywords": [ "AR", "ML", "Augmented", "MediaPipe", "MediaPipe Tasks" ]

package/vision.d.ts CHANGED Viewed

@@ -172,13 +172,14 @@ export declare interface Detection {
     /** The bounding box of the detected objects. */
     boundingBox?: BoundingBox;
     /**
-     * Optional list of keypoints associated with the detection. Keypoints
-     * represent interesting points related to the detection. For example, the
-     * keypoints represent the eye, ear and mouth from face detection model. Or
-     * in the template matching detection, e.g. KNIFT, they can represent the
-     * feature points for template matching.
-     */
-    keypoints?: NormalizedKeypoint[];
+     * List of keypoints associated with the detection. Keypoints represent
+     * interesting points related to the detection. For example, the keypoints
+     * represent the eye, ear and mouth from face detection model. Or in the
+     * template matching detection, e.g. KNIFT, they can represent the feature
+     * points for template matching. Contains an empty list if no keypoints are
+     * detected.
+     */
+    keypoints: NormalizedKeypoint[];
 }
 /** Detection results of a model. */
@@ -554,9 +555,9 @@ export declare interface FaceLandmarkerResult {
     /** Detected face landmarks in normalized image coordinates. */
     faceLandmarks: NormalizedLandmark[][];
     /** Optional face blendshapes results. */
-    faceBlendshapes?: Classifications[];
+    faceBlendshapes: Classifications[];
     /** Optional facial transformation matrix. */
-    facialTransformationMatrixes?: Matrix[];
+    facialTransformationMatrixes: Matrix[];
 }
 /** Performs face stylization on images. */
@@ -636,7 +637,7 @@ export declare class FaceStylizer extends VisionTaskRunner {
     /**
      * Performs face stylization on the provided single image and returns the
      * result. This method creates a copy of the resulting image and should not be
-     * used in high-throughput applictions. Only use this method when the
+     * used in high-throughput applications. Only use this method when the
      * FaceStylizer is created with the image running mode.
      *
      * @param image An image to process.
@@ -647,7 +648,7 @@ export declare class FaceStylizer extends VisionTaskRunner {
     /**
      * Performs face stylization on the provided single image and returns the
      * result. This method creates a copy of the resulting image and should not be
-     * used in high-throughput applictions. Only use this method when the
+     * used in high-throughput applications. Only use this method when the
      * FaceStylizer is created with the image running mode.
      *
      * The 'imageProcessingOptions' parameter can be used to specify one or all
@@ -714,7 +715,7 @@ export declare class FaceStylizer extends VisionTaskRunner {
     /**
      * Performs face stylization on the provided video frame. This method creates
      * a copy of the resulting image and should not be used in high-throughput
-     * applictions. Only use this method when the FaceStylizer is created with the
+     * applications. Only use this method when the FaceStylizer is created with the
      * video running mode.
      *
      * The input frame can be of any size. It's required to provide the video
@@ -1322,7 +1323,7 @@ export declare class ImageSegmenter extends VisionTaskRunner {
     /**
      * Performs image segmentation on the provided single image and returns the
      * segmentation result. This method creates a copy of the resulting masks and
-     * should not be used in high-throughput applictions. Only use this method
+     * should not be used in high-throughput applications. Only use this method
      * when the ImageSegmenter is created with running mode `image`.
      *
      * @param image An image to process.
@@ -1333,7 +1334,7 @@ export declare class ImageSegmenter extends VisionTaskRunner {
     /**
      * Performs image segmentation on the provided single image and returns the
      * segmentation result. This method creates a copy of the resulting masks and
-     * should not be used in high-v applictions. Only use this method when
+     * should not be used in high-throughput applications. Only use this method when
      * the ImageSegmenter is created with running mode `image`.
      *
      * @param image An image to process.
@@ -1385,7 +1386,7 @@ export declare class ImageSegmenter extends VisionTaskRunner {
     /**
      * Performs image segmentation on the provided video frame and returns the
      * segmentation result. This method creates a copy of the resulting masks and
-     * should not be used in high-v applictions. Only use this method when
+     * should not be used in high-throughput applications. Only use this method when
      * the ImageSegmenter is created with running mode `video`.
      *
      * @param videoFrame A video frame to process.
@@ -1432,19 +1433,46 @@ export declare interface ImageSegmenterOptions extends VisionTaskOptions {
 }
 /** The output result of ImageSegmenter. */
-export declare interface ImageSegmenterResult {
+export declare class ImageSegmenterResult {
+    /**
+     * Multiple masks represented as `Float32Array` or `WebGLTexture`-backed
+     * `MPImage`s where, for each mask, each pixel represents the prediction
+     * confidence, usually in the [0, 1] range.
+     */
+    readonly confidenceMasks?: MPMask[] | undefined;
+    /**
+     * A category mask represented as a `Uint8ClampedArray` or
+     * `WebGLTexture`-backed `MPImage` where each pixel represents the class
+     * which the pixel in the original image was predicted to belong to.
+     */
+    readonly categoryMask?: MPMask | undefined;
+    /**
+     * The quality scores of the result masks, in the range of [0, 1].
+     * Defaults to `1` if the model doesn't output quality scores. Each
+     * element corresponds to the score of the category in the model outputs.
+     */
+    readonly qualityScores?: number[] | undefined;
+    constructor(
     /**
      * Multiple masks represented as `Float32Array` or `WebGLTexture`-backed
      * `MPImage`s where, for each mask, each pixel represents the prediction
      * confidence, usually in the [0, 1] range.
      */
-    confidenceMasks?: MPMask[];
+    confidenceMasks?: MPMask[] | undefined,
     /**
      * A category mask represented as a `Uint8ClampedArray` or
-     * `WebGLTexture`-backed `MPImage` where each pixel represents the class which
-     * the pixel in the original image was predicted to belong to.
+     * `WebGLTexture`-backed `MPImage` where each pixel represents the class
+     * which the pixel in the original image was predicted to belong to.
      */
-    categoryMask?: MPMask;
+    categoryMask?: MPMask | undefined,
+    /**
+     * The quality scores of the result masks, in the range of [0, 1].
+     * Defaults to `1` if the model doesn't output quality scores. Each
+     * element corresponds to the score of the category in the model outputs.
+     */
+    qualityScores?: number[] | undefined);
+    /** Frees the resources held by the category and confidence masks. */
+    close(): void;
 }
 /**
@@ -1603,19 +1631,46 @@ export declare interface InteractiveSegmenterOptions extends TaskRunnerOptions {
 }
 /** The output result of InteractiveSegmenter. */
-export declare interface InteractiveSegmenterResult {
+export declare class InteractiveSegmenterResult {
+    /**
+     * Multiple masks represented as `Float32Array` or `WebGLTexture`-backed
+     * `MPImage`s where, for each mask, each pixel represents the prediction
+     * confidence, usually in the [0, 1] range.
+     */
+    readonly confidenceMasks?: MPMask[] | undefined;
+    /**
+     * A category mask represented as a `Uint8ClampedArray` or
+     * `WebGLTexture`-backed `MPImage` where each pixel represents the class
+     * which the pixel in the original image was predicted to belong to.
+     */
+    readonly categoryMask?: MPMask | undefined;
+    /**
+     * The quality scores of the result masks, in the range of [0, 1].
+     * Defaults to `1` if the model doesn't output quality scores. Each
+     * element corresponds to the score of the category in the model outputs.
+     */
+    readonly qualityScores?: number[] | undefined;
+    constructor(
     /**
      * Multiple masks represented as `Float32Array` or `WebGLTexture`-backed
      * `MPImage`s where, for each mask, each pixel represents the prediction
      * confidence, usually in the [0, 1] range.
      */
-    confidenceMasks?: MPMask[];
+    confidenceMasks?: MPMask[] | undefined,
     /**
      * A category mask represented as a `Uint8ClampedArray` or
-     * `WebGLTexture`-backed `MPImage` where each pixel represents the class which
-     * the pixel in the original image was predicted to belong to.
+     * `WebGLTexture`-backed `MPImage` where each pixel represents the class
+     * which the pixel in the original image was predicted to belong to.
+     */
+    categoryMask?: MPMask | undefined,
+    /**
+     * The quality scores of the result masks, in the range of [0, 1].
+     * Defaults to `1` if the model doesn't output quality scores. Each
+     * element corresponds to the score of the category in the model outputs.
      */
-    categoryMask?: MPMask;
+    qualityScores?: number[] | undefined);
+    /** Frees the resources held by the category and confidence masks. */
+    close(): void;
 }
 /**
@@ -1786,8 +1841,8 @@ export declare class MPMask {
     getAsUint8Array(): Uint8Array;
     /**
      * Returns the underlying mask as a single channel `Float32Array`. Note that
-     * this involves an expensive GPU to CPU transfer if the current mask is only
-     * available as a `WebGLTexture`.
+     * this involves an expensive GPU to CPU transfer if the current mask is
+     * only available as a `WebGLTexture`.
      *
      * @return The current mask as a Float32Array.
      */
@@ -1801,6 +1856,11 @@ export declare class MPMask {
      * @return The current mask as a WebGLTexture.
      */
     getAsWebGLTexture(): WebGLTexture;
+    /**
+     * Returns the texture format used for writing float textures on this
+     * platform.
+     */
+    getTexImage2DFormat(): GLenum;
     /**
      * Creates a copy of the resources stored in this `MPMask`. You can
      * invoke this method to extend the lifetime of a mask returned by a
@@ -2140,13 +2200,19 @@ export declare interface PoseLandmarkerOptions extends VisionTaskOptions {
  * Represents the pose landmarks detection results generated by `PoseLandmarker`.
  * Each vector element represents a single pose detected in the image.
  */
-export declare interface PoseLandmarkerResult {
-    /** Pose landmarks of detected poses. */
-    landmarks: NormalizedLandmark[][];
+export declare class PoseLandmarkerResult {
+    readonly landmarks: NormalizedLandmark[][];
     /** Pose landmarks in world coordinates of detected poses. */
-    worldLandmarks: Landmark[][];
+    readonly worldLandmarks: Landmark[][];
+    /** Segmentation mask for the detected pose. */
+    readonly segmentationMasks?: MPMask[] | undefined;
+    constructor(/** Pose landmarks of detected poses. */ landmarks: NormalizedLandmark[][],
+    /** Pose landmarks in world coordinates of detected poses. */
+    worldLandmarks: Landmark[][],
     /** Segmentation mask for the detected pose. */
-    segmentationMasks?: MPMask[];
+    segmentationMasks?: MPMask[] | undefined);
+    /** Frees the resources held by the segmentation masks. */
+    close(): void;
 }
 /**