npm - @mediapipe/tasks-vision - Versions diffs - 0.1.0-alpha-11 → 0.1.0-alpha-12 - Mend

@mediapipe/tasks-vision 0.1.0-alpha-11 → 0.1.0-alpha-12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/README.md +6 -5
package/package.json +1 -1
package/vision.d.ts +117 -69
package/vision_bundle.js +1 -1
package/wasm/vision_wasm_internal.js +275 -303
package/wasm/vision_wasm_internal.wasm +0 -0
package/wasm/vision_wasm_nosimd_internal.js +287 -315
package/wasm/vision_wasm_nosimd_internal.wasm +0 -0

package/README.md CHANGED Viewed

@@ -12,7 +12,7 @@ const vision = await FilesetResolver.forVisionTasks(
     "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
 );
 const faceDetector = await FaceDetector.createFromModelPath(vision,
-    "https://storage.googleapis.com/mediapipe-tasks/object_detector/efficientdet_lite0_uint8.tflite"
+    "https://storage.googleapis.com/mediapipe-tasks/face_detector/face_detection_short_range.tflite"
 );
 const image = document.getElementById("image") as HTMLImageElement;
 const detections = faceDetector.detect(image);
@@ -29,7 +29,7 @@ const vision = await FilesetResolver.forVisionTasks(
     "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
 );
 const faceLandmarker = await FaceLandmarker.createFromModelPath(vision,
-    "model.task"
+    "https://storage.googleapis.com/mediapipe-tasks/face_landmarker/face_landmarker.task"
 );
 const image = document.getElementById("image") as HTMLImageElement;
 const landmarks = faceLandmarker.detect(image);
@@ -44,7 +44,7 @@ const vision = await FilesetResolver.forVisionTasks(
     "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
 );
 const faceStylizer = await FaceStylizer.createFromModelPath(vision,
-    "model.tflite"
+    "https://storage.googleapis.com/mediapipe-tasks/face_stylizer/face_stylizer_with_metadata.tflite"
 );
 const image = document.getElementById("image") as HTMLImageElement;
 const stylizedImage = faceStylizer.stylize(image);
@@ -115,7 +115,7 @@ const vision = await FilesetResolver.forVisionTasks(
     "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
 );
 const imageSegmenter = await ImageSegmenter.createFromModelPath(vision,
-    "model.tflite"
+    "https://storage.googleapis.com/mediapipe-tasks/image_segmenter/selfie_segmentation.tflite"
 );
 const image = document.getElementById("image") as HTMLImageElement;
 imageSegmenter.segment(image, (masks, width, height) => {
@@ -133,7 +133,8 @@ const vision = await FilesetResolver.forVisionTasks(
     "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
 );
 const interactiveSegmenter = await InteractiveSegmenter.createFromModelPath(
-    vision, "model.tflite"
+    vision,
+    "https://storage.googleapis.com/mediapipe-tasks/interactive_segmenter/ptm_512_hdt_ptm_woid.tflite"
 );
 const image = document.getElementById("image") as HTMLImageElement;
 interactiveSegmenter.segment(image, { keypoint: { x: 0.1, y: 0.2 } },

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@mediapipe/tasks-vision",
-  "version": "0.1.0-alpha-11",
+  "version": "0.1.0-alpha-12",
   "description": "MediaPipe Vision Tasks",
   "main": "vision_bundle.js",
   "author": "mediapipe@google.com",

package/vision.d.ts CHANGED Viewed

@@ -533,7 +533,7 @@ export declare interface FaceLandmarkerResult {
 }
 /**
- * A class containing the Pairs of landmark indices to be rendered with
+ * A class containing the pairs of landmark indices to be rendered with
  * connections.
  */
 export declare class FaceLandmarksConnections {
@@ -593,10 +593,6 @@ export declare class FaceStylizer extends VisionTaskRunner {
      * synchronously once the callback returns. Only use this method when the
      * FaceStylizer is created with the image running mode.
      *
-     * The input image can be of any size. To ensure that the output image has
-     * reasonable quality, the stylized output image size is determined by the
-     * model output size.
-     *
      * @param image An image to process.
      * @param callback The callback that is invoked with the stylized image. The
      *    lifetime of the returned data is only guaranteed for the duration of the
@@ -617,11 +613,6 @@ export declare class FaceStylizer extends VisionTaskRunner {
      *  If both are specified, the crop around the region-of-interest is extracted
      *  first, then the specified rotation is applied to the crop.
      *
-     * The input image can be of any size. To ensure that the output image has
-     * reasonable quality, the stylized output image size is the smaller of the
-     * model output size and the size of the 'regionOfInterest' specified in
-     * 'imageProcessingOptions'.
-     *
      * @param image An image to process.
      * @param imageProcessingOptions the `ImageProcessingOptions` specifying how
      *    to process the input image before running inference.
@@ -638,9 +629,6 @@ export declare class FaceStylizer extends VisionTaskRunner {
      * frame's timestamp (in milliseconds). The input timestamps must be
      * monotonically increasing.
      *
-     * To ensure that the output image has reasonable quality, the stylized
-     * output image size is determined by the model output size.
-     *
      * @param videoFrame A video frame to process.
      * @param timestamp The timestamp of the current frame, in ms.
      * @param callback The callback that is invoked with the stylized image. The
@@ -665,10 +653,6 @@ export declare class FaceStylizer extends VisionTaskRunner {
      * frame's timestamp (in milliseconds). The input timestamps must be
      * monotonically increasing.
      *
-     * To ensure that the output image has reasonable quality, the stylized
-     * output image size is the smaller of the model output size and the size of
-     * the 'regionOfInterest' specified in 'imageProcessingOptions'.
-     *
      * @param videoFrame A video frame to process.
      * @param imageProcessingOptions the `ImageProcessingOptions` specifying how
      *    to process the input image before running inference.
@@ -738,6 +722,11 @@ export declare class FilesetResolver {
 /** Performs hand gesture recognition on images. */
 export declare class GestureRecognizer extends VisionTaskRunner {
+    /**
+     * An array containing the pairs of hand landmark indices to be rendered with
+     * connections.
+     */
+    static HAND_CONNECTIONS: Connection[];
     /**
      * Initializes the Wasm runtime and creates a new gesture recognizer from the
      * provided options.
@@ -857,6 +846,11 @@ export declare interface GestureRecognizerResult {
 /** Performs hand landmarks detection on images. */
 export declare class HandLandmarker extends VisionTaskRunner {
+    /**
+     * An array containing the pairs of hand landmark indices to be rendered with
+     * connections.
+     */
+    static HAND_CONNECTIONS: Connection[];
     /**
      * Initializes the Wasm runtime and creates a new `HandLandmarker` from the
      * provided options.
@@ -1219,7 +1213,7 @@ export declare class ImageSegmenter extends VisionTaskRunner {
      *    lifetime of the returned data is only guaranteed for the duration of the
      *    callback.
      */
-    segment(image: ImageSource, callback: SegmentationMaskCallback): void;
+    segment(image: ImageSource, callback: ImageSegmenterCallack): void;
     /**
      * Performs image segmentation on the provided single image and invokes the
      * callback with the response. The method returns synchronously once the
@@ -1233,19 +1227,7 @@ export declare class ImageSegmenter extends VisionTaskRunner {
      *    lifetime of the returned data is only guaranteed for the duration of the
      *    callback.
      */
-    segment(image: ImageSource, imageProcessingOptions: ImageProcessingOptions, callback: SegmentationMaskCallback): void;
-    /**
-     * Get the category label list of the ImageSegmenter can recognize. For
-     * `CATEGORY_MASK` type, the index in the category mask corresponds to the
-     * category in the label list. For `CONFIDENCE_MASK` type, the output mask
-     * list at index corresponds to the category in the label list.
-     *
-     * If there is no labelmap provided in the model file, empty label array is
-     * returned.
-     *
-     * @return The labels used by the current model.
-     */
-    getLabels(): string[];
+    segment(image: ImageSource, imageProcessingOptions: ImageProcessingOptions, callback: ImageSegmenterCallack): void;
     /**
      * Performs image segmentation on the provided video frame and invokes the
      * callback with the response. The method returns synchronously once the
@@ -1258,7 +1240,7 @@ export declare class ImageSegmenter extends VisionTaskRunner {
      *    lifetime of the returned data is only guaranteed for the duration of the
      *    callback.
      */
-    segmentForVideo(videoFrame: ImageSource, timestamp: number, callback: SegmentationMaskCallback): void;
+    segmentForVideo(videoFrame: ImageSource, timestamp: number, callback: ImageSegmenterCallack): void;
     /**
      * Performs image segmentation on the provided video frame and invokes the
      * callback with the response. The method returns synchronously once the
@@ -1273,9 +1255,29 @@ export declare class ImageSegmenter extends VisionTaskRunner {
      *    lifetime of the returned data is only guaranteed for the duration of the
      *    callback.
      */
-    segmentForVideo(videoFrame: ImageSource, imageProcessingOptions: ImageProcessingOptions, timestamp: number, callback: SegmentationMaskCallback): void;
+    segmentForVideo(videoFrame: ImageSource, imageProcessingOptions: ImageProcessingOptions, timestamp: number, callback: ImageSegmenterCallack): void;
+    /**
+     * Get the category label list of the ImageSegmenter can recognize. For
+     * `CATEGORY_MASK` type, the index in the category mask corresponds to the
+     * category in the label list. For `CONFIDENCE_MASK` type, the output mask
+     * list at index corresponds to the category in the label list.
+     *
+     * If there is no labelmap provided in the model file, empty label array is
+     * returned.
+     *
+     * @return The labels used by the current model.
+     */
+    getLabels(): string[];
 }
+/**
+ * A callback that receives the computed masks from the image segmenter. The
+ * returned data is only valid for the duration of the callback. If
+ * asynchronous processing is needed, all data needs to be copied before the
+ * callback returns.
+ */
+export declare type ImageSegmenterCallack = (result: ImageSegmenterResult) => void;
 /** Options to configure the MediaPipe Image Segmenter Task */
 export declare interface ImageSegmenterOptions extends VisionTaskOptions {
     /**
@@ -1283,20 +1285,44 @@ export declare interface ImageSegmenterOptions extends VisionTaskOptions {
      * Metadata, if any. Defaults to English.
      */
     displayNamesLocale?: string | undefined;
+    /** Whether to output confidence masks. Defaults to true. */
+    outputConfidenceMasks?: boolean | undefined;
+    /** Whether to output the category masks. Defaults to false. */
+    outputCategoryMask?: boolean | undefined;
+}
+/**
+ * Copyright 2023 The MediaPipe Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/** The output result of ImageSegmenter. */
+export declare interface ImageSegmenterResult {
     /**
-     * The output type of segmentation results.
-     *
-     * The two supported modes are:
-     * - Category Mask:   Gives a single output mask where each pixel represents
-     *                    the class which the pixel in the original image was
-     *                    predicted to belong to.
-     * - Confidence Mask: Gives a list of output masks (one for each class). For
-     *                    each mask, the pixel represents the prediction
-     *                    confidence, usually in the [0.0, 0.1] range.
-     *
-     * Defaults to `CATEGORY_MASK`.
+     * Multiple masks as Float32Arrays or WebGLTextures where, for each mask, each
+     * pixel represents the prediction confidence, usually in the [0, 1] range.
+     */
+    confidenceMasks?: Float32Array[] | WebGLTexture[];
+    /**
+     * A category mask as a Uint8ClampedArray or WebGLTexture where each
+     * pixel represents the class which the pixel in the original image was
+     * predicted to belong to.
      */
-    outputType?: "CATEGORY_MASK" | "CONFIDENCE_MASK" | undefined;
+    categoryMask?: Uint8ClampedArray | WebGLTexture;
+    /** The width of the masks. */
+    width: number;
+    /** The height of the masks. */
+    height: number;
 }
 /**
@@ -1387,7 +1413,7 @@ export declare class InteractiveSegmenter extends VisionTaskRunner {
      *    lifetime of the returned data is only guaranteed for the duration of the
      *    callback.
      */
-    segment(image: ImageSource, roi: RegionOfInterest, callback: SegmentationMaskCallback): void;
+    segment(image: ImageSource, roi: RegionOfInterest, callback: InteractiveSegmenterCallack): void;
     /**
      * Performs interactive segmentation on the provided single image and invokes
      * the callback with the response. The `roi` parameter is used to represent a
@@ -1413,25 +1439,57 @@ export declare class InteractiveSegmenter extends VisionTaskRunner {
      *    lifetime of the returned data is only guaranteed for the duration of the
      *    callback.
      */
-    segment(image: ImageSource, roi: RegionOfInterest, imageProcessingOptions: ImageProcessingOptions, callback: SegmentationMaskCallback): void;
+    segment(image: ImageSource, roi: RegionOfInterest, imageProcessingOptions: ImageProcessingOptions, callback: InteractiveSegmenterCallack): void;
 }
+/**
+ * A callback that receives the computed masks from the interactive segmenter.
+ * The returned data is only valid for the duration of the callback. If
+ * asynchronous processing is needed, all data needs to be copied before the
+ * callback returns.
+ */
+export declare type InteractiveSegmenterCallack = (result: InteractiveSegmenterResult) => void;
 /** Options to configure the MediaPipe Interactive Segmenter Task */
 export declare interface InteractiveSegmenterOptions extends TaskRunnerOptions {
+    /** Whether to output confidence masks. Defaults to true. */
+    outputConfidenceMasks?: boolean | undefined;
+    /** Whether to output the category masks. Defaults to false. */
+    outputCategoryMask?: boolean | undefined;
+}
+/**
+ * Copyright 2023 The MediaPipe Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/** The output result of InteractiveSegmenter. */
+export declare interface InteractiveSegmenterResult {
     /**
-     * The output type of segmentation results.
-     *
-     * The two supported modes are:
-     * - Category Mask:   Gives a single output mask where each pixel represents
-     *                    the class which the pixel in the original image was
-     *                    predicted to belong to.
-     * - Confidence Mask: Gives a list of output masks (one for each class). For
-     *                    each mask, the pixel represents the prediction
-     *                    confidence, usually in the [0.0, 0.1] range.
-     *
-     * Defaults to `CATEGORY_MASK`.
+     * Multiple masks as Float32Arrays or WebGLTextures where, for each mask, each
+     * pixel represents the prediction confidence, usually in the [0, 1] range.
+     */
+    confidenceMasks?: Float32Array[] | WebGLTexture[];
+    /**
+     * A category mask as a Uint8ClampedArray or WebGLTexture where each
+     * pixel represents the class which the pixel in the original image was
+     * predicted to belong to.
      */
-    outputType?: "CATEGORY_MASK" | "CONFIDENCE_MASK" | undefined;
+    categoryMask?: Uint8ClampedArray | WebGLTexture;
+    /** The width of the masks. */
+    width: number;
+    /** The height of the masks. */
+    height: number;
 }
 /**
@@ -1646,16 +1704,6 @@ declare type RunningMode = "IMAGE" | "VIDEO";
  */
 export declare type SegmentationMask = Uint8ClampedArray | Float32Array | WebGLTexture;
-/**
- * A callback that receives the computed masks from the segmentation tasks. The
- * callback either receives a single element array with a category mask (as a
- * `[Uint8ClampedArray]`) or multiple confidence masks (as a `Float32Array[]`).
- * The returned data is only valid for the duration of the callback. If
- * asynchronous processing is needed, all data needs to be copied before the
- * callback returns.
- */
-export declare type SegmentationMaskCallback = (masks: SegmentationMask[], width: number, height: number) => void;
 /** Base class for all MediaPipe Tasks. */
 declare abstract class TaskRunner {
     protected constructor();