npm - @mediapipe/tasks-vision - Versions diffs - 0.1.0-alpha-6 → 0.1.0-alpha-8 - Mend

@mediapipe/tasks-vision 0.1.0-alpha-6 → 0.1.0-alpha-8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/README.md +16 -0
package/package.json +1 -1
package/vision.d.ts +257 -7
package/vision_bundle.js +1 -1
package/wasm/vision_wasm_internal.js +301 -281
package/wasm/vision_wasm_internal.wasm +0 -0
package/wasm/vision_wasm_nosimd_internal.js +302 -282
package/wasm/vision_wasm_nosimd_internal.wasm +0 -0

package/README.md CHANGED Viewed

@@ -2,6 +2,22 @@
 This package contains the vision tasks for MediaPipe.
+## Face Detection
+The MediaPipe Face Detector task lets you detect the presence and location of
+faces within images or videos.
+```
+const vision = await FilesetResolver.forVisionTasks(
+    "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
+);
+const faceDetector = await FaceDetector.createFromModelPath(vision,
+    "https://storage.googleapis.com/mediapipe-tasks/object_detector/efficientdet_lite0_uint8.tflite"
+);
+const image = document.getElementById("image") as HTMLImageElement;
+const detections = faceDetector.detect(image);
+```
 ## Face Landmark Detection
 The MediaPipe Face Landmarker task lets you detect the landmarks of faces in

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@mediapipe/tasks-vision",
-  "version": "0.1.0-alpha-6",
+  "version": "0.1.0-alpha-8",
   "description": "MediaPipe Vision Tasks",
   "main": "vision_bundle.js",
   "author": "mediapipe@google.com",

package/vision.d.ts CHANGED Viewed

@@ -29,6 +29,21 @@ declare interface BaseOptions_2 {
     delegate?: "CPU" | "GPU" | undefined;
 }
+/**
+ * Copyright 2023 The MediaPipe Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 /** An integer bounding box, axis aligned. */
 export declare interface BoundingBox {
     /** The X coordinate of the top-left corner, in pixels. */
@@ -138,14 +153,30 @@ declare interface ClassifierOptions {
     categoryDenylist?: string[] | undefined;
 }
-/** Represents one object detected by the `ObjectDetector`. */
+/** Represents one detection by a detection task. */
 export declare interface Detection {
     /** A list of `Category` objects. */
     categories: Category[];
     /** The bounding box of the detected objects. */
     boundingBox?: BoundingBox;
+    /**
+     * Optional list of keypoints associated with the detection. Keypoints
+     * represent interesting points related to the detection. For example, the
+     * keypoints represent the eye, ear and mouth from a face detection model. Or
+     * in the template matching detection, e.g. KNIFT, they can represent the
+     * feature points for template matching.
+     */
+    keypoints?: NormalizedKeypoint[];
 }
+/** Detection results of a model. */
+declare interface DetectionResult {
+    /** A list of Detections. */
+    detections: Detection[];
+}
+export { DetectionResult as FaceDetectorResult }
+export { DetectionResult as ObjectDetectorResult }
 /**
  * Copyright 2022 The MediaPipe Authors. All Rights Reserved.
  *
@@ -224,6 +255,200 @@ export declare interface Embedding {
     headName: string;
 }
+/** Performs face detection on images. */
+export declare class FaceDetector extends VisionTaskRunner {
+    /**
+     * Initializes the Wasm runtime and creates a new face detector from the
+     * provided options.
+     * @param wasmFileset A configuration object that provides the location of the
+     *     Wasm binary and its loader.
+     * @param faceDetectorOptions The options for the FaceDetector. Note that
+     *     either a path to the model asset or a model buffer needs to be
+     *     provided (via `baseOptions`).
+     */
+    static createFromOptions(wasmFileset: WasmFileset, faceDetectorOptions: FaceDetectorOptions): Promise<FaceDetector>;
+    /**
+     * Initializes the Wasm runtime and creates a new face detector based on the
+     * provided model asset buffer.
+     * @param wasmFileset A configuration object that provides the location of the
+     *     Wasm binary and its loader.
+     * @param modelAssetBuffer A binary representation of the model.
+     */
+    static createFromModelBuffer(wasmFileset: WasmFileset, modelAssetBuffer: Uint8Array): Promise<FaceDetector>;
+    /**
+     * Initializes the Wasm runtime and creates a new face detector based on the
+     * path to the model asset.
+     * @param wasmFileset A configuration object that provides the location of the
+     *     Wasm binary and its loader.
+     * @param modelAssetPath The path to the model asset.
+     */
+    static createFromModelPath(wasmFileset: WasmFileset, modelAssetPath: string): Promise<FaceDetector>;
+    private constructor();
+    /**
+     * Sets new options for the FaceDetector.
+     *
+     * Calling `setOptions()` with a subset of options only affects those options.
+     * You can reset an option back to its default value by explicitly setting it
+     * to `undefined`.
+     *
+     * @param options The options for the FaceDetector.
+     */
+    setOptions(options: FaceDetectorOptions): Promise<void>;
+    /**
+     * Performs face detection on the provided single image and waits
+     * synchronously for the response. Only use this method when the
+     * FaceDetector is created with running mode `image`.
+     *
+     * @param image An image to process.
+     * @param imageProcessingOptions the `ImageProcessingOptions` specifying how
+     *    to process the input image before running inference.
+     * @return A result containing the list of detected faces.
+     */
+    detect(image: ImageSource, imageProcessingOptions?: ImageProcessingOptions): DetectionResult;
+    /**
+     * Performs face detection on the provided video frame and waits
+     * synchronously for the response. Only use this method when the
+     * FaceDetector is created with running mode `video`.
+     *
+     * @param videoFrame A video frame to process.
+     * @param timestamp The timestamp of the current frame, in ms.
+     * @param imageProcessingOptions the `ImageProcessingOptions` specifying how
+     *    to process the input image before running inference.
+     * @return A result containing the list of detected faces.
+     */
+    detectForVideo(videoFrame: ImageSource, timestamp: number, imageProcessingOptions?: ImageProcessingOptions): DetectionResult;
+}
+/** Options to configure the MediaPipe Face Detector Task */
+export declare interface FaceDetectorOptions extends VisionTaskOptions {
+    /**
+     * The minimum confidence score for the face detection to be considered
+     * successful. Defaults to 0.5.
+     */
+    minDetectionConfidence?: number | undefined;
+    /**
+     * The minimum non-maximum-suppression threshold for face detection to be
+     * considered overlapped. Defaults to 0.3.
+     */
+    minSuppressionThreshold?: number | undefined;
+}
+/**
+ * Performs face landmarks detection on images.
+ *
+ * This API expects a pre-trained face landmarker model asset bundle.
+ */
+export declare class FaceLandmarker extends VisionTaskRunner {
+    /**
+     * Initializes the Wasm runtime and creates a new `FaceLandmarker` from the
+     * provided options.
+     * @param wasmFileset A configuration object that provides the location of the
+     *     Wasm binary and its loader.
+     * @param faceLandmarkerOptions The options for the FaceLandmarker.
+     *     Note that either a path to the model asset or a model buffer needs to
+     *     be provided (via `baseOptions`).
+     */
+    static createFromOptions(wasmFileset: WasmFileset, faceLandmarkerOptions: FaceLandmarkerOptions): Promise<FaceLandmarker>;
+    /**
+     * Initializes the Wasm runtime and creates a new `FaceLandmarker` based on
+     * the provided model asset buffer.
+     * @param wasmFileset A configuration object that provides the location of the
+     *     Wasm binary and its loader.
+     * @param modelAssetBuffer A binary representation of the model.
+     */
+    static createFromModelBuffer(wasmFileset: WasmFileset, modelAssetBuffer: Uint8Array): Promise<FaceLandmarker>;
+    /**
+     * Initializes the Wasm runtime and creates a new `FaceLandmarker` based on
+     * the path to the model asset.
+     * @param wasmFileset A configuration object that provides the location of the
+     *     Wasm binary and its loader.
+     * @param modelAssetPath The path to the model asset.
+     */
+    static createFromModelPath(wasmFileset: WasmFileset, modelAssetPath: string): Promise<FaceLandmarker>;
+    private constructor();
+    /**
+     * Sets new options for this `FaceLandmarker`.
+     *
+     * Calling `setOptions()` with a subset of options only affects those options.
+     * You can reset an option back to its default value by explicitly setting it
+     * to `undefined`.
+     *
+     * @param options The options for the face landmarker.
+     */
+    setOptions(options: FaceLandmarkerOptions): Promise<void>;
+    /**
+     * Performs face landmarks detection on the provided single image and waits
+     * synchronously for the response. Only use this method when the
+     * FaceLandmarker is created with running mode `image`.
+     *
+     * @param image An image to process.
+     * @param imageProcessingOptions the `ImageProcessingOptions` specifying how
+     *    to process the input image before running inference.
+     * @return The detected face landmarks.
+     */
+    detect(image: ImageSource, imageProcessingOptions?: ImageProcessingOptions): FaceLandmarkerResult;
+    /**
+     * Performs face landmarks detection on the provided video frame and waits
+     * synchronously for the response. Only use this method when the
+     * FaceLandmarker is created with running mode `video`.
+     *
+     * @param videoFrame A video frame to process.
+     * @param timestamp The timestamp of the current frame, in ms.
+     * @param imageProcessingOptions the `ImageProcessingOptions` specifying how
+     *    to process the input image before running inference.
+     * @return The detected face landmarks.
+     */
+    detectForVideo(videoFrame: ImageSource, timestamp: number, imageProcessingOptions?: ImageProcessingOptions): FaceLandmarkerResult;
+}
+/** Options to configure the MediaPipe FaceLandmarker Task */
+export declare interface FaceLandmarkerOptions extends VisionTaskOptions {
+    /**
+     * The maximum number of faces that can be detected by the FaceLandmarker.
+     * Defaults to 1.
+     */
+    numFaces?: number | undefined;
+    /**
+     * The minimum confidence score for the face detection to be considered
+     * successful. Defaults to 0.5.
+     */
+    minFaceDetectionConfidence?: number | undefined;
+    /**
+     * The minimum confidence score of face presence score in the face landmark
+     * detection. Defaults to 0.5.
+     */
+    minFacePresenceConfidence?: number | undefined;
+    /**
+     * The minimum confidence score for the face tracking to be considered
+     * successful. Defaults to 0.5.
+     */
+    minTrackingConfidence?: number | undefined;
+    /**
+     * Whether FaceLandmarker outputs face blendshapes classification. Face
+     * blendshapes are used for rendering the 3D face model.
+     */
+    outputFaceBlendshapes?: boolean | undefined;
+    /**
+     * Whether FaceLandmarker outputs the facial transformation matrix. Facial
+     * transformation matrix is used to transform the face landmarks in canonical
+     * face to the detected face, so that users can apply face effects on the
+     * detected landmarks.
+     */
+    outputFacialTransformationMatrixes?: boolean | undefined;
+}
+/**
+ * Represents the face landmarks detection results generated by `FaceLandmarker`.
+ */
+export declare interface FaceLandmarkerResult {
+    /** Detected face landmarks in normalized image coordinates. */
+    faceLandmarks: NormalizedLandmark[][];
+    /** Optional face blendshapes results. */
+    faceBlendshapes?: Classifications[];
+    /** Optional facial transformation matrix. */
+    facialTransformationMatrixes?: Matrix[];
+}
 /** Performs face stylization on images. */
 export declare class FaceStylizer extends VisionTaskRunner {
     /**
@@ -1123,6 +1348,31 @@ export declare interface Landmark {
     z: number;
 }
+/**
+ * Copyright 2023 The MediaPipe Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/** A two-dimensional matrix. */
+declare interface Matrix {
+    /** The number of rows. */
+    rows: number;
+    /** The number of columns. */
+    columns: number;
+    /** The values as a flattened one-dimensional array. */
+    data: number[];
+}
 /**
  * Copyright 2023 The MediaPipe Authors. All Rights Reserved.
  *
@@ -1231,9 +1481,9 @@ export declare class ObjectDetector extends VisionTaskRunner {
      * @param image An image to process.
      * @param imageProcessingOptions the `ImageProcessingOptions` specifying how
      *    to process the input image before running inference.
-     * @return The list of detected objects
+     * @return A result containing a list of detected objects.
      */
-    detect(image: ImageSource, imageProcessingOptions?: ImageProcessingOptions): Detection[];
+    detect(image: ImageSource, imageProcessingOptions?: ImageProcessingOptions): DetectionResult;
     /**
      * Performs object detection on the provided video frame and waits
      * synchronously for the response. Only use this method when the
@@ -1243,9 +1493,9 @@ export declare class ObjectDetector extends VisionTaskRunner {
      * @param timestamp The timestamp of the current frame, in ms.
      * @param imageProcessingOptions the `ImageProcessingOptions` specifying how
      *    to process the input image before running inference.
-     * @return The list of detected objects
+     * @return A result containing a list of detected objects.
      */
-    detectForVideo(videoFrame: ImageSource, timestamp: number, imageProcessingOptions?: ImageProcessingOptions): Detection[];
+    detectForVideo(videoFrame: ImageSource, timestamp: number, imageProcessingOptions?: ImageProcessingOptions): DetectionResult;
 }
 /** Options to configure the MediaPipe Object Detector Task */
@@ -1283,7 +1533,7 @@ declare type RunningMode = "IMAGE" | "VIDEO";
 /**
  * The segmentation tasks return the segmentation either as a WebGLTexture (when
  * the output is on GPU) or as a typed JavaScript arrays for CPU-based
- * category or confidence masks. `Uint8ClampedArray`s are used to represend
+ * category or confidence masks. `Uint8ClampedArray`s are used to represent
  * CPU-based category masks and `Float32Array`s are used for CPU-based
  * confidence masks.
  */
@@ -1316,7 +1566,7 @@ declare interface TaskRunnerOptions {
 declare interface VisionTaskOptions extends TaskRunnerOptions {
     /**
      * The canvas element to bind textures to. This has to be set for GPU
-     * processing. The task will initialize a WebGL context and throw an eror if
+     * processing. The task will initialize a WebGL context and throw an error if
      * this fails (e.g. if you have already initialized a different type of
      * context).
      */