npm - @mediapipe/tasks-vision - Versions diffs - 0.1.0-alpha-3 → 0.1.0-alpha-4 - Mend

@mediapipe/tasks-vision 0.1.0-alpha-3 → 0.1.0-alpha-4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/README.md +17 -0
package/package.json +1 -1
package/vision.d.ts +136 -0
package/vision_bundle.js +1 -1
package/wasm/vision_wasm_internal.js +209 -162
package/wasm/vision_wasm_internal.wasm +0 -0
package/wasm/vision_wasm_nosimd_internal.js +209 -162
package/wasm/vision_wasm_nosimd_internal.wasm +0 -0

package/README.md CHANGED Viewed

@@ -39,6 +39,23 @@ const classifications = imageClassifier.classify(image);
 For more information, refer to the [Image Classification](https://developers.google.com/mediapipe/solutions/vision/image_classifier/web_js) documentation.
+## Image Segmentation
+The MediaPipe Image Segmenter lets you segment an image into categories.
+```
+const vision = await FilesetResolver.forVisionTasks(
+    "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
+);
+const imageSegmenter = await ImageSegmenter.createFromModelPath(vision,
+    "model.tflite"
+);
+const image = document.getElementById("image") as HTMLImageElement;
+imageSegmenter.segment(image, (masks, width, height) => {
+  ...
+});
+```
 ## Gesture Recognition
 The MediaPipe Gesture Recognizer task lets you recognize hand gestures in real

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@mediapipe/tasks-vision",
-  "version": "0.1.0-alpha-3",
+  "version": "0.1.0-alpha-4",
   "description": "MediaPipe Vision Tasks",
   "main": "vision_bundle.js",
   "author": "mediapipe@google.com",

package/vision.d.ts CHANGED Viewed

@@ -699,6 +699,124 @@ declare interface ImageProcessingOptions {
     rotationDegrees?: number;
 }
+/** Performs image segmentation on images. */
+export declare class ImageSegmenter extends VisionTaskRunner {
+    /**
+     * Initializes the Wasm runtime and creates a new image segmenter from the
+     * provided options.
+     * @param wasmFileset A configuration object that provides the location of
+     *     the Wasm binary and its loader.
+     * @param imageSegmenterOptions The options for the Image Segmenter. Note
+     *     that either a path to the model asset or a model buffer needs to be
+     *     provided (via `baseOptions`).
+     */
+    static createFromOptions(wasmFileset: WasmFileset, imageSegmenterOptions: ImageSegmenterOptions): Promise<ImageSegmenter>;
+    /**
+     * Initializes the Wasm runtime and creates a new image segmenter based on
+     * the provided model asset buffer.
+     * @param wasmFileset A configuration object that provides the location of
+     *     the Wasm binary and its loader.
+     * @param modelAssetBuffer A binary representation of the model.
+     */
+    static createFromModelBuffer(wasmFileset: WasmFileset, modelAssetBuffer: Uint8Array): Promise<ImageSegmenter>;
+    /**
+     * Initializes the Wasm runtime and creates a new image segmenter based on
+     * the path to the model asset.
+     * @param wasmFileset A configuration object that provides the location of
+     *     the Wasm binary and its loader.
+     * @param modelAssetPath The path to the model asset.
+     */
+    static createFromModelPath(wasmFileset: WasmFileset, modelAssetPath: string): Promise<ImageSegmenter>;
+    private constructor();
+    /**
+     * Sets new options for the image segmenter.
+     *
+     * Calling `setOptions()` with a subset of options only affects those
+     * options. You can reset an option back to its default value by
+     * explicitly setting it to `undefined`.
+     *
+     * @param options The options for the image segmenter.
+     */
+    setOptions(options: ImageSegmenterOptions): Promise<void>;
+    /**
+     * Performs image segmentation on the provided single image and invokes the
+     * callback with the response. The method returns synchronously once the
+     * callback returns. Only use this method when the ImageSegmenter is
+     * created with running mode `IMAGE`.
+     *
+     * @param image An image to process.
+     * @param callback The callback that is invoked with the segmented masks. The
+     *    lifetime of the returned data is only guaranteed for the duration of the
+     *    callback.
+     */
+    segment(image: ImageSource, callback: SegmentationMaskCallback): void;
+    /**
+     * Performs image segmentation on the provided single image and invokes the
+     * callback with the response. The method returns synchronously once the
+     * callback returns. Only use this method when the ImageSegmenter is
+     * created with running mode `IMAGE`.
+     *
+     * @param image An image to process.
+     * @param imageProcessingOptions The `ImageProcessingOptions` specifying how
+     *    to process the input image before running inference.
+     * @param callback The callback that is invoked with the segmented masks. The
+     *    lifetime of the returned data is only guaranteed for the duration of the
+     *    callback.
+     */
+    segment(image: ImageSource, imageProcessingOptions: ImageProcessingOptions, callback: SegmentationMaskCallback): void;
+    /**
+     * Performs image segmentation on the provided video frame and invokes the
+     * callback with the response. The method returns synchronously once the
+     * callback returns. Only use this method when the ImageSegmenter is
+     * created with running mode `VIDEO`.
+     *
+     * @param videoFrame A video frame to process.
+     * @param timestamp The timestamp of the current frame, in ms.
+     * @param callback The callback that is invoked with the segmented masks. The
+     *    lifetime of the returned data is only guaranteed for the duration of the
+     *    callback.
+     */
+    segmentForVideo(videoFrame: ImageSource, timestamp: number, callback: SegmentationMaskCallback): void;
+    /**
+     * Performs image segmentation on the provided video frame and invokes the
+     * callback with the response. The method returns synchronously once the
+     * callback returns. Only use this method when the ImageSegmenter is
+     * created with running mode `VIDEO`.
+     *
+     * @param videoFrame A video frame to process.
+     * @param imageProcessingOptions The `ImageProcessingOptions` specifying how
+     *    to process the input image before running inference.
+     * @param timestamp The timestamp of the current frame, in ms.
+     * @param callback The callback that is invoked with the segmented masks. The
+     *    lifetime of the returned data is only guaranteed for the duration of the
+     *    callback.
+     */
+    segmentForVideo(videoFrame: ImageSource, imageProcessingOptions: ImageProcessingOptions, timestamp: number, callback: SegmentationMaskCallback): void;
+}
+/** Options to configure the MediaPipe Image Segmenter Task. */
+export declare interface ImageSegmenterOptions extends VisionTaskOptions {
+    /**
+     * The locale to use for display names specified through the TFLite Model
+     * Metadata, if any. Defaults to English.
+     */
+    displayNamesLocale?: string | undefined;
+    /**
+     * The output type of segmentation results.
+     *
+     * The two supported modes are:
+     * - Category Mask:   Gives a single output mask where each pixel represents
+     *                    the class which the pixel in the original image was
+     *                    predicted to belong to.
+     * - Confidence Mask: Gives a list of output masks (one for each class). For
+     *                    each mask, the pixel represents the prediction
+     *                    confidence, usually in the [0.0, 1.0] range.
+     *
+     * Defaults to `CATEGORY_MASK`.
+     */
+    outputType?: "CATEGORY_MASK" | "CONFIDENCE_MASK" | undefined;
+}
 /**
  * Valid types of image sources which we can run our GraphRunner over.
  */
@@ -839,6 +957,24 @@ declare interface RectF {
  */
 declare type RunningMode = "IMAGE" | "VIDEO";
+/**
+ * The ImageSegmenter returns the segmentation result as a Uint8Array (when
+ * the default mode of `CATEGORY_MASK` is used) or as a Float32Array (for
+ * output type `CONFIDENCE_MASK`). The `WebGLTexture` output type is reserved
+ * for future usage.
+ */
+export declare type SegmentationMask = Uint8Array | Float32Array | WebGLTexture;
+/**
+ * A callback that receives the computed masks from the image segmenter. The
+ * callback either receives a single element array with a category mask (as a
+ * `[Uint8Array]`) or multiple confidence masks (as a `Float32Array[]`).
+ * The returned data is only valid for the duration of the callback. If
+ * asynchronous processing is needed, all data needs to be copied before the
+ * callback returns.
+ */
+export declare type SegmentationMaskCallback = (masks: SegmentationMask[], width: number, height: number) => void;
 /** Base class for all MediaPipe Tasks. */
 declare abstract class TaskRunner {
     protected constructor();