@mediapipe/tasks-vision 0.1.0-alpha-3 → 0.1.0-alpha-4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -39,6 +39,23 @@ const classifications = imageClassifier.classify(image);
39
39
 
40
40
  For more information, refer to the [Image Classification](https://developers.google.com/mediapipe/solutions/vision/image_classifier/web_js) documentation.
41
41
 
42
+ ## Image Segmentation
43
+
44
+ The MediaPipe Image Segmenter lets you segment an image into categories.
45
+
46
+ ```
47
+ const vision = await FilesetResolver.forVisionTasks(
48
+ "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
49
+ );
50
+ const imageSegmenter = await ImageSegmenter.createFromModelPath(vision,
51
+ "model.tflite"
52
+ );
53
+ const image = document.getElementById("image") as HTMLImageElement;
54
+ imageSegmenter.segment(image, (masks, width, height) => {
55
+ ...
56
+ });
57
+ ```
58
+
42
59
  ## Gesture Recognition
43
60
 
44
61
  The MediaPipe Gesture Recognizer task lets you recognize hand gestures in real
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mediapipe/tasks-vision",
3
- "version": "0.1.0-alpha-3",
3
+ "version": "0.1.0-alpha-4",
4
4
  "description": "MediaPipe Vision Tasks",
5
5
  "main": "vision_bundle.js",
6
6
  "author": "mediapipe@google.com",
package/vision.d.ts CHANGED
@@ -699,6 +699,124 @@ declare interface ImageProcessingOptions {
699
699
  rotationDegrees?: number;
700
700
  }
701
701
 
702
+ /** Performs image segmentation on images. */
703
+ export declare class ImageSegmenter extends VisionTaskRunner {
704
+ /**
705
+ * Initializes the Wasm runtime and creates a new image segmenter from the
706
+ * provided options.
707
+ * @param wasmFileset A configuration object that provides the location of
708
+ * the Wasm binary and its loader.
709
+ * @param imageSegmenterOptions The options for the Image Segmenter. Note
710
+ * that either a path to the model asset or a model buffer needs to be
711
+ * provided (via `baseOptions`).
712
+ */
713
+ static createFromOptions(wasmFileset: WasmFileset, imageSegmenterOptions: ImageSegmenterOptions): Promise<ImageSegmenter>;
714
+ /**
715
+ * Initializes the Wasm runtime and creates a new image segmenter based on
716
+ * the provided model asset buffer.
717
+ * @param wasmFileset A configuration object that provides the location of
718
+ * the Wasm binary and its loader.
719
+ * @param modelAssetBuffer A binary representation of the model.
720
+ */
721
+ static createFromModelBuffer(wasmFileset: WasmFileset, modelAssetBuffer: Uint8Array): Promise<ImageSegmenter>;
722
+ /**
723
+ * Initializes the Wasm runtime and creates a new image segmenter based on
724
+ * the path to the model asset.
725
+ * @param wasmFileset A configuration object that provides the location of
726
+ * the Wasm binary and its loader.
727
+ * @param modelAssetPath The path to the model asset.
728
+ */
729
+ static createFromModelPath(wasmFileset: WasmFileset, modelAssetPath: string): Promise<ImageSegmenter>;
730
+ private constructor();
731
+ /**
732
+ * Sets new options for the image segmenter.
733
+ *
734
+ * Calling `setOptions()` with a subset of options only affects those
735
+ * options. You can reset an option back to its default value by
736
+ * explicitly setting it to `undefined`.
737
+ *
738
+ * @param options The options for the image segmenter.
739
+ */
740
+ setOptions(options: ImageSegmenterOptions): Promise<void>;
741
+ /**
742
+ * Performs image segmentation on the provided single image and invokes the
743
+ * callback with the response. The method returns synchronously once the
744
+ * callback returns. Only use this method when the ImageSegmenter is
745
+ * created with running mode `image`.
746
+ *
747
+ * @param image An image to process.
748
+ * @param callback The callback that is invoked with the segmented masks. The
749
+ * lifetime of the returned data is only guaranteed for the duration of the
750
+ * callback.
751
+ */
752
+ segment(image: ImageSource, callback: SegmentationMaskCallback): void;
753
+ /**
754
+ * Performs image segmentation on the provided single image and invokes the
755
+ * callback with the response. The method returns synchronously once the
756
+ * callback returns. Only use this method when the ImageSegmenter is
757
+ * created with running mode `image`.
758
+ *
759
+ * @param image An image to process.
760
+ * @param imageProcessingOptions The `ImageProcessingOptions` specifying how
761
+ * to process the input image before running inference.
762
+ * @param callback The callback that is invoked with the segmented masks. The
763
+ * lifetime of the returned data is only guaranteed for the duration of the
764
+ * callback.
765
+ */
766
+ segment(image: ImageSource, imageProcessingOptions: ImageProcessingOptions, callback: SegmentationMaskCallback): void;
767
+ /**
768
+ * Performs image segmentation on the provided video frame and invokes the
769
+ * callback with the response. The method returns synchronously once the
770
+ * callback returns. Only use this method when the ImageSegmenter is
771
+ * created with running mode `video`.
772
+ *
773
+ * @param videoFrame A video frame to process.
774
+ * @param timestamp The timestamp of the current frame, in ms.
775
+ * @param callback The callback that is invoked with the segmented masks. The
776
+ * lifetime of the returned data is only guaranteed for the duration of the
777
+ * callback.
778
+ */
779
+ segmentForVideo(videoFrame: ImageSource, timestamp: number, callback: SegmentationMaskCallback): void;
780
+ /**
781
+ * Performs image segmentation on the provided video frame and invokes the
782
+ * callback with the response. The method returns synchronously once the
783
+ * callback returns. Only use this method when the ImageSegmenter is
784
+ * created with running mode `video`.
785
+ *
786
+ * @param videoFrame A video frame to process.
787
+ * @param imageProcessingOptions The `ImageProcessingOptions` specifying how
788
+ * to process the input image before running inference.
789
+ * @param timestamp The timestamp of the current frame, in ms.
790
+ * @param callback The callback that is invoked with the segmented masks. The
791
+ * lifetime of the returned data is only guaranteed for the duration of the
792
+ * callback.
793
+ */
794
+ segmentForVideo(videoFrame: ImageSource, imageProcessingOptions: ImageProcessingOptions, timestamp: number, callback: SegmentationMaskCallback): void;
795
+ }
796
+
797
+ /** Options to configure the MediaPipe Image Segmenter Task. */
798
+ export declare interface ImageSegmenterOptions extends VisionTaskOptions {
799
+ /**
800
+ * The locale to use for display names specified through the TFLite Model
801
+ * Metadata, if any. Defaults to English.
802
+ */
803
+ displayNamesLocale?: string | undefined;
804
+ /**
805
+ * The output type of segmentation results.
806
+ *
807
+ * The two supported modes are:
808
+ * - Category Mask: Gives a single output mask where each pixel represents
809
+ * the class which the pixel in the original image was
810
+ * predicted to belong to.
811
+ * - Confidence Mask: Gives a list of output masks (one for each class). For
812
+ * each mask, the pixel represents the prediction
813
+ * confidence, usually in the [0.0, 1.0] range.
814
+ *
815
+ * Defaults to `CATEGORY_MASK`.
816
+ */
817
+ outputType?: "CATEGORY_MASK" | "CONFIDENCE_MASK" | undefined;
818
+ }
819
+
702
820
  /**
703
821
  * Valid types of image sources which we can run our GraphRunner over.
704
822
  */
@@ -839,6 +957,24 @@ declare interface RectF {
839
957
  */
840
958
  declare type RunningMode = "IMAGE" | "VIDEO";
841
959
 
960
+ /**
961
+ * The ImageSegmenter returns the segmentation result as a Uint8Array (when
962
+ * the default mode of `CATEGORY_MASK` is used) or as a Float32Array (for
963
+ * output type `CONFIDENCE_MASK`). The `WebGLTexture` output type is reserved
964
+ * for future usage.
965
+ */
966
+ export declare type SegmentationMask = Uint8Array | Float32Array | WebGLTexture;
967
+
968
+ /**
969
+ * A callback that receives the computed masks from the image segmenter. The
970
+ * callback either receives a single-element array with a category mask (as a
971
+ * `[Uint8Array]`) or multiple confidence masks (as a `Float32Array[]`).
972
+ * The returned data is only valid for the duration of the callback. If
973
+ * asynchronous processing is needed, all data needs to be copied before the
974
+ * callback returns.
975
+ */
976
+ export declare type SegmentationMaskCallback = (masks: SegmentationMask[], width: number, height: number) => void;
977
+
842
978
  /** Base class for all MediaPipe Tasks. */
843
979
  declare abstract class TaskRunner {
844
980
  protected constructor();