@mediapipe/tasks-vision 0.1.0-alpha-4 → 0.1.0-alpha-5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,23 +2,57 @@
2
2
 
3
3
  This package contains the vision tasks for MediaPipe.
4
4
 
5
- ## Object Detection
5
+ ## Face Stylizer
6
6
 
7
- The MediaPipe Object Detector task lets you detect the presence and location of
8
- multiple classes of objects within images or videos.
7
+ The MediaPipe Face Stylizer lets you perform face stylization on images.
9
8
 
10
9
  ```
11
10
  const vision = await FilesetResolver.forVisionTasks(
12
11
  "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
13
12
  );
14
- const objectDetector = await ObjectDetector.createFromModelPath(vision,
15
- "https://storage.googleapis.com/mediapipe-tasks/object_detector/efficientdet_lite0_uint8.tflite"
13
+ const faceStylizer = await FaceStylizer.createFromModelPath(vision,
14
+ "model.tflite"
16
15
  );
17
16
  const image = document.getElementById("image") as HTMLImageElement;
18
- const detections = objectDetector.detect(image);
17
+ const stylizedImage = faceStylizer.stylize(image);
19
18
  ```
20
19
 
21
- For more information, refer to the [Object Detector](https://developers.google.com/mediapipe/solutions/vision/object_detector/web_js) documentation.
20
+ ## Gesture Recognition
21
+
22
+ The MediaPipe Gesture Recognizer task lets you recognize hand gestures in real
23
+ time, and provides the recognized hand gesture results along with the landmarks
24
+ of the detected hands. You can use this task to recognize specific hand gestures
25
+ from a user, and invoke application features that correspond to those gestures.
26
+
27
+ ```
28
+ const vision = await FilesetResolver.forVisionTasks(
29
+ "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
30
+ );
31
+ const gestureRecognizer = await GestureRecognizer.createFromModelPath(vision,
32
+ "https://storage.googleapis.com/mediapipe-tasks/gesture_recognizer/gesture_recognizer.task"
33
+ );
34
+ const image = document.getElementById("image") as HTMLImageElement;
35
+ const recognitions = gestureRecognizer.recognize(image);
36
+ ```
37
+
38
+ ## Hand Landmark Detection
39
+
40
+ The MediaPipe Hand Landmarker task lets you detect the landmarks of the hands in
41
+ an image. You can use this Task to localize key points of the hands and render
42
+ visual effects over the hands.
43
+
44
+ ```
45
+ const vision = await FilesetResolver.forVisionTasks(
46
+ "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
47
+ );
48
+ const handLandmarker = await HandLandmarker.createFromModelPath(vision,
49
+ "https://storage.googleapis.com/mediapipe-tasks/hand_landmarker/hand_landmarker.task"
50
+ );
51
+ const image = document.getElementById("image") as HTMLImageElement;
52
+ const landmarks = handLandmarker.detect(image);
53
+ ```
54
+
55
+ For more information, refer to the [Hand Landmark Detection](https://developers.google.com/mediapipe/solutions/vision/hand_landmarker/web_js) documentation.
22
56
 
23
57
  ## Image Classification
24
58
 
@@ -56,40 +90,39 @@ imageSegmenter.segment(image, (masks, width, height) => {
56
90
  });
57
91
  ```
58
92
 
59
- ## Gesture Recognition
93
+ ## Interactive Segmentation
60
94
 
61
- The MediaPipe Gesture Recognizer task lets you recognize hand gestures in real
62
- time, and provides the recognized hand gesture results along with the landmarks
63
- of the detected hands. You can use this task to recognize specific hand gestures
64
- from a user, and invoke application features that correspond to those gestures.
95
+ The MediaPipe Interactive Segmenter lets you select a region of interest to
96
+ segment an image by.
65
97
 
66
98
  ```
67
99
  const vision = await FilesetResolver.forVisionTasks(
68
100
  "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
69
101
  );
70
- const gestureRecognizer = await GestureRecognizer.createFromModelPath(vision,
71
- "https://storage.googleapis.com/mediapipe-tasks/gesture_recognizer/gesture_recognizer.task"
102
+ const interactiveSegmenter = await InteractiveSegmenter.createFromModelPath(
103
+ vision, "model.tflite"
72
104
  );
73
105
  const image = document.getElementById("image") as HTMLImageElement;
74
- const recognitions = gestureRecognizer.recognize(image);
106
+ interactiveSegmenter.segment(image, { keypoint: { x: 0.1, y: 0.2 } },
107
+ (masks, width, height) => { ... }
108
+ );
75
109
  ```
76
110
 
77
- ## Handlandmark Detection
111
+ ## Object Detection
78
112
 
79
- The MediaPipe Hand Landmarker task lets you detect the landmarks of the hands in
80
- an image. You can use this Task to localize key points of the hands and render
81
- visual effects over the hands.
113
+ The MediaPipe Object Detector task lets you detect the presence and location of
114
+ multiple classes of objects within images or videos.
82
115
 
83
116
  ```
84
117
  const vision = await FilesetResolver.forVisionTasks(
85
118
  "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
86
119
  );
87
- const handLandmarker = await HandLandmarker.createFromModelPath(vision,
88
- "https://storage.googleapis.com/mediapipe-tasks/hand_landmarker/hand_landmarker.task"
120
+ const objectDetector = await ObjectDetector.createFromModelPath(vision,
121
+ "https://storage.googleapis.com/mediapipe-tasks/object_detector/efficientdet_lite0_uint8.tflite"
89
122
  );
90
123
  const image = document.getElementById("image") as HTMLImageElement;
91
- const landmarks = handLandmarker.detect(image);
124
+ const detections = objectDetector.detect(image);
92
125
  ```
93
126
 
94
- For more information, refer to the [Handlandmark Detection](https://developers.google.com/mediapipe/solutions/vision/hand_landmarker/web_js) documentation.
127
+ For more information, refer to the [Object Detector](https://developers.google.com/mediapipe/solutions/vision/object_detector/web_js) documentation.
95
128
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mediapipe/tasks-vision",
3
- "version": "0.1.0-alpha-4",
3
+ "version": "0.1.0-alpha-5",
4
4
  "description": "MediaPipe Vision Tasks",
5
5
  "main": "vision_bundle.js",
6
6
  "author": "mediapipe@google.com",
package/vision.d.ts CHANGED
@@ -224,6 +224,141 @@ export declare interface Embedding {
224
224
  headName: string;
225
225
  }
226
226
 
227
+ /** Performs face stylization on images. */
228
+ export declare class FaceStylizer extends VisionTaskRunner {
229
+ /**
230
+ * Initializes the Wasm runtime and creates a new Face Stylizer from the
231
+ * provided options.
232
+ * @param wasmFileset A configuration object that provides the location of
233
+ * the Wasm binary and its loader.
234
+ * @param faceStylizerOptions The options for the Face Stylizer. Note
235
+ * that either a path to the model asset or a model buffer needs to be
236
+ * provided (via `baseOptions`).
237
+ */
238
+ static createFromOptions(wasmFileset: WasmFileset, faceStylizerOptions: FaceStylizerOptions): Promise<FaceStylizer>;
239
+ /**
240
+ * Initializes the Wasm runtime and creates a new Face Stylizer based on
241
+ * the provided model asset buffer.
242
+ * @param wasmFileset A configuration object that provides the location of
243
+ * the Wasm binary and its loader.
244
+ * @param modelAssetBuffer A binary representation of the model.
245
+ */
246
+ static createFromModelBuffer(wasmFileset: WasmFileset, modelAssetBuffer: Uint8Array): Promise<FaceStylizer>;
247
+ /**
248
+ * Initializes the Wasm runtime and creates a new Face Stylizer based on
249
+ * the path to the model asset.
250
+ * @param wasmFileset A configuration object that provides the location of
251
+ * the Wasm binary and its loader.
252
+ * @param modelAssetPath The path to the model asset.
253
+ */
254
+ static createFromModelPath(wasmFileset: WasmFileset, modelAssetPath: string): Promise<FaceStylizer>;
255
+ private constructor();
256
+ /**
257
+ * Sets new options for the Face Stylizer.
258
+ *
259
+ * Calling `setOptions()` with a subset of options only affects those
260
+ * options. You can reset an option back to its default value by
261
+ * explicitly setting it to `undefined`.
262
+ *
263
+ * @param options The options for the Face Stylizer.
264
+ */
265
+ setOptions(options: FaceStylizerOptions): Promise<void>;
266
+ /**
267
+ * Performs face stylization on the provided single image. The method returns
268
+ * synchronously once the callback returns. Only use this method when the
269
+ * FaceStylizer is created with the image running mode.
270
+ *
271
+ * The input image can be of any size. To ensure that the output image has
272
+ * reasonable quality, the stylized output image size is determined by the
273
+ * model output size.
274
+ *
275
+ * @param image An image to process.
276
+ * @param callback The callback that is invoked with the stylized image. The
277
+ * lifetime of the returned data is only guaranteed for the duration of the
278
+ * callback.
279
+ */
280
+ stylize(image: ImageSource, callback: ImageCallback): void;
281
+ /**
282
+ * Performs face stylization on the provided single image. The method returns
283
+ * synchronously once the callback returns. Only use this method when the
284
+ * FaceStylizer is created with the image running mode.
285
+ *
286
+ * The 'imageProcessingOptions' parameter can be used to specify one or all
287
+ * of:
288
+ * - the rotation to apply to the image before performing stylization, by
289
+ * setting its 'rotationDegrees' property.
290
+ * - the region-of-interest on which to perform stylization, by setting its
291
+ * 'regionOfInterest' property. If not specified, the full image is used.
292
+ * If both are specified, the crop around the region-of-interest is extracted
293
+ * first, then the specified rotation is applied to the crop.
294
+ *
295
+ * The input image can be of any size. To ensure that the output image has
296
+ * reasonable quality, the stylized output image size is the smaller of the
297
+ * model output size and the size of the 'regionOfInterest' specified in
298
+ * 'imageProcessingOptions'.
299
+ *
300
+ * @param image An image to process.
301
+ * @param imageProcessingOptions the `ImageProcessingOptions` specifying how
302
+ * to process the input image before running inference.
303
+ * @param callback The callback that is invoked with the stylized image. The
304
+ * lifetime of the returned data is only guaranteed for the duration of the
305
+ * callback.
306
+ */
307
+ stylize(image: ImageSource, imageProcessingOptions: ImageProcessingOptions, callback: ImageCallback): void;
308
+ /**
309
+ * Performs face stylization on the provided video frame. Only use this method
310
+ * when the FaceStylizer is created with the video running mode.
311
+ *
312
+ * The input frame can be of any size. It's required to provide the video
313
+ * frame's timestamp (in milliseconds). The input timestamps must be
314
+ * monotonically increasing.
315
+ *
316
+ * To ensure that the output image has reasonable quality, the stylized
317
+ * output image size is determined by the model output size.
318
+ *
319
+ * @param videoFrame A video frame to process.
320
+ * @param timestamp The timestamp of the current frame, in ms.
321
+ * @param callback The callback that is invoked with the stylized image. The
322
+ * lifetime of the returned data is only guaranteed for the duration of
323
+ * the callback.
324
+ */
325
+ stylizeForVideo(videoFrame: ImageSource, timestamp: number, callback: ImageCallback): void;
326
+ /**
327
+ * Performs face stylization on the provided video frame. Only use this
328
+ * method when the FaceStylizer is created with the video running mode.
329
+ *
330
+ * The 'imageProcessingOptions' parameter can be used to specify one or all
331
+ * of:
332
+ * - the rotation to apply to the image before performing stylization, by
333
+ * setting its 'rotationDegrees' property.
334
+ * - the region-of-interest on which to perform stylization, by setting its
335
+ * 'regionOfInterest' property. If not specified, the full image is used.
336
+ * If both are specified, the crop around the region-of-interest is
337
+ * extracted first, then the specified rotation is applied to the crop.
338
+ *
339
+ * The input frame can be of any size. It's required to provide the video
340
+ * frame's timestamp (in milliseconds). The input timestamps must be
341
+ * monotonically increasing.
342
+ *
343
+ * To ensure that the output image has reasonable quality, the stylized
344
+ * output image size is the smaller of the model output size and the size of
345
+ * the 'regionOfInterest' specified in 'imageProcessingOptions'.
346
+ *
347
+ * @param videoFrame A video frame to process.
348
+ * @param imageProcessingOptions the `ImageProcessingOptions` specifying how
349
+ * to process the input image before running inference.
350
+ * @param timestamp The timestamp of the current frame, in ms.
351
+ * @param callback The callback that is invoked with the stylized image. The
352
+ * lifetime of the returned data is only guaranteed for the duration of
353
+ * the callback.
354
+ */
355
+ stylizeForVideo(videoFrame: ImageSource, imageProcessingOptions: ImageProcessingOptions, timestamp: number, callback: ImageCallback): void;
356
+ }
357
+
358
+ /** Options to configure the MediaPipe Face Stylizer Task */
359
+ export declare interface FaceStylizerOptions extends VisionTaskOptions {
360
+ }
361
+
227
362
  /**
228
363
  * Resolves the files required for the MediaPipe Task APIs.
229
364
  *
@@ -363,7 +498,7 @@ export declare interface GestureRecognizerOptions extends VisionTaskOptions {
363
498
  */
364
499
  minTrackingConfidence?: number | undefined;
365
500
  /**
366
- * Sets the optional `ClassifierOptions` controling the canned gestures
501
+ * Sets the optional `ClassifierOptions` controlling the canned gestures
367
502
  * classifier, such as score threshold, allow list and deny list of gestures.
368
503
  * The categories for canned gesture
369
504
  * classifiers are: ["None", "Closed_Fist", "Open_Palm", "Pointing_Up",
@@ -495,6 +630,16 @@ export declare interface HandLandmarkerResult {
495
630
  handednesses: Category[][];
496
631
  }
497
632
 
633
+ /**
634
+ * A callback that receives an `ImageData` object from a Vision task. The
635
+ * lifetime of the underlying data is limited to the duration of the callback.
636
+ * If asynchronous processing is needed, all data needs to be copied before the
637
+ * callback returns.
638
+ *
639
+ * The `WebGLTexture` output type is reserved for future usage.
640
+ */
641
+ export declare type ImageCallback = (image: ImageData | WebGLTexture, width: number, height: number) => void;
642
+
498
643
  /** Performs classification on images. */
499
644
  export declare class ImageClassifier extends VisionTaskRunner {
500
645
  /**
@@ -764,6 +909,18 @@ export declare class ImageSegmenter extends VisionTaskRunner {
764
909
  * callback.
765
910
  */
766
911
  segment(image: ImageSource, imageProcessingOptions: ImageProcessingOptions, callback: SegmentationMaskCallback): void;
912
+ /**
913
+ * Get the list of category labels the ImageSegmenter can recognize. For
914
+ * `CATEGORY_MASK` type, the index in the category mask corresponds to the
915
+ * category in the label list. For `CONFIDENCE_MASK` type, the output mask
916
+ * list at index corresponds to the category in the label list.
917
+ *
918
+ * If there is no labelmap provided in the model file, empty label array is
919
+ * returned.
920
+ *
921
+ * @return The labels used by the current model.
922
+ */
923
+ getLabels(): string[];
767
924
  /**
768
925
  * Performs image segmentation on the provided video frame and invokes the
769
926
  * callback with the response. The method returns synchronously once the
@@ -822,6 +979,136 @@ export declare interface ImageSegmenterOptions extends VisionTaskOptions {
822
979
  */
823
980
  export declare type ImageSource = HTMLCanvasElement | HTMLVideoElement | HTMLImageElement | ImageData | ImageBitmap;
824
981
 
982
+ /**
983
+ * Performs interactive segmentation on images.
984
+ *
985
+ * Users can represent user interaction through `RegionOfInterest`, which gives
986
+ * a hint to InteractiveSegmenter to perform segmentation focusing on the given
987
+ * region of interest.
988
+ *
989
+ * The API expects a TFLite model with mandatory TFLite Model Metadata.
990
+ *
991
+ * Input tensor:
992
+ * (kTfLiteUInt8/kTfLiteFloat32)
993
+ * - image input of size `[batch x height x width x channels]`.
994
+ * - batch inference is not supported (`batch` is required to be 1).
995
+ * - RGB inputs are supported (`channels` is required to be 3).
996
+ * - if type is kTfLiteFloat32, NormalizationOptions are required to be
997
+ * attached to the metadata for input normalization.
998
+ * Output tensors:
999
+ * (kTfLiteUInt8/kTfLiteFloat32)
1000
+ * - list of segmented masks.
1001
+ * - if `output_type` is CATEGORY_MASK, uint8 Image, Image vector of size 1.
1002
+ * - if `output_type` is CONFIDENCE_MASK, float32 Image list of size
1003
+ * `channels`.
1004
+ * - batch is always 1
1005
+ */
1006
+ export declare class InteractiveSegmenter extends VisionTaskRunner {
1007
+ /**
1008
+ * Initializes the Wasm runtime and creates a new interactive segmenter from
1009
+ * the provided options.
1010
+ * @param wasmFileset A configuration object that provides the location of
1011
+ * the Wasm binary and its loader.
1012
+ * @param interactiveSegmenterOptions The options for the Interactive
1013
+ * Segmenter. Note that either a path to the model asset or a model buffer
1014
+ * needs to be provided (via `baseOptions`).
1015
+ * @return A new `InteractiveSegmenter`.
1016
+ */
1017
+ static createFromOptions(wasmFileset: WasmFileset, interactiveSegmenterOptions: InteractiveSegmenterOptions): Promise<InteractiveSegmenter>;
1018
+ /**
1019
+ * Initializes the Wasm runtime and creates a new interactive segmenter based
1020
+ * on the provided model asset buffer.
1021
+ * @param wasmFileset A configuration object that provides the location of
1022
+ * the Wasm binary and its loader.
1023
+ * @param modelAssetBuffer A binary representation of the model.
1024
+ * @return A new `InteractiveSegmenter`.
1025
+ */
1026
+ static createFromModelBuffer(wasmFileset: WasmFileset, modelAssetBuffer: Uint8Array): Promise<InteractiveSegmenter>;
1027
+ /**
1028
+ * Initializes the Wasm runtime and creates a new interactive segmenter based
1029
+ * on the path to the model asset.
1030
+ * @param wasmFileset A configuration object that provides the location of
1031
+ * the Wasm binary and its loader.
1032
+ * @param modelAssetPath The path to the model asset.
1033
+ * @return A new `InteractiveSegmenter`.
1034
+ */
1035
+ static createFromModelPath(wasmFileset: WasmFileset, modelAssetPath: string): Promise<InteractiveSegmenter>;
1036
+ private constructor();
1037
+ /**
1038
+ * Sets new options for the interactive segmenter.
1039
+ *
1040
+ * Calling `setOptions()` with a subset of options only affects those
1041
+ * options. You can reset an option back to its default value by
1042
+ * explicitly setting it to `undefined`.
1043
+ *
1044
+ * @param options The options for the interactive segmenter.
1045
+ * @return A Promise that resolves when the settings have been applied.
1046
+ */
1047
+ setOptions(options: InteractiveSegmenterOptions): Promise<void>;
1048
+ /**
1049
+ * Performs interactive segmentation on the provided single image and invokes
1050
+ * the callback with the response. The `roi` parameter is used to represent a
1051
+ * user's region of interest for segmentation.
1052
+ *
1053
+ * If the output_type is `CATEGORY_MASK`, the callback is invoked with vector
1054
+ * of images that represent per-category segmented image mask. If the
1055
+ * output_type is `CONFIDENCE_MASK`, the callback is invoked with a vector of
1056
+ * images that contains only one confidence image mask. The method returns
1057
+ * synchronously once the callback returns.
1058
+ *
1059
+ * @param image An image to process.
1060
+ * @param roi The region of interest for segmentation.
1061
+ * @param callback The callback that is invoked with the segmented masks. The
1062
+ * lifetime of the returned data is only guaranteed for the duration of the
1063
+ * callback.
1064
+ */
1065
+ segment(image: ImageSource, roi: RegionOfInterest, callback: SegmentationMaskCallback): void;
1066
+ /**
1067
+ * Performs interactive segmentation on the provided single image and invokes
1068
+ * the callback with the response. The `roi` parameter is used to represent a
1069
+ * user's region of interest for segmentation.
1070
+ *
1071
+ * The 'image_processing_options' parameter can be used to specify the
1072
+ * rotation to apply to the image before performing segmentation, by setting
1073
+ * its 'rotationDegrees' field. Note that specifying a region-of-interest
1074
+ * using the 'regionOfInterest' field is NOT supported and will result in an
1075
+ * error.
1076
+ *
1077
+ * If the output_type is `CATEGORY_MASK`, the callback is invoked with vector
1078
+ * of images that represent per-category segmented image mask. If the
1079
+ * output_type is `CONFIDENCE_MASK`, the callback is invoked with a vector of
1080
+ * images that contains only one confidence image mask. The method returns
1081
+ * synchronously once the callback returns.
1082
+ *
1083
+ * @param image An image to process.
1084
+ * @param roi The region of interest for segmentation.
1085
+ * @param imageProcessingOptions the `ImageProcessingOptions` specifying how
1086
+ * to process the input image before running inference.
1087
+ * @param callback The callback that is invoked with the segmented masks. The
1088
+ * lifetime of the returned data is only guaranteed for the duration of the
1089
+ * callback.
1090
+ */
1091
+ segment(image: ImageSource, roi: RegionOfInterest, imageProcessingOptions: ImageProcessingOptions, callback: SegmentationMaskCallback): void;
1092
+ }
1093
+
1094
+ /** Options to configure the MediaPipe Interactive Segmenter Task */
1095
+ export declare interface InteractiveSegmenterOptions extends TaskRunnerOptions {
1096
+ /**
1097
+ * The output type of segmentation results.
1098
+ *
1099
+ * The two supported modes are:
1100
+ * - Category Mask: Gives a single output mask where each pixel represents
1101
+ * the class which the pixel in the original image was
1102
+ * predicted to belong to.
1103
+ * - Confidence Mask: Gives a list of output masks (one for each class). For
1104
+ * each mask, the pixel represents the prediction
1105
+ * confidence, usually in the [0.0, 1.0] range.
1106
+ *
1107
+ * Defaults to `CATEGORY_MASK`.
1108
+ */
1109
+ outputType?: "CATEGORY_MASK" | "CONFIDENCE_MASK" | undefined;
1110
+ }
1111
+
825
1112
  /**
826
1113
  * Landmark represents a point in 3D space with x, y, z coordinates. The
827
1114
  * landmark coordinates are in meters. z represents the landmark depth,
@@ -836,6 +1123,36 @@ export declare interface Landmark {
836
1123
  z: number;
837
1124
  }
838
1125
 
1126
+ /**
1127
+ * Copyright 2023 The MediaPipe Authors. All Rights Reserved.
1128
+ *
1129
+ * Licensed under the Apache License, Version 2.0 (the "License");
1130
+ * you may not use this file except in compliance with the License.
1131
+ * You may obtain a copy of the License at
1132
+ *
1133
+ * http://www.apache.org/licenses/LICENSE-2.0
1134
+ *
1135
+ * Unless required by applicable law or agreed to in writing, software
1136
+ * distributed under the License is distributed on an "AS IS" BASIS,
1137
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1138
+ * See the License for the specific language governing permissions and
1139
+ * limitations under the License.
1140
+ */
1141
+ /**
1142
+ * A keypoint, defined by the coordinates (x, y), normalized by the image
1143
+ * dimensions.
1144
+ */
1145
+ declare interface NormalizedKeypoint {
1146
+ /** X in normalized image coordinates. */
1147
+ x: number;
1148
+ /** Y in normalized image coordinates. */
1149
+ y: number;
1150
+ /** Optional label of the keypoint. */
1151
+ label?: string;
1152
+ /** Optional score of the keypoint. */
1153
+ score?: number;
1154
+ }
1155
+
839
1156
  /**
840
1157
  * Copyright 2022 The MediaPipe Authors. All Rights Reserved.
841
1158
  *
@@ -950,6 +1267,12 @@ declare interface RectF {
950
1267
  bottom: number;
951
1268
  }
952
1269
 
1270
+ /** A Region-Of-Interest (ROI) to represent a region within an image. */
1271
+ export declare interface RegionOfInterest {
1272
+ /** The ROI in keypoint format. */
1273
+ keypoint: NormalizedKeypoint;
1274
+ }
1275
+
953
1276
  /**
954
1277
  * The two running modes of a vision task.
955
1278
  * 1) The image mode for processing single image inputs.
@@ -958,17 +1281,18 @@ declare interface RectF {
958
1281
  declare type RunningMode = "IMAGE" | "VIDEO";
959
1282
 
960
1283
  /**
961
- * The ImageSegmenter returns the segmentation result as a Uint8Array (when
962
- * the default mode of `CATEGORY_MASK` is used) or as a Float32Array (for
963
- * output type `CONFIDENCE_MASK`). The `WebGLTexture` output type is reserved
964
- * for future usage.
1284
+ * The segmentation tasks return the segmentation either as a WebGLTexture (when
1285
+ * the output is on GPU) or as typed JavaScript arrays for CPU-based
1286
+ * category or confidence masks. `Uint8ClampedArray`s are used to represent
1287
+ * CPU-based category masks and `Float32Array`s are used for CPU-based
1288
+ * confidence masks.
965
1289
  */
966
- export declare type SegmentationMask = Uint8Array | Float32Array | WebGLTexture;
1290
+ export declare type SegmentationMask = Uint8ClampedArray | Float32Array | WebGLTexture;
967
1291
 
968
1292
  /**
969
- * A callback that receives the computed masks from the image segmenter. The
1293
+ * A callback that receives the computed masks from the segmentation tasks. The
970
1294
  * callback either receives a single element array with a category mask (as a
971
- * `[Uint8Array]`) or multiple confidence masks (as a `Float32Array[]`).
1295
+ * `[Uint8ClampedArray]`) or multiple confidence masks (as a `Float32Array[]`).
972
1296
  * The returned data is only valid for the duration of the callback. If
973
1297
  * asynchronous processing is needed, all data needs to be copied before the
974
1298
  * callback returns.
@@ -990,6 +1314,13 @@ declare interface TaskRunnerOptions {
990
1314
 
991
1315
  /** The options for configuring a MediaPipe vision task. */
992
1316
  declare interface VisionTaskOptions extends TaskRunnerOptions {
1317
+ /**
1318
+ * The canvas element to bind textures to. This has to be set for GPU
1319
+ * processing. The task will initialize a WebGL context and throw an error if
1320
+ * this fails (e.g. if you have already initialized a different type of
1321
+ * context).
1322
+ */
1323
+ canvas?: HTMLCanvasElement | OffscreenCanvas;
993
1324
  /**
994
1325
  * The running mode of the task. Default to the image mode.
995
1326
  * Vision tasks have two running modes: