@mediapipe/tasks-vision 0.1.0-alpha-4 → 0.1.0-alpha-5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +56 -23
- package/package.json +1 -1
- package/vision.d.ts +339 -8
- package/vision_bundle.js +1 -1
- package/wasm/vision_wasm_internal.js +441 -307
- package/wasm/vision_wasm_internal.wasm +0 -0
- package/wasm/vision_wasm_nosimd_internal.js +431 -307
- package/wasm/vision_wasm_nosimd_internal.wasm +0 -0
package/README.md
CHANGED
|
@@ -2,23 +2,57 @@
|
|
|
2
2
|
|
|
3
3
|
This package contains the vision tasks for MediaPipe.
|
|
4
4
|
|
|
5
|
-
##
|
|
5
|
+
## Face Stylizer
|
|
6
6
|
|
|
7
|
-
The MediaPipe
|
|
8
|
-
multiple classes of objects within images or videos.
|
|
7
|
+
The MediaPipe Face Stylizer lets you perform face stylization on images.
|
|
9
8
|
|
|
10
9
|
```
|
|
11
10
|
const vision = await FilesetResolver.forVisionTasks(
|
|
12
11
|
"https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
|
|
13
12
|
);
|
|
14
|
-
const
|
|
15
|
-
"
|
|
13
|
+
const faceStylizer = await FaceStylizer.createFromModelPath(vision,
|
|
14
|
+
"model.tflite"
|
|
16
15
|
);
|
|
17
16
|
const image = document.getElementById("image") as HTMLImageElement;
|
|
18
|
-
const
|
|
17
|
+
const stylizedImage = faceStylizer.stylize(image);
|
|
19
18
|
```
|
|
20
19
|
|
|
21
|
-
|
|
20
|
+
## Gesture Recognition
|
|
21
|
+
|
|
22
|
+
The MediaPipe Gesture Recognizer task lets you recognize hand gestures in real
|
|
23
|
+
time, and provides the recognized hand gesture results along with the landmarks
|
|
24
|
+
of the detected hands. You can use this task to recognize specific hand gestures
|
|
25
|
+
from a user, and invoke application features that correspond to those gestures.
|
|
26
|
+
|
|
27
|
+
```
|
|
28
|
+
const vision = await FilesetResolver.forVisionTasks(
|
|
29
|
+
"https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
|
|
30
|
+
);
|
|
31
|
+
const gestureRecognizer = await GestureRecognizer.createFromModelPath(vision,
|
|
32
|
+
"https://storage.googleapis.com/mediapipe-tasks/gesture_recognizer/gesture_recognizer.task"
|
|
33
|
+
);
|
|
34
|
+
const image = document.getElementById("image") as HTMLImageElement;
|
|
35
|
+
const recognitions = gestureRecognizer.recognize(image);
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Hand Landmark Detection
|
|
39
|
+
|
|
40
|
+
The MediaPipe Hand Landmarker task lets you detect the landmarks of the hands in
|
|
41
|
+
an image. You can use this Task to localize key points of the hands and render
|
|
42
|
+
visual effects over the hands.
|
|
43
|
+
|
|
44
|
+
```
|
|
45
|
+
const vision = await FilesetResolver.forVisionTasks(
|
|
46
|
+
"https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
|
|
47
|
+
);
|
|
48
|
+
const handLandmarker = await HandLandmarker.createFromModelPath(vision,
|
|
49
|
+
"https://storage.googleapis.com/mediapipe-tasks/hand_landmarker/hand_landmarker.task"
|
|
50
|
+
);
|
|
51
|
+
const image = document.getElementById("image") as HTMLImageElement;
|
|
52
|
+
const landmarks = handLandmarker.detect(image);
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
For more information, refer to the [Hand Landmark Detection](https://developers.google.com/mediapipe/solutions/vision/hand_landmarker/web_js) documentation.
|
|
22
56
|
|
|
23
57
|
## Image Classification
|
|
24
58
|
|
|
@@ -56,40 +90,39 @@ imageSegmenter.segment(image, (masks, width, height) => {
|
|
|
56
90
|
});
|
|
57
91
|
```
|
|
58
92
|
|
|
59
|
-
##
|
|
93
|
+
## Interactive Segmentation
|
|
60
94
|
|
|
61
|
-
The MediaPipe
|
|
62
|
-
|
|
63
|
-
of the detected hands. You can use this task to recognize specific hand gestures
|
|
64
|
-
from a user, and invoke application features that correspond to those gestures.
|
|
95
|
+
The MediaPipe Interactive Segmenter lets you select a region of interest to
|
|
96
|
+
segment an image by.
|
|
65
97
|
|
|
66
98
|
```
|
|
67
99
|
const vision = await FilesetResolver.forVisionTasks(
|
|
68
100
|
"https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
|
|
69
101
|
);
|
|
70
|
-
const
|
|
71
|
-
"
|
|
102
|
+
const interactiveSegmenter = await InteractiveSegmenter.createFromModelPath(
|
|
103
|
+
vision, "model.tflite"
|
|
72
104
|
);
|
|
73
105
|
const image = document.getElementById("image") as HTMLImageElement;
|
|
74
|
-
|
|
106
|
+
interactiveSegmenter.segment(image, { keypoint: { x: 0.1, y: 0.2 } },
|
|
107
|
+
(masks, width, height) => { ... }
|
|
108
|
+
);
|
|
75
109
|
```
|
|
76
110
|
|
|
77
|
-
##
|
|
111
|
+
## Object Detection
|
|
78
112
|
|
|
79
|
-
The MediaPipe
|
|
80
|
-
|
|
81
|
-
visual effects over the hands.
|
|
113
|
+
The MediaPipe Object Detector task lets you detect the presence and location of
|
|
114
|
+
multiple classes of objects within images or videos.
|
|
82
115
|
|
|
83
116
|
```
|
|
84
117
|
const vision = await FilesetResolver.forVisionTasks(
|
|
85
118
|
"https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
|
|
86
119
|
);
|
|
87
|
-
const
|
|
88
|
-
"https://storage.googleapis.com/mediapipe-tasks/
|
|
120
|
+
const objectDetector = await ObjectDetector.createFromModelPath(vision,
|
|
121
|
+
"https://storage.googleapis.com/mediapipe-tasks/object_detector/efficientdet_lite0_uint8.tflite"
|
|
89
122
|
);
|
|
90
123
|
const image = document.getElementById("image") as HTMLImageElement;
|
|
91
|
-
const
|
|
124
|
+
const detections = objectDetector.detect(image);
|
|
92
125
|
```
|
|
93
126
|
|
|
94
|
-
For more information, refer to the [
|
|
127
|
+
For more information, refer to the [Object Detector](https://developers.google.com/mediapipe/solutions/vision/object_detector/web_js) documentation.
|
|
95
128
|
|
package/package.json
CHANGED
package/vision.d.ts
CHANGED
|
@@ -224,6 +224,141 @@ export declare interface Embedding {
|
|
|
224
224
|
headName: string;
|
|
225
225
|
}
|
|
226
226
|
|
|
227
|
+
/** Performs face stylization on images. */
|
|
228
|
+
export declare class FaceStylizer extends VisionTaskRunner {
|
|
229
|
+
/**
|
|
230
|
+
* Initializes the Wasm runtime and creates a new Face Stylizer from the
|
|
231
|
+
* provided options.
|
|
232
|
+
* @param wasmFileset A configuration object that provides the location of
|
|
233
|
+
* the Wasm binary and its loader.
|
|
234
|
+
* @param faceStylizerOptions The options for the Face Stylizer. Note
|
|
235
|
+
* that either a path to the model asset or a model buffer needs to be
|
|
236
|
+
* provided (via `baseOptions`).
|
|
237
|
+
*/
|
|
238
|
+
static createFromOptions(wasmFileset: WasmFileset, faceStylizerOptions: FaceStylizerOptions): Promise<FaceStylizer>;
|
|
239
|
+
/**
|
|
240
|
+
* Initializes the Wasm runtime and creates a new Face Stylizer based on
|
|
241
|
+
* the provided model asset buffer.
|
|
242
|
+
* @param wasmFileset A configuration object that provides the location of
|
|
243
|
+
* the Wasm binary and its loader.
|
|
244
|
+
* @param modelAssetBuffer A binary representation of the model.
|
|
245
|
+
*/
|
|
246
|
+
static createFromModelBuffer(wasmFileset: WasmFileset, modelAssetBuffer: Uint8Array): Promise<FaceStylizer>;
|
|
247
|
+
/**
|
|
248
|
+
* Initializes the Wasm runtime and creates a new Face Stylizer based on
|
|
249
|
+
* the path to the model asset.
|
|
250
|
+
* @param wasmFileset A configuration object that provides the location of
|
|
251
|
+
* the Wasm binary and its loader.
|
|
252
|
+
* @param modelAssetPath The path to the model asset.
|
|
253
|
+
*/
|
|
254
|
+
static createFromModelPath(wasmFileset: WasmFileset, modelAssetPath: string): Promise<FaceStylizer>;
|
|
255
|
+
private constructor();
|
|
256
|
+
/**
|
|
257
|
+
* Sets new options for the Face Stylizer.
|
|
258
|
+
*
|
|
259
|
+
* Calling `setOptions()` with a subset of options only affects those
|
|
260
|
+
* options. You can reset an option back to its default value by
|
|
261
|
+
* explicitly setting it to `undefined`.
|
|
262
|
+
*
|
|
263
|
+
* @param options The options for the Face Stylizer.
|
|
264
|
+
*/
|
|
265
|
+
setOptions(options: FaceStylizerOptions): Promise<void>;
|
|
266
|
+
/**
|
|
267
|
+
* Performs face stylization on the provided single image. The method returns
|
|
268
|
+
* synchronously once the callback returns. Only use this method when the
|
|
269
|
+
* FaceStylizer is created with the image running mode.
|
|
270
|
+
*
|
|
271
|
+
* The input image can be of any size. To ensure that the output image has
|
|
272
|
+
* reasonable quality, the stylized output image size is determined by the
|
|
273
|
+
* model output size.
|
|
274
|
+
*
|
|
275
|
+
* @param image An image to process.
|
|
276
|
+
* @param callback The callback that is invoked with the stylized image. The
|
|
277
|
+
* lifetime of the returned data is only guaranteed for the duration of the
|
|
278
|
+
* callback.
|
|
279
|
+
*/
|
|
280
|
+
stylize(image: ImageSource, callback: ImageCallback): void;
|
|
281
|
+
/**
|
|
282
|
+
* Performs face stylization on the provided single image. The method returns
|
|
283
|
+
* synchronously once the callback returns. Only use this method when the
|
|
284
|
+
* FaceStylizer is created with the image running mode.
|
|
285
|
+
*
|
|
286
|
+
* The 'imageProcessingOptions' parameter can be used to specify one or all
|
|
287
|
+
* of:
|
|
288
|
+
* - the rotation to apply to the image before performing stylization, by
|
|
289
|
+
* setting its 'rotationDegrees' property.
|
|
290
|
+
* - the region-of-interest on which to perform stylization, by setting its
|
|
291
|
+
* 'regionOfInterest' property. If not specified, the full image is used.
|
|
292
|
+
* If both are specified, the crop around the region-of-interest is extracted
|
|
293
|
+
* first, then the specified rotation is applied to the crop.
|
|
294
|
+
*
|
|
295
|
+
* The input image can be of any size. To ensure that the output image has
|
|
296
|
+
* reasonable quality, the stylized output image size is the smaller of the
|
|
297
|
+
* model output size and the size of the 'regionOfInterest' specified in
|
|
298
|
+
* 'imageProcessingOptions'.
|
|
299
|
+
*
|
|
300
|
+
* @param image An image to process.
|
|
301
|
+
* @param imageProcessingOptions the `ImageProcessingOptions` specifying how
|
|
302
|
+
* to process the input image before running inference.
|
|
303
|
+
* @param callback The callback that is invoked with the stylized image. The
|
|
304
|
+
* lifetime of the returned data is only guaranteed for the duration of the
|
|
305
|
+
* callback.
|
|
306
|
+
*/
|
|
307
|
+
stylize(image: ImageSource, imageProcessingOptions: ImageProcessingOptions, callback: ImageCallback): void;
|
|
308
|
+
/**
|
|
309
|
+
* Performs face stylization on the provided video frame. Only use this method
|
|
310
|
+
* when the FaceStylizer is created with the video running mode.
|
|
311
|
+
*
|
|
312
|
+
* The input frame can be of any size. It's required to provide the video
|
|
313
|
+
* frame's timestamp (in milliseconds). The input timestamps must be
|
|
314
|
+
* monotonically increasing.
|
|
315
|
+
*
|
|
316
|
+
* To ensure that the output image has reasonable quality, the stylized
|
|
317
|
+
* output image size is determined by the model output size.
|
|
318
|
+
*
|
|
319
|
+
* @param videoFrame A video frame to process.
|
|
320
|
+
* @param timestamp The timestamp of the current frame, in ms.
|
|
321
|
+
* @param callback The callback that is invoked with the stylized image. The
|
|
322
|
+
* lifetime of the returned data is only guaranteed for the duration of
|
|
323
|
+
* the callback.
|
|
324
|
+
*/
|
|
325
|
+
stylizeForVideo(videoFrame: ImageSource, timestamp: number, callback: ImageCallback): void;
|
|
326
|
+
/**
|
|
327
|
+
* Performs face stylization on the provided video frame. Only use this
|
|
328
|
+
* method when the FaceStylizer is created with the video running mode.
|
|
329
|
+
*
|
|
330
|
+
* The 'imageProcessingOptions' parameter can be used to specify one or all
|
|
331
|
+
* of:
|
|
332
|
+
* - the rotation to apply to the image before performing stylization, by
|
|
333
|
+
* setting its 'rotationDegrees' property.
|
|
334
|
+
* - the region-of-interest on which to perform stylization, by setting its
|
|
335
|
+
* 'regionOfInterest' property. If not specified, the full image is used.
|
|
336
|
+
* If both are specified, the crop around the region-of-interest is
|
|
337
|
+
* extracted first, then the specified rotation is applied to the crop.
|
|
338
|
+
*
|
|
339
|
+
* The input frame can be of any size. It's required to provide the video
|
|
340
|
+
* frame's timestamp (in milliseconds). The input timestamps must be
|
|
341
|
+
* monotonically increasing.
|
|
342
|
+
*
|
|
343
|
+
* To ensure that the output image has reasonable quality, the stylized
|
|
344
|
+
* output image size is the smaller of the model output size and the size of
|
|
345
|
+
* the 'regionOfInterest' specified in 'imageProcessingOptions'.
|
|
346
|
+
*
|
|
347
|
+
* @param videoFrame A video frame to process.
|
|
348
|
+
* @param imageProcessingOptions the `ImageProcessingOptions` specifying how
|
|
349
|
+
* to process the input image before running inference.
|
|
350
|
+
* @param timestamp The timestamp of the current frame, in ms.
|
|
351
|
+
* @param callback The callback that is invoked with the stylized image. The
|
|
352
|
+
* lifetime of the returned data is only guaranteed for the duration of
|
|
353
|
+
* the callback.
|
|
354
|
+
*/
|
|
355
|
+
stylizeForVideo(videoFrame: ImageSource, imageProcessingOptions: ImageProcessingOptions, timestamp: number, callback: ImageCallback): void;
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
/** Options to configure the MediaPipe Face Stylizer Task */
|
|
359
|
+
export declare interface FaceStylizerOptions extends VisionTaskOptions {
|
|
360
|
+
}
|
|
361
|
+
|
|
227
362
|
/**
|
|
228
363
|
* Resolves the files required for the MediaPipe Task APIs.
|
|
229
364
|
*
|
|
@@ -363,7 +498,7 @@ export declare interface GestureRecognizerOptions extends VisionTaskOptions {
|
|
|
363
498
|
*/
|
|
364
499
|
minTrackingConfidence?: number | undefined;
|
|
365
500
|
/**
|
|
366
|
-
* Sets the optional `ClassifierOptions`
|
|
501
|
+
* Sets the optional `ClassifierOptions` controlling the canned gestures
|
|
367
502
|
* classifier, such as score threshold, allow list and deny list of gestures.
|
|
368
503
|
* The categories for canned gesture
|
|
369
504
|
* classifiers are: ["None", "Closed_Fist", "Open_Palm", "Pointing_Up",
|
|
@@ -495,6 +630,16 @@ export declare interface HandLandmarkerResult {
|
|
|
495
630
|
handednesses: Category[][];
|
|
496
631
|
}
|
|
497
632
|
|
|
633
|
+
/**
|
|
634
|
+
* A callback that receives an `ImageData` object from a Vision task. The
|
|
635
|
+
* lifetime of the underlying data is limited to the duration of the callback.
|
|
636
|
+
* If asynchronous processing is needed, all data needs to be copied before the
|
|
637
|
+
* callback returns.
|
|
638
|
+
*
|
|
639
|
+
* The `WebGLTexture` output type is reserved for future usage.
|
|
640
|
+
*/
|
|
641
|
+
export declare type ImageCallback = (image: ImageData | WebGLTexture, width: number, height: number) => void;
|
|
642
|
+
|
|
498
643
|
/** Performs classification on images. */
|
|
499
644
|
export declare class ImageClassifier extends VisionTaskRunner {
|
|
500
645
|
/**
|
|
@@ -764,6 +909,18 @@ export declare class ImageSegmenter extends VisionTaskRunner {
|
|
|
764
909
|
* callback.
|
|
765
910
|
*/
|
|
766
911
|
segment(image: ImageSource, imageProcessingOptions: ImageProcessingOptions, callback: SegmentationMaskCallback): void;
|
|
912
|
+
/**
|
|
913
|
+
* Get the list of category labels that the ImageSegmenter can recognize. For
|
|
914
|
+
* `CATEGORY_MASK` type, the index in the category mask corresponds to the
|
|
915
|
+
* category in the label list. For `CONFIDENCE_MASK` type, the output mask
|
|
916
|
+
* list at index corresponds to the category in the label list.
|
|
917
|
+
*
|
|
918
|
+
* If there is no labelmap provided in the model file, empty label array is
|
|
919
|
+
* returned.
|
|
920
|
+
*
|
|
921
|
+
* @return The labels used by the current model.
|
|
922
|
+
*/
|
|
923
|
+
getLabels(): string[];
|
|
767
924
|
/**
|
|
768
925
|
* Performs image segmentation on the provided video frame and invokes the
|
|
769
926
|
* callback with the response. The method returns synchronously once the
|
|
@@ -822,6 +979,136 @@ export declare interface ImageSegmenterOptions extends VisionTaskOptions {
|
|
|
822
979
|
*/
|
|
823
980
|
export declare type ImageSource = HTMLCanvasElement | HTMLVideoElement | HTMLImageElement | ImageData | ImageBitmap;
|
|
824
981
|
|
|
982
|
+
/**
|
|
983
|
+
* Performs interactive segmentation on images.
|
|
984
|
+
*
|
|
985
|
+
* Users can represent user interaction through `RegionOfInterest`, which gives
|
|
986
|
+
* a hint to InteractiveSegmenter to perform segmentation focusing on the given
|
|
987
|
+
* region of interest.
|
|
988
|
+
*
|
|
989
|
+
* The API expects a TFLite model with mandatory TFLite Model Metadata.
|
|
990
|
+
*
|
|
991
|
+
* Input tensor:
|
|
992
|
+
* (kTfLiteUInt8/kTfLiteFloat32)
|
|
993
|
+
* - image input of size `[batch x height x width x channels]`.
|
|
994
|
+
* - batch inference is not supported (`batch` is required to be 1).
|
|
995
|
+
* - RGB inputs are supported (`channels` is required to be 3).
|
|
996
|
+
* - if type is kTfLiteFloat32, NormalizationOptions are required to be
|
|
997
|
+
* attached to the metadata for input normalization.
|
|
998
|
+
* Output tensors:
|
|
999
|
+
* (kTfLiteUInt8/kTfLiteFloat32)
|
|
1000
|
+
* - list of segmented masks.
|
|
1001
|
+
* - if `output_type` is CATEGORY_MASK, uint8 Image, Image vector of size 1.
|
|
1002
|
+
* - if `output_type` is CONFIDENCE_MASK, float32 Image list of size
|
|
1003
|
+
* `channels`.
|
|
1004
|
+
* - batch is always 1
|
|
1005
|
+
*/
|
|
1006
|
+
export declare class InteractiveSegmenter extends VisionTaskRunner {
|
|
1007
|
+
/**
|
|
1008
|
+
* Initializes the Wasm runtime and creates a new interactive segmenter from
|
|
1009
|
+
* the provided options.
|
|
1010
|
+
* @param wasmFileset A configuration object that provides the location of
|
|
1011
|
+
* the Wasm binary and its loader.
|
|
1012
|
+
* @param interactiveSegmenterOptions The options for the Interactive
|
|
1013
|
+
* Segmenter. Note that either a path to the model asset or a model buffer
|
|
1014
|
+
* needs to be provided (via `baseOptions`).
|
|
1015
|
+
* @return A new `InteractiveSegmenter`.
|
|
1016
|
+
*/
|
|
1017
|
+
static createFromOptions(wasmFileset: WasmFileset, interactiveSegmenterOptions: InteractiveSegmenterOptions): Promise<InteractiveSegmenter>;
|
|
1018
|
+
/**
|
|
1019
|
+
* Initializes the Wasm runtime and creates a new interactive segmenter based
|
|
1020
|
+
* on the provided model asset buffer.
|
|
1021
|
+
* @param wasmFileset A configuration object that provides the location of
|
|
1022
|
+
* the Wasm binary and its loader.
|
|
1023
|
+
* @param modelAssetBuffer A binary representation of the model.
|
|
1024
|
+
* @return A new `InteractiveSegmenter`.
|
|
1025
|
+
*/
|
|
1026
|
+
static createFromModelBuffer(wasmFileset: WasmFileset, modelAssetBuffer: Uint8Array): Promise<InteractiveSegmenter>;
|
|
1027
|
+
/**
|
|
1028
|
+
* Initializes the Wasm runtime and creates a new interactive segmenter based
|
|
1029
|
+
* on the path to the model asset.
|
|
1030
|
+
* @param wasmFileset A configuration object that provides the location of
|
|
1031
|
+
* the Wasm binary and its loader.
|
|
1032
|
+
* @param modelAssetPath The path to the model asset.
|
|
1033
|
+
* @return A new `InteractiveSegmenter`.
|
|
1034
|
+
*/
|
|
1035
|
+
static createFromModelPath(wasmFileset: WasmFileset, modelAssetPath: string): Promise<InteractiveSegmenter>;
|
|
1036
|
+
private constructor();
|
|
1037
|
+
/**
|
|
1038
|
+
* Sets new options for the interactive segmenter.
|
|
1039
|
+
*
|
|
1040
|
+
* Calling `setOptions()` with a subset of options only affects those
|
|
1041
|
+
* options. You can reset an option back to its default value by
|
|
1042
|
+
* explicitly setting it to `undefined`.
|
|
1043
|
+
*
|
|
1044
|
+
* @param options The options for the interactive segmenter.
|
|
1045
|
+
* @return A Promise that resolves when the settings have been applied.
|
|
1046
|
+
*/
|
|
1047
|
+
setOptions(options: InteractiveSegmenterOptions): Promise<void>;
|
|
1048
|
+
/**
|
|
1049
|
+
* Performs interactive segmentation on the provided single image and invokes
|
|
1050
|
+
* the callback with the response. The `roi` parameter is used to represent a
|
|
1051
|
+
* user's region of interest for segmentation.
|
|
1052
|
+
*
|
|
1053
|
+
* If the output_type is `CATEGORY_MASK`, the callback is invoked with vector
|
|
1054
|
+
* of images that represent per-category segmented image mask. If the
|
|
1055
|
+
* output_type is `CONFIDENCE_MASK`, the callback is invoked with a vector of
|
|
1056
|
+
* images that contains only one confidence image mask. The method returns
|
|
1057
|
+
* synchronously once the callback returns.
|
|
1058
|
+
*
|
|
1059
|
+
* @param image An image to process.
|
|
1060
|
+
* @param roi The region of interest for segmentation.
|
|
1061
|
+
* @param callback The callback that is invoked with the segmented masks. The
|
|
1062
|
+
* lifetime of the returned data is only guaranteed for the duration of the
|
|
1063
|
+
* callback.
|
|
1064
|
+
*/
|
|
1065
|
+
segment(image: ImageSource, roi: RegionOfInterest, callback: SegmentationMaskCallback): void;
|
|
1066
|
+
/**
|
|
1067
|
+
* Performs interactive segmentation on the provided single image and invokes
|
|
1068
|
+
* the callback with the response. The `roi` parameter is used to represent a
|
|
1069
|
+
* user's region of interest for segmentation.
|
|
1070
|
+
*
|
|
1071
|
+
* The 'image_processing_options' parameter can be used to specify the
|
|
1072
|
+
* rotation to apply to the image before performing segmentation, by setting
|
|
1073
|
+
* its 'rotationDegrees' field. Note that specifying a region-of-interest
|
|
1074
|
+
* using the 'regionOfInterest' field is NOT supported and will result in an
|
|
1075
|
+
* error.
|
|
1076
|
+
*
|
|
1077
|
+
* If the output_type is `CATEGORY_MASK`, the callback is invoked with vector
|
|
1078
|
+
* of images that represent per-category segmented image mask. If the
|
|
1079
|
+
* output_type is `CONFIDENCE_MASK`, the callback is invoked with a vector of
|
|
1080
|
+
* images that contains only one confidence image mask. The method returns
|
|
1081
|
+
* synchronously once the callback returns.
|
|
1082
|
+
*
|
|
1083
|
+
* @param image An image to process.
|
|
1084
|
+
* @param roi The region of interest for segmentation.
|
|
1085
|
+
* @param imageProcessingOptions the `ImageProcessingOptions` specifying how
|
|
1086
|
+
* to process the input image before running inference.
|
|
1087
|
+
* @param callback The callback that is invoked with the segmented masks. The
|
|
1088
|
+
* lifetime of the returned data is only guaranteed for the duration of the
|
|
1089
|
+
* callback.
|
|
1090
|
+
*/
|
|
1091
|
+
segment(image: ImageSource, roi: RegionOfInterest, imageProcessingOptions: ImageProcessingOptions, callback: SegmentationMaskCallback): void;
|
|
1092
|
+
}
|
|
1093
|
+
|
|
1094
|
+
/** Options to configure the MediaPipe Interactive Segmenter Task */
|
|
1095
|
+
export declare interface InteractiveSegmenterOptions extends TaskRunnerOptions {
|
|
1096
|
+
/**
|
|
1097
|
+
* The output type of segmentation results.
|
|
1098
|
+
*
|
|
1099
|
+
* The two supported modes are:
|
|
1100
|
+
* - Category Mask: Gives a single output mask where each pixel represents
|
|
1101
|
+
* the class which the pixel in the original image was
|
|
1102
|
+
* predicted to belong to.
|
|
1103
|
+
* - Confidence Mask: Gives a list of output masks (one for each class). For
|
|
1104
|
+
* each mask, the pixel represents the prediction
|
|
1105
|
+
* confidence, usually in the [0.0, 1.0] range.
|
|
1106
|
+
*
|
|
1107
|
+
* Defaults to `CATEGORY_MASK`.
|
|
1108
|
+
*/
|
|
1109
|
+
outputType?: "CATEGORY_MASK" | "CONFIDENCE_MASK" | undefined;
|
|
1110
|
+
}
|
|
1111
|
+
|
|
825
1112
|
/**
|
|
826
1113
|
* Landmark represents a point in 3D space with x, y, z coordinates. The
|
|
827
1114
|
* landmark coordinates are in meters. z represents the landmark depth,
|
|
@@ -836,6 +1123,36 @@ export declare interface Landmark {
|
|
|
836
1123
|
z: number;
|
|
837
1124
|
}
|
|
838
1125
|
|
|
1126
|
+
/**
|
|
1127
|
+
* Copyright 2023 The MediaPipe Authors. All Rights Reserved.
|
|
1128
|
+
*
|
|
1129
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
1130
|
+
* you may not use this file except in compliance with the License.
|
|
1131
|
+
* You may obtain a copy of the License at
|
|
1132
|
+
*
|
|
1133
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
1134
|
+
*
|
|
1135
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
1136
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
1137
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
1138
|
+
* See the License for the specific language governing permissions and
|
|
1139
|
+
* limitations under the License.
|
|
1140
|
+
*/
|
|
1141
|
+
/**
|
|
1142
|
+
* A keypoint, defined by the coordinates (x, y), normalized by the image
|
|
1143
|
+
* dimensions.
|
|
1144
|
+
*/
|
|
1145
|
+
declare interface NormalizedKeypoint {
|
|
1146
|
+
/** X in normalized image coordinates. */
|
|
1147
|
+
x: number;
|
|
1148
|
+
/** Y in normalized image coordinates. */
|
|
1149
|
+
y: number;
|
|
1150
|
+
/** Optional label of the keypoint. */
|
|
1151
|
+
label?: string;
|
|
1152
|
+
/** Optional score of the keypoint. */
|
|
1153
|
+
score?: number;
|
|
1154
|
+
}
|
|
1155
|
+
|
|
839
1156
|
/**
|
|
840
1157
|
* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
|
|
841
1158
|
*
|
|
@@ -950,6 +1267,12 @@ declare interface RectF {
|
|
|
950
1267
|
bottom: number;
|
|
951
1268
|
}
|
|
952
1269
|
|
|
1270
|
+
/** A Region-Of-Interest (ROI) to represent a region within an image. */
|
|
1271
|
+
export declare interface RegionOfInterest {
|
|
1272
|
+
/** The ROI in keypoint format. */
|
|
1273
|
+
keypoint: NormalizedKeypoint;
|
|
1274
|
+
}
|
|
1275
|
+
|
|
953
1276
|
/**
|
|
954
1277
|
* The two running modes of a vision task.
|
|
955
1278
|
* 1) The image mode for processing single image inputs.
|
|
@@ -958,17 +1281,18 @@ declare interface RectF {
|
|
|
958
1281
|
declare type RunningMode = "IMAGE" | "VIDEO";
|
|
959
1282
|
|
|
960
1283
|
/**
|
|
961
|
-
* The
|
|
962
|
-
* the
|
|
963
|
-
*
|
|
964
|
-
* for
|
|
1284
|
+
* The segmentation tasks return the segmentation either as a WebGLTexture (when
|
|
1285
|
+
* the output is on GPU) or as typed JavaScript arrays for CPU-based
|
|
1286
|
+
* category or confidence masks. `Uint8ClampedArray`s are used to represent
|
|
1287
|
+
* CPU-based category masks and `Float32Array`s are used for CPU-based
|
|
1288
|
+
* confidence masks.
|
|
965
1289
|
*/
|
|
966
|
-
export declare type SegmentationMask =
|
|
1290
|
+
export declare type SegmentationMask = Uint8ClampedArray | Float32Array | WebGLTexture;
|
|
967
1291
|
|
|
968
1292
|
/**
|
|
969
|
-
* A callback that receives the computed masks from the
|
|
1293
|
+
* A callback that receives the computed masks from the segmentation tasks. The
|
|
970
1294
|
* callback either receives a single element array with a category mask (as a
|
|
971
|
-
* `[
|
|
1295
|
+
* `[Uint8ClampedArray]`) or multiple confidence masks (as a `Float32Array[]`).
|
|
972
1296
|
* The returned data is only valid for the duration of the callback. If
|
|
973
1297
|
* asynchronous processing is needed, all data needs to be copied before the
|
|
974
1298
|
* callback returns.
|
|
@@ -990,6 +1314,13 @@ declare interface TaskRunnerOptions {
|
|
|
990
1314
|
|
|
991
1315
|
/** The options for configuring a MediaPipe vision task. */
|
|
992
1316
|
declare interface VisionTaskOptions extends TaskRunnerOptions {
|
|
1317
|
+
/**
|
|
1318
|
+
* The canvas element to bind textures to. This has to be set for GPU
|
|
1319
|
+
* processing. The task will initialize a WebGL context and throw an error if
|
|
1320
|
+
* this fails (e.g. if you have already initialized a different type of
|
|
1321
|
+
* context).
|
|
1322
|
+
*/
|
|
1323
|
+
canvas?: HTMLCanvasElement | OffscreenCanvas;
|
|
993
1324
|
/**
|
|
994
1325
|
* The running mode of the task. Default to the image mode.
|
|
995
1326
|
* Vision tasks have two running modes:
|