@mediapipe/tasks-vision 0.1.0-alpha-3 → 0.1.0-alpha-5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +73 -23
- package/package.json +1 -1
- package/vision.d.ts +468 -1
- package/vision_bundle.js +1 -1
- package/wasm/vision_wasm_internal.js +479 -298
- package/wasm/vision_wasm_internal.wasm +0 -0
- package/wasm/vision_wasm_nosimd_internal.js +468 -297
- package/wasm/vision_wasm_nosimd_internal.wasm +0 -0
package/README.md
CHANGED
|
@@ -2,23 +2,57 @@
|
|
|
2
2
|
|
|
3
3
|
This package contains the vision tasks for MediaPipe.
|
|
4
4
|
|
|
5
|
-
##
|
|
5
|
+
## Face Stylizer
|
|
6
6
|
|
|
7
|
-
The MediaPipe
|
|
8
|
-
multiple classes of objects within images or videos.
|
|
7
|
+
The MediaPipe Face Stylizer lets you perform face stylization on images.
|
|
9
8
|
|
|
10
9
|
```
|
|
11
10
|
const vision = await FilesetResolver.forVisionTasks(
|
|
12
11
|
"https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
|
|
13
12
|
);
|
|
14
|
-
const
|
|
15
|
-
"
|
|
13
|
+
const faceStylizer = await FaceStylizer.createFromModelPath(vision,
|
|
14
|
+
"model.tflite"
|
|
16
15
|
);
|
|
17
16
|
const image = document.getElementById("image") as HTMLImageElement;
|
|
18
|
-
const
|
|
17
|
+
const stylizedImage = faceStylizer.stylize(image);
|
|
19
18
|
```
|
|
20
19
|
|
|
21
|
-
|
|
20
|
+
## Gesture Recognition
|
|
21
|
+
|
|
22
|
+
The MediaPipe Gesture Recognizer task lets you recognize hand gestures in real
|
|
23
|
+
time, and provides the recognized hand gesture results along with the landmarks
|
|
24
|
+
of the detected hands. You can use this task to recognize specific hand gestures
|
|
25
|
+
from a user, and invoke application features that correspond to those gestures.
|
|
26
|
+
|
|
27
|
+
```
|
|
28
|
+
const vision = await FilesetResolver.forVisionTasks(
|
|
29
|
+
"https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
|
|
30
|
+
);
|
|
31
|
+
const gestureRecognizer = await GestureRecognizer.createFromModelPath(vision,
|
|
32
|
+
"https://storage.googleapis.com/mediapipe-tasks/gesture_recognizer/gesture_recognizer.task"
|
|
33
|
+
);
|
|
34
|
+
const image = document.getElementById("image") as HTMLImageElement;
|
|
35
|
+
const recognitions = gestureRecognizer.recognize(image);
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Hand Landmark Detection
|
|
39
|
+
|
|
40
|
+
The MediaPipe Hand Landmarker task lets you detect the landmarks of the hands in
|
|
41
|
+
an image. You can use this Task to localize key points of the hands and render
|
|
42
|
+
visual effects over the hands.
|
|
43
|
+
|
|
44
|
+
```
|
|
45
|
+
const vision = await FilesetResolver.forVisionTasks(
|
|
46
|
+
"https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
|
|
47
|
+
);
|
|
48
|
+
const handLandmarker = await HandLandmarker.createFromModelPath(vision,
|
|
49
|
+
"https://storage.googleapis.com/mediapipe-tasks/hand_landmarker/hand_landmarker.task"
|
|
50
|
+
);
|
|
51
|
+
const image = document.getElementById("image") as HTMLImageElement;
|
|
52
|
+
const landmarks = handLandmarker.detect(image);
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
For more information, refer to the [Hand Landmark Detection](https://developers.google.com/mediapipe/solutions/vision/hand_landmarker/web_js) documentation.
|
|
22
56
|
|
|
23
57
|
## Image Classification
|
|
24
58
|
|
|
@@ -39,40 +73,56 @@ const classifications = imageClassifier.classify(image);
|
|
|
39
73
|
|
|
40
74
|
For more information, refer to the [Image Classification](https://developers.google.com/mediapipe/solutions/vision/image_classifier/web_js) documentation.
|
|
41
75
|
|
|
42
|
-
##
|
|
76
|
+
## Image Segmentation
|
|
43
77
|
|
|
44
|
-
The MediaPipe
|
|
45
|
-
time, and provides the recognized hand gesture results along with the landmarks
|
|
46
|
-
of the detected hands. You can use this task to recognize specific hand gestures
|
|
47
|
-
from a user, and invoke application features that correspond to those gestures.
|
|
78
|
+
The MediaPipe Image Segmenter lets you segment an image into categories.
|
|
48
79
|
|
|
49
80
|
```
|
|
50
81
|
const vision = await FilesetResolver.forVisionTasks(
|
|
51
82
|
"https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
|
|
52
83
|
);
|
|
53
|
-
const
|
|
54
|
-
"
|
|
84
|
+
const imageSegmenter = await ImageSegmenter.createFromModelPath(vision,
|
|
85
|
+
"model.tflite"
|
|
55
86
|
);
|
|
56
87
|
const image = document.getElementById("image") as HTMLImageElement;
|
|
57
|
-
|
|
88
|
+
imageSegmenter.segment(image, (masks, width, height) => {
|
|
89
|
+
...
|
|
90
|
+
});
|
|
58
91
|
```
|
|
59
92
|
|
|
60
|
-
##
|
|
93
|
+
## Interactive Segmentation
|
|
61
94
|
|
|
62
|
-
The MediaPipe
|
|
63
|
-
an image.
|
|
64
|
-
visual effects over the hands.
|
|
95
|
+
The MediaPipe Interactive Segmenter lets you select a region of interest to
|
|
96
|
+
segment an image by.
|
|
65
97
|
|
|
66
98
|
```
|
|
67
99
|
const vision = await FilesetResolver.forVisionTasks(
|
|
68
100
|
"https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
|
|
69
101
|
);
|
|
70
|
-
const
|
|
71
|
-
"
|
|
102
|
+
const interactiveSegmenter = await InteractiveSegmenter.createFromModelPath(
|
|
103
|
+
vision, "model.tflite"
|
|
72
104
|
);
|
|
73
105
|
const image = document.getElementById("image") as HTMLImageElement;
|
|
74
|
-
|
|
106
|
+
interactiveSegmenter.segment(image, { keypoint: { x: 0.1, y: 0.2 } },
|
|
107
|
+
(masks, width, height) => { ... }
|
|
108
|
+
);
|
|
75
109
|
```
|
|
76
110
|
|
|
77
|
-
|
|
111
|
+
## Object Detection
|
|
112
|
+
|
|
113
|
+
The MediaPipe Object Detector task lets you detect the presence and location of
|
|
114
|
+
multiple classes of objects within images or videos.
|
|
115
|
+
|
|
116
|
+
```
|
|
117
|
+
const vision = await FilesetResolver.forVisionTasks(
|
|
118
|
+
"https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
|
|
119
|
+
);
|
|
120
|
+
const objectDetector = await ObjectDetector.createFromModelPath(vision,
|
|
121
|
+
"https://storage.googleapis.com/mediapipe-tasks/object_detector/efficientdet_lite0_uint8.tflite"
|
|
122
|
+
);
|
|
123
|
+
const image = document.getElementById("image") as HTMLImageElement;
|
|
124
|
+
const detections = objectDetector.detect(image);
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
For more information, refer to the [Object Detector](https://developers.google.com/mediapipe/solutions/vision/object_detector/web_js) documentation.
|
|
78
128
|
|
package/package.json
CHANGED
package/vision.d.ts
CHANGED
|
@@ -224,6 +224,141 @@ export declare interface Embedding {
|
|
|
224
224
|
headName: string;
|
|
225
225
|
}
|
|
226
226
|
|
|
227
|
+
/** Performs face stylization on images. */
|
|
228
|
+
export declare class FaceStylizer extends VisionTaskRunner {
|
|
229
|
+
/**
|
|
230
|
+
* Initializes the Wasm runtime and creates a new Face Stylizer from the
|
|
231
|
+
* provided options.
|
|
232
|
+
* @param wasmFileset A configuration object that provides the location of
|
|
233
|
+
* the Wasm binary and its loader.
|
|
234
|
+
* @param faceStylizerOptions The options for the Face Stylizer. Note
|
|
235
|
+
* that either a path to the model asset or a model buffer needs to be
|
|
236
|
+
* provided (via `baseOptions`).
|
|
237
|
+
*/
|
|
238
|
+
static createFromOptions(wasmFileset: WasmFileset, faceStylizerOptions: FaceStylizerOptions): Promise<FaceStylizer>;
|
|
239
|
+
/**
|
|
240
|
+
* Initializes the Wasm runtime and creates a new Face Stylizer based on
|
|
241
|
+
* the provided model asset buffer.
|
|
242
|
+
* @param wasmFileset A configuration object that provides the location of
|
|
243
|
+
* the Wasm binary and its loader.
|
|
244
|
+
* @param modelAssetBuffer A binary representation of the model.
|
|
245
|
+
*/
|
|
246
|
+
static createFromModelBuffer(wasmFileset: WasmFileset, modelAssetBuffer: Uint8Array): Promise<FaceStylizer>;
|
|
247
|
+
/**
|
|
248
|
+
* Initializes the Wasm runtime and creates a new Face Stylizer based on
|
|
249
|
+
* the path to the model asset.
|
|
250
|
+
* @param wasmFileset A configuration object that provides the location of
|
|
251
|
+
* the Wasm binary and its loader.
|
|
252
|
+
* @param modelAssetPath The path to the model asset.
|
|
253
|
+
*/
|
|
254
|
+
static createFromModelPath(wasmFileset: WasmFileset, modelAssetPath: string): Promise<FaceStylizer>;
|
|
255
|
+
private constructor();
|
|
256
|
+
/**
|
|
257
|
+
* Sets new options for the Face Stylizer.
|
|
258
|
+
*
|
|
259
|
+
* Calling `setOptions()` with a subset of options only affects those
|
|
260
|
+
* options. You can reset an option back to its default value by
|
|
261
|
+
* explicitly setting it to `undefined`.
|
|
262
|
+
*
|
|
263
|
+
* @param options The options for the Face Stylizer.
|
|
264
|
+
*/
|
|
265
|
+
setOptions(options: FaceStylizerOptions): Promise<void>;
|
|
266
|
+
/**
|
|
267
|
+
* Performs face stylization on the provided single image. The method returns
|
|
268
|
+
* synchronously once the callback returns. Only use this method when the
|
|
269
|
+
* FaceStylizer is created with the image running mode.
|
|
270
|
+
*
|
|
271
|
+
* The input image can be of any size. To ensure that the output image has
|
|
272
|
+
* reasonable quality, the stylized output image size is determined by the
|
|
273
|
+
* model output size.
|
|
274
|
+
*
|
|
275
|
+
* @param image An image to process.
|
|
276
|
+
* @param callback The callback that is invoked with the stylized image. The
|
|
277
|
+
* lifetime of the returned data is only guaranteed for the duration of the
|
|
278
|
+
* callback.
|
|
279
|
+
*/
|
|
280
|
+
stylize(image: ImageSource, callback: ImageCallback): void;
|
|
281
|
+
/**
|
|
282
|
+
* Performs face stylization on the provided single image. The method returns
|
|
283
|
+
* synchronously once the callback returns. Only use this method when the
|
|
284
|
+
* FaceStylizer is created with the image running mode.
|
|
285
|
+
*
|
|
286
|
+
* The 'imageProcessingOptions' parameter can be used to specify one or all
|
|
287
|
+
* of:
|
|
288
|
+
* - the rotation to apply to the image before performing stylization, by
|
|
289
|
+
* setting its 'rotationDegrees' property.
|
|
290
|
+
* - the region-of-interest on which to perform stylization, by setting its
|
|
291
|
+
* 'regionOfInterest' property. If not specified, the full image is used.
|
|
292
|
+
* If both are specified, the crop around the region-of-interest is extracted
|
|
293
|
+
* first, then the specified rotation is applied to the crop.
|
|
294
|
+
*
|
|
295
|
+
* The input image can be of any size. To ensure that the output image has
|
|
296
|
+
* reasonable quality, the stylized output image size is the smaller of the
|
|
297
|
+
* model output size and the size of the 'regionOfInterest' specified in
|
|
298
|
+
* 'imageProcessingOptions'.
|
|
299
|
+
*
|
|
300
|
+
* @param image An image to process.
|
|
301
|
+
* @param imageProcessingOptions the `ImageProcessingOptions` specifying how
|
|
302
|
+
* to process the input image before running inference.
|
|
303
|
+
* @param callback The callback that is invoked with the stylized image. The
|
|
304
|
+
* lifetime of the returned data is only guaranteed for the duration of the
|
|
305
|
+
* callback.
|
|
306
|
+
*/
|
|
307
|
+
stylize(image: ImageSource, imageProcessingOptions: ImageProcessingOptions, callback: ImageCallback): void;
|
|
308
|
+
/**
|
|
309
|
+
* Performs face stylization on the provided video frame. Only use this method
|
|
310
|
+
* when the FaceStylizer is created with the video running mode.
|
|
311
|
+
*
|
|
312
|
+
* The input frame can be of any size. It's required to provide the video
|
|
313
|
+
* frame's timestamp (in milliseconds). The input timestamps must be
|
|
314
|
+
* monotonically increasing.
|
|
315
|
+
*
|
|
316
|
+
* To ensure that the output image has reasonable quality, the stylized
|
|
317
|
+
* output image size is determined by the model output size.
|
|
318
|
+
*
|
|
319
|
+
* @param videoFrame A video frame to process.
|
|
320
|
+
* @param timestamp The timestamp of the current frame, in ms.
|
|
321
|
+
* @param callback The callback that is invoked with the stylized image. The
|
|
322
|
+
* lifetime of the returned data is only guaranteed for the duration of
|
|
323
|
+
* the callback.
|
|
324
|
+
*/
|
|
325
|
+
stylizeForVideo(videoFrame: ImageSource, timestamp: number, callback: ImageCallback): void;
|
|
326
|
+
/**
|
|
327
|
+
* Performs face stylization on the provided video frame. Only use this
|
|
328
|
+
* method when the FaceStylizer is created with the video running mode.
|
|
329
|
+
*
|
|
330
|
+
* The 'imageProcessingOptions' parameter can be used to specify one or all
|
|
331
|
+
* of:
|
|
332
|
+
* - the rotation to apply to the image before performing stylization, by
|
|
333
|
+
* setting its 'rotationDegrees' property.
|
|
334
|
+
* - the region-of-interest on which to perform stylization, by setting its
|
|
335
|
+
* 'regionOfInterest' property. If not specified, the full image is used.
|
|
336
|
+
* If both are specified, the crop around the region-of-interest is
|
|
337
|
+
* extracted first, then the specified rotation is applied to the crop.
|
|
338
|
+
*
|
|
339
|
+
* The input frame can be of any size. It's required to provide the video
|
|
340
|
+
* frame's timestamp (in milliseconds). The input timestamps must be
|
|
341
|
+
* monotonically increasing.
|
|
342
|
+
*
|
|
343
|
+
* To ensure that the output image has reasonable quality, the stylized
|
|
344
|
+
* output image size is the smaller of the model output size and the size of
|
|
345
|
+
* the 'regionOfInterest' specified in 'imageProcessingOptions'.
|
|
346
|
+
*
|
|
347
|
+
* @param videoFrame A video frame to process.
|
|
348
|
+
* @param imageProcessingOptions the `ImageProcessingOptions` specifying how
|
|
349
|
+
* to process the input image before running inference.
|
|
350
|
+
* @param timestamp The timestamp of the current frame, in ms.
|
|
351
|
+
* @param callback The callback that is invoked with the stylized image. The
|
|
352
|
+
* lifetime of the returned data is only guaranteed for the duration of
|
|
353
|
+
* the callback.
|
|
354
|
+
*/
|
|
355
|
+
stylizeForVideo(videoFrame: ImageSource, imageProcessingOptions: ImageProcessingOptions, timestamp: number, callback: ImageCallback): void;
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
/** Options to configure the MediaPipe Face Stylizer Task */
|
|
359
|
+
export declare interface FaceStylizerOptions extends VisionTaskOptions {
|
|
360
|
+
}
|
|
361
|
+
|
|
227
362
|
/**
|
|
228
363
|
* Resolves the files required for the MediaPipe Task APIs.
|
|
229
364
|
*
|
|
@@ -363,7 +498,7 @@ export declare interface GestureRecognizerOptions extends VisionTaskOptions {
|
|
|
363
498
|
*/
|
|
364
499
|
minTrackingConfidence?: number | undefined;
|
|
365
500
|
/**
|
|
366
|
-
* Sets the optional `ClassifierOptions`
|
|
501
|
+
* Sets the optional `ClassifierOptions` controlling the canned gestures
|
|
367
502
|
* classifier, such as score threshold, allow list and deny list of gestures.
|
|
368
503
|
* The categories for canned gesture
|
|
369
504
|
* classifiers are: ["None", "Closed_Fist", "Open_Palm", "Pointing_Up",
|
|
@@ -495,6 +630,16 @@ export declare interface HandLandmarkerResult {
|
|
|
495
630
|
handednesses: Category[][];
|
|
496
631
|
}
|
|
497
632
|
|
|
633
|
+
/**
|
|
634
|
+
* A callback that receives an `ImageData` object from a Vision task. The
|
|
635
|
+
* lifetime of the underlying data is limited to the duration of the callback.
|
|
636
|
+
* If asynchronous processing is needed, all data needs to be copied before the
|
|
637
|
+
* callback returns.
|
|
638
|
+
*
|
|
639
|
+
* The `WebGLTexture` output type is reserved for future usage.
|
|
640
|
+
*/
|
|
641
|
+
export declare type ImageCallback = (image: ImageData | WebGLTexture, width: number, height: number) => void;
|
|
642
|
+
|
|
498
643
|
/** Performs classification on images. */
|
|
499
644
|
export declare class ImageClassifier extends VisionTaskRunner {
|
|
500
645
|
/**
|
|
@@ -699,11 +844,271 @@ declare interface ImageProcessingOptions {
|
|
|
699
844
|
rotationDegrees?: number;
|
|
700
845
|
}
|
|
701
846
|
|
|
847
|
+
/** Performs image segmentation on images. */
|
|
848
|
+
export declare class ImageSegmenter extends VisionTaskRunner {
|
|
849
|
+
/**
|
|
850
|
+
* Initializes the Wasm runtime and creates a new image segmenter from the
|
|
851
|
+
* provided options.
|
|
852
|
+
* @param wasmFileset A configuration object that provides the location of
|
|
853
|
+
* the Wasm binary and its loader.
|
|
854
|
+
* @param imageSegmenterOptions The options for the Image Segmenter. Note
|
|
855
|
+
* that either a path to the model asset or a model buffer needs to be
|
|
856
|
+
* provided (via `baseOptions`).
|
|
857
|
+
*/
|
|
858
|
+
static createFromOptions(wasmFileset: WasmFileset, imageSegmenterOptions: ImageSegmenterOptions): Promise<ImageSegmenter>;
|
|
859
|
+
/**
|
|
860
|
+
* Initializes the Wasm runtime and creates a new image segmenter based on
|
|
861
|
+
* the provided model asset buffer.
|
|
862
|
+
* @param wasmFileset A configuration object that provides the location of
|
|
863
|
+
* the Wasm binary and its loader.
|
|
864
|
+
* @param modelAssetBuffer A binary representation of the model.
|
|
865
|
+
*/
|
|
866
|
+
static createFromModelBuffer(wasmFileset: WasmFileset, modelAssetBuffer: Uint8Array): Promise<ImageSegmenter>;
|
|
867
|
+
/**
|
|
868
|
+
* Initializes the Wasm runtime and creates a new image segmenter based on
|
|
869
|
+
* the path to the model asset.
|
|
870
|
+
* @param wasmFileset A configuration object that provides the location of
|
|
871
|
+
* the Wasm binary and its loader.
|
|
872
|
+
* @param modelAssetPath The path to the model asset.
|
|
873
|
+
*/
|
|
874
|
+
static createFromModelPath(wasmFileset: WasmFileset, modelAssetPath: string): Promise<ImageSegmenter>;
|
|
875
|
+
private constructor();
|
|
876
|
+
/**
|
|
877
|
+
* Sets new options for the image segmenter.
|
|
878
|
+
*
|
|
879
|
+
* Calling `setOptions()` with a subset of options only affects those
|
|
880
|
+
* options. You can reset an option back to its default value by
|
|
881
|
+
* explicitly setting it to `undefined`.
|
|
882
|
+
*
|
|
883
|
+
* @param options The options for the image segmenter.
|
|
884
|
+
*/
|
|
885
|
+
setOptions(options: ImageSegmenterOptions): Promise<void>;
|
|
886
|
+
/**
|
|
887
|
+
* Performs image segmentation on the provided single image and invokes the
|
|
888
|
+
* callback with the response. The method returns synchronously once the
|
|
889
|
+
* callback returns. Only use this method when the ImageSegmenter is
|
|
890
|
+
* created with running mode `image`.
|
|
891
|
+
*
|
|
892
|
+
* @param image An image to process.
|
|
893
|
+
* @param callback The callback that is invoked with the segmented masks. The
|
|
894
|
+
* lifetime of the returned data is only guaranteed for the duration of the
|
|
895
|
+
* callback.
|
|
896
|
+
*/
|
|
897
|
+
segment(image: ImageSource, callback: SegmentationMaskCallback): void;
|
|
898
|
+
/**
|
|
899
|
+
* Performs image segmentation on the provided single image and invokes the
|
|
900
|
+
* callback with the response. The method returns synchronously once the
|
|
901
|
+
* callback returns. Only use this method when the ImageSegmenter is
|
|
902
|
+
* created with running mode `image`.
|
|
903
|
+
*
|
|
904
|
+
* @param image An image to process.
|
|
905
|
+
* @param imageProcessingOptions the `ImageProcessingOptions` specifying how
|
|
906
|
+
* to process the input image before running inference.
|
|
907
|
+
* @param callback The callback that is invoked with the segmented masks. The
|
|
908
|
+
* lifetime of the returned data is only guaranteed for the duration of the
|
|
909
|
+
* callback.
|
|
910
|
+
*/
|
|
911
|
+
segment(image: ImageSource, imageProcessingOptions: ImageProcessingOptions, callback: SegmentationMaskCallback): void;
|
|
912
|
+
/**
|
|
913
|
+
* Get the category label list that the ImageSegmenter can recognize. For
|
|
914
|
+
* `CATEGORY_MASK` type, the index in the category mask corresponds to the
|
|
915
|
+
* category in the label list. For `CONFIDENCE_MASK` type, the output mask
|
|
916
|
+
* list at index corresponds to the category in the label list.
|
|
917
|
+
*
|
|
918
|
+
* If there is no labelmap provided in the model file, an empty label array is
|
|
919
|
+
* returned.
|
|
920
|
+
*
|
|
921
|
+
* @return The labels used by the current model.
|
|
922
|
+
*/
|
|
923
|
+
getLabels(): string[];
|
|
924
|
+
/**
|
|
925
|
+
* Performs image segmentation on the provided video frame and invokes the
|
|
926
|
+
* callback with the response. The method returns synchronously once the
|
|
927
|
+
* callback returns. Only use this method when the ImageSegmenter is
|
|
928
|
+
* created with running mode `video`.
|
|
929
|
+
*
|
|
930
|
+
* @param videoFrame A video frame to process.
|
|
931
|
+
* @param timestamp The timestamp of the current frame, in ms.
|
|
932
|
+
* @param callback The callback that is invoked with the segmented masks. The
|
|
933
|
+
* lifetime of the returned data is only guaranteed for the duration of the
|
|
934
|
+
* callback.
|
|
935
|
+
*/
|
|
936
|
+
segmentForVideo(videoFrame: ImageSource, timestamp: number, callback: SegmentationMaskCallback): void;
|
|
937
|
+
/**
|
|
938
|
+
* Performs image segmentation on the provided video frame and invokes the
|
|
939
|
+
* callback with the response. The method returns synchronously once the
|
|
940
|
+
* callback returns. Only use this method when the ImageSegmenter is
|
|
941
|
+
* created with running mode `video`.
|
|
942
|
+
*
|
|
943
|
+
* @param videoFrame A video frame to process.
|
|
944
|
+
* @param imageProcessingOptions the `ImageProcessingOptions` specifying how
|
|
945
|
+
* to process the input image before running inference.
|
|
946
|
+
* @param timestamp The timestamp of the current frame, in ms.
|
|
947
|
+
* @param callback The callback that is invoked with the segmented masks. The
|
|
948
|
+
* lifetime of the returned data is only guaranteed for the duration of the
|
|
949
|
+
* callback.
|
|
950
|
+
*/
|
|
951
|
+
segmentForVideo(videoFrame: ImageSource, imageProcessingOptions: ImageProcessingOptions, timestamp: number, callback: SegmentationMaskCallback): void;
|
|
952
|
+
}
|
|
953
|
+
|
|
954
|
+
/** Options to configure the MediaPipe Image Segmenter Task */
|
|
955
|
+
export declare interface ImageSegmenterOptions extends VisionTaskOptions {
|
|
956
|
+
/**
|
|
957
|
+
* The locale to use for display names specified through the TFLite Model
|
|
958
|
+
* Metadata, if any. Defaults to English.
|
|
959
|
+
*/
|
|
960
|
+
displayNamesLocale?: string | undefined;
|
|
961
|
+
/**
|
|
962
|
+
* The output type of segmentation results.
|
|
963
|
+
*
|
|
964
|
+
* The two supported modes are:
|
|
965
|
+
* - Category Mask: Gives a single output mask where each pixel represents
|
|
966
|
+
* the class which the pixel in the original image was
|
|
967
|
+
* predicted to belong to.
|
|
968
|
+
* - Confidence Mask: Gives a list of output masks (one for each class). For
|
|
969
|
+
* each mask, the pixel represents the prediction
|
|
970
|
+
* confidence, usually in the [0.0, 1.0] range.
|
|
971
|
+
*
|
|
972
|
+
* Defaults to `CATEGORY_MASK`.
|
|
973
|
+
*/
|
|
974
|
+
outputType?: "CATEGORY_MASK" | "CONFIDENCE_MASK" | undefined;
|
|
975
|
+
}
|
|
976
|
+
|
|
702
977
|
/**
|
|
703
978
|
* Valid types of image sources which we can run our GraphRunner over.
|
|
704
979
|
*/
|
|
705
980
|
export declare type ImageSource = HTMLCanvasElement | HTMLVideoElement | HTMLImageElement | ImageData | ImageBitmap;
|
|
706
981
|
|
|
982
|
+
/**
|
|
983
|
+
* Performs interactive segmentation on images.
|
|
984
|
+
*
|
|
985
|
+
* Users can represent user interaction through `RegionOfInterest`, which gives
|
|
986
|
+
* a hint to InteractiveSegmenter to perform segmentation focusing on the given
|
|
987
|
+
* region of interest.
|
|
988
|
+
*
|
|
989
|
+
* The API expects a TFLite model with mandatory TFLite Model Metadata.
|
|
990
|
+
*
|
|
991
|
+
* Input tensor:
|
|
992
|
+
* (kTfLiteUInt8/kTfLiteFloat32)
|
|
993
|
+
* - image input of size `[batch x height x width x channels]`.
|
|
994
|
+
* - batch inference is not supported (`batch` is required to be 1).
|
|
995
|
+
* - RGB inputs are supported (`channels` is required to be 3).
|
|
996
|
+
* - if type is kTfLiteFloat32, NormalizationOptions are required to be
|
|
997
|
+
* attached to the metadata for input normalization.
|
|
998
|
+
* Output tensors:
|
|
999
|
+
* (kTfLiteUInt8/kTfLiteFloat32)
|
|
1000
|
+
* - list of segmented masks.
|
|
1001
|
+
* - if `output_type` is CATEGORY_MASK, uint8 Image, Image vector of size 1.
|
|
1002
|
+
* - if `output_type` is CONFIDENCE_MASK, float32 Image list of size
|
|
1003
|
+
* `channels`.
|
|
1004
|
+
* - batch is always 1
|
|
1005
|
+
*/
|
|
1006
|
+
export declare class InteractiveSegmenter extends VisionTaskRunner {
|
|
1007
|
+
/**
|
|
1008
|
+
* Initializes the Wasm runtime and creates a new interactive segmenter from
|
|
1009
|
+
* the provided options.
|
|
1010
|
+
* @param wasmFileset A configuration object that provides the location of
|
|
1011
|
+
* the Wasm binary and its loader.
|
|
1012
|
+
* @param interactiveSegmenterOptions The options for the Interactive
|
|
1013
|
+
* Segmenter. Note that either a path to the model asset or a model buffer
|
|
1014
|
+
* needs to be provided (via `baseOptions`).
|
|
1015
|
+
* @return A new `InteractiveSegmenter`.
|
|
1016
|
+
*/
|
|
1017
|
+
static createFromOptions(wasmFileset: WasmFileset, interactiveSegmenterOptions: InteractiveSegmenterOptions): Promise<InteractiveSegmenter>;
|
|
1018
|
+
/**
|
|
1019
|
+
* Initializes the Wasm runtime and creates a new interactive segmenter based
|
|
1020
|
+
* on the provided model asset buffer.
|
|
1021
|
+
* @param wasmFileset A configuration object that provides the location of
|
|
1022
|
+
* the Wasm binary and its loader.
|
|
1023
|
+
* @param modelAssetBuffer A binary representation of the model.
|
|
1024
|
+
* @return A new `InteractiveSegmenter`.
|
|
1025
|
+
*/
|
|
1026
|
+
static createFromModelBuffer(wasmFileset: WasmFileset, modelAssetBuffer: Uint8Array): Promise<InteractiveSegmenter>;
|
|
1027
|
+
/**
|
|
1028
|
+
* Initializes the Wasm runtime and creates a new interactive segmenter based
|
|
1029
|
+
* on the path to the model asset.
|
|
1030
|
+
* @param wasmFileset A configuration object that provides the location of
|
|
1031
|
+
* the Wasm binary and its loader.
|
|
1032
|
+
* @param modelAssetPath The path to the model asset.
|
|
1033
|
+
* @return A new `InteractiveSegmenter`.
|
|
1034
|
+
*/
|
|
1035
|
+
static createFromModelPath(wasmFileset: WasmFileset, modelAssetPath: string): Promise<InteractiveSegmenter>;
|
|
1036
|
+
private constructor();
|
|
1037
|
+
/**
|
|
1038
|
+
* Sets new options for the interactive segmenter.
|
|
1039
|
+
*
|
|
1040
|
+
* Calling `setOptions()` with a subset of options only affects those
|
|
1041
|
+
* options. You can reset an option back to its default value by
|
|
1042
|
+
* explicitly setting it to `undefined`.
|
|
1043
|
+
*
|
|
1044
|
+
* @param options The options for the interactive segmenter.
|
|
1045
|
+
* @return A Promise that resolves when the settings have been applied.
|
|
1046
|
+
*/
|
|
1047
|
+
setOptions(options: InteractiveSegmenterOptions): Promise<void>;
|
|
1048
|
+
/**
|
|
1049
|
+
* Performs interactive segmentation on the provided single image and invokes
|
|
1050
|
+
* the callback with the response. The `roi` parameter is used to represent a
|
|
1051
|
+
* user's region of interest for segmentation.
|
|
1052
|
+
*
|
|
1053
|
+
* If the output_type is `CATEGORY_MASK`, the callback is invoked with a vector
|
|
1054
|
+
* of images that represent per-category segmented image mask. If the
|
|
1055
|
+
* output_type is `CONFIDENCE_MASK`, the callback is invoked with a vector of
|
|
1056
|
+
* images that contains only one confidence image mask. The method returns
|
|
1057
|
+
* synchronously once the callback returns.
|
|
1058
|
+
*
|
|
1059
|
+
* @param image An image to process.
|
|
1060
|
+
* @param roi The region of interest for segmentation.
|
|
1061
|
+
* @param callback The callback that is invoked with the segmented masks. The
|
|
1062
|
+
* lifetime of the returned data is only guaranteed for the duration of the
|
|
1063
|
+
* callback.
|
|
1064
|
+
*/
|
|
1065
|
+
segment(image: ImageSource, roi: RegionOfInterest, callback: SegmentationMaskCallback): void;
|
|
1066
|
+
/**
|
|
1067
|
+
* Performs interactive segmentation on the provided single image and invokes
|
|
1068
|
+
* the callback with the response. The `roi` parameter is used to represent a
|
|
1069
|
+
* user's region of interest for segmentation.
|
|
1070
|
+
*
|
|
1071
|
+
* The 'imageProcessingOptions' parameter can be used to specify the
|
|
1072
|
+
* rotation to apply to the image before performing segmentation, by setting
|
|
1073
|
+
* its 'rotationDegrees' field. Note that specifying a region-of-interest
|
|
1074
|
+
* using the 'regionOfInterest' field is NOT supported and will result in an
|
|
1075
|
+
* error.
|
|
1076
|
+
*
|
|
1077
|
+
* If the output_type is `CATEGORY_MASK`, the callback is invoked with a vector
|
|
1078
|
+
* of images that represent per-category segmented image mask. If the
|
|
1079
|
+
* output_type is `CONFIDENCE_MASK`, the callback is invoked with a vector of
|
|
1080
|
+
* images that contains only one confidence image mask. The method returns
|
|
1081
|
+
* synchronously once the callback returns.
|
|
1082
|
+
*
|
|
1083
|
+
* @param image An image to process.
|
|
1084
|
+
* @param roi The region of interest for segmentation.
|
|
1085
|
+
* @param imageProcessingOptions the `ImageProcessingOptions` specifying how
|
|
1086
|
+
* to process the input image before running inference.
|
|
1087
|
+
* @param callback The callback that is invoked with the segmented masks. The
|
|
1088
|
+
* lifetime of the returned data is only guaranteed for the duration of the
|
|
1089
|
+
* callback.
|
|
1090
|
+
*/
|
|
1091
|
+
segment(image: ImageSource, roi: RegionOfInterest, imageProcessingOptions: ImageProcessingOptions, callback: SegmentationMaskCallback): void;
|
|
1092
|
+
}
|
|
1093
|
+
|
|
1094
|
+
/** Options to configure the MediaPipe Interactive Segmenter Task */
|
|
1095
|
+
export declare interface InteractiveSegmenterOptions extends TaskRunnerOptions {
|
|
1096
|
+
/**
|
|
1097
|
+
* The output type of segmentation results.
|
|
1098
|
+
*
|
|
1099
|
+
* The two supported modes are:
|
|
1100
|
+
* - Category Mask: Gives a single output mask where each pixel represents
|
|
1101
|
+
* the class which the pixel in the original image was
|
|
1102
|
+
* predicted to belong to.
|
|
1103
|
+
* - Confidence Mask: Gives a list of output masks (one for each class). For
|
|
1104
|
+
* each mask, the pixel represents the prediction
|
|
1105
|
+
* confidence, usually in the [0.0, 1.0] range.
|
|
1106
|
+
*
|
|
1107
|
+
* Defaults to `CATEGORY_MASK`.
|
|
1108
|
+
*/
|
|
1109
|
+
outputType?: "CATEGORY_MASK" | "CONFIDENCE_MASK" | undefined;
|
|
1110
|
+
}
|
|
1111
|
+
|
|
707
1112
|
/**
|
|
708
1113
|
* Landmark represents a point in 3D space with x, y, z coordinates. The
|
|
709
1114
|
* landmark coordinates are in meters. z represents the landmark depth,
|
|
@@ -718,6 +1123,36 @@ export declare interface Landmark {
|
|
|
718
1123
|
z: number;
|
|
719
1124
|
}
|
|
720
1125
|
|
|
1126
|
+
/**
|
|
1127
|
+
* Copyright 2023 The MediaPipe Authors. All Rights Reserved.
|
|
1128
|
+
*
|
|
1129
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
1130
|
+
* you may not use this file except in compliance with the License.
|
|
1131
|
+
* You may obtain a copy of the License at
|
|
1132
|
+
*
|
|
1133
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
1134
|
+
*
|
|
1135
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
1136
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
1137
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
1138
|
+
* See the License for the specific language governing permissions and
|
|
1139
|
+
* limitations under the License.
|
|
1140
|
+
*/
|
|
1141
|
+
/**
|
|
1142
|
+
* A keypoint, defined by the coordinates (x, y), normalized by the image
|
|
1143
|
+
* dimensions.
|
|
1144
|
+
*/
|
|
1145
|
+
declare interface NormalizedKeypoint {
|
|
1146
|
+
/** X in normalized image coordinates. */
|
|
1147
|
+
x: number;
|
|
1148
|
+
/** Y in normalized image coordinates. */
|
|
1149
|
+
y: number;
|
|
1150
|
+
/** Optional label of the keypoint. */
|
|
1151
|
+
label?: string;
|
|
1152
|
+
/** Optional score of the keypoint. */
|
|
1153
|
+
score?: number;
|
|
1154
|
+
}
|
|
1155
|
+
|
|
721
1156
|
/**
|
|
722
1157
|
* Copyright 2022 The MediaPipe Authors. All Rights Reserved.
|
|
723
1158
|
*
|
|
@@ -832,6 +1267,12 @@ declare interface RectF {
|
|
|
832
1267
|
bottom: number;
|
|
833
1268
|
}
|
|
834
1269
|
|
|
1270
|
+
/** A Region-Of-Interest (ROI) to represent a region within an image. */
|
|
1271
|
+
export declare interface RegionOfInterest {
|
|
1272
|
+
/** The ROI in keypoint format. */
|
|
1273
|
+
keypoint: NormalizedKeypoint;
|
|
1274
|
+
}
|
|
1275
|
+
|
|
835
1276
|
/**
|
|
836
1277
|
* The two running modes of a vision task.
|
|
837
1278
|
* 1) The image mode for processing single image inputs.
|
|
@@ -839,6 +1280,25 @@ declare interface RectF {
|
|
|
839
1280
|
*/
|
|
840
1281
|
declare type RunningMode = "IMAGE" | "VIDEO";
|
|
841
1282
|
|
|
1283
|
+
/**
|
|
1284
|
+
* The segmentation tasks return the segmentation either as a WebGLTexture (when
|
|
1285
|
+
* the output is on GPU) or as typed JavaScript arrays for CPU-based
|
|
1286
|
+
* category or confidence masks. `Uint8ClampedArray`s are used to represent
|
|
1287
|
+
* CPU-based category masks and `Float32Array`s are used for CPU-based
|
|
1288
|
+
* confidence masks.
|
|
1289
|
+
*/
|
|
1290
|
+
export declare type SegmentationMask = Uint8ClampedArray | Float32Array | WebGLTexture;
|
|
1291
|
+
|
|
1292
|
+
/**
|
|
1293
|
+
* A callback that receives the computed masks from the segmentation tasks. The
|
|
1294
|
+
* callback either receives a single element array with a category mask (as a
|
|
1295
|
+
* `[Uint8ClampedArray]`) or multiple confidence masks (as a `Float32Array[]`).
|
|
1296
|
+
* The returned data is only valid for the duration of the callback. If
|
|
1297
|
+
* asynchronous processing is needed, all data needs to be copied before the
|
|
1298
|
+
* callback returns.
|
|
1299
|
+
*/
|
|
1300
|
+
export declare type SegmentationMaskCallback = (masks: SegmentationMask[], width: number, height: number) => void;
|
|
1301
|
+
|
|
842
1302
|
/** Base class for all MediaPipe Tasks. */
|
|
843
1303
|
declare abstract class TaskRunner {
|
|
844
1304
|
protected constructor();
|
|
@@ -854,6 +1314,13 @@ declare interface TaskRunnerOptions {
|
|
|
854
1314
|
|
|
855
1315
|
/** The options for configuring a MediaPipe vision task. */
|
|
856
1316
|
declare interface VisionTaskOptions extends TaskRunnerOptions {
|
|
1317
|
+
/**
|
|
1318
|
+
* The canvas element to bind textures to. This has to be set for GPU
|
|
1319
|
+
* processing. The task will initialize a WebGL context and throw an error if
|
|
1320
|
+
* this fails (e.g. if you have already initialized a different type of
|
|
1321
|
+
* context).
|
|
1322
|
+
*/
|
|
1323
|
+
canvas?: HTMLCanvasElement | OffscreenCanvas;
|
|
857
1324
|
/**
|
|
858
1325
|
* The running mode of the task. Defaults to the image mode.
|
|
859
1326
|
* Vision tasks have two running modes:
|