@mediapipe/tasks-vision 0.1.0-alpha-11 → 0.1.0-alpha-12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -5
- package/package.json +1 -1
- package/vision.d.ts +117 -69
- package/vision_bundle.js +1 -1
- package/wasm/vision_wasm_internal.js +275 -303
- package/wasm/vision_wasm_internal.wasm +0 -0
- package/wasm/vision_wasm_nosimd_internal.js +287 -315
- package/wasm/vision_wasm_nosimd_internal.wasm +0 -0
package/README.md
CHANGED
|
@@ -12,7 +12,7 @@ const vision = await FilesetResolver.forVisionTasks(
|
|
|
12
12
|
"https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
|
|
13
13
|
);
|
|
14
14
|
const faceDetector = await FaceDetector.createFromModelPath(vision,
|
|
15
|
-
"https://storage.googleapis.com/mediapipe-tasks/
|
|
15
|
+
"https://storage.googleapis.com/mediapipe-tasks/face_detector/face_detection_short_range.tflite"
|
|
16
16
|
);
|
|
17
17
|
const image = document.getElementById("image") as HTMLImageElement;
|
|
18
18
|
const detections = faceDetector.detect(image);
|
|
@@ -29,7 +29,7 @@ const vision = await FilesetResolver.forVisionTasks(
|
|
|
29
29
|
"https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
|
|
30
30
|
);
|
|
31
31
|
const faceLandmarker = await FaceLandmarker.createFromModelPath(vision,
|
|
32
|
-
"
|
|
32
|
+
"https://storage.googleapis.com/mediapipe-tasks/face_landmarker/face_landmarker.task"
|
|
33
33
|
);
|
|
34
34
|
const image = document.getElementById("image") as HTMLImageElement;
|
|
35
35
|
const landmarks = faceLandmarker.detect(image);
|
|
@@ -44,7 +44,7 @@ const vision = await FilesetResolver.forVisionTasks(
|
|
|
44
44
|
"https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
|
|
45
45
|
);
|
|
46
46
|
const faceStylizer = await FaceStylizer.createFromModelPath(vision,
|
|
47
|
-
"
|
|
47
|
+
"https://storage.googleapis.com/mediapipe-tasks/face_stylizer/face_stylizer_with_metadata.tflite"
|
|
48
48
|
);
|
|
49
49
|
const image = document.getElementById("image") as HTMLImageElement;
|
|
50
50
|
const stylizedImage = faceStylizer.stylize(image);
|
|
@@ -115,7 +115,7 @@ const vision = await FilesetResolver.forVisionTasks(
|
|
|
115
115
|
"https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
|
|
116
116
|
);
|
|
117
117
|
const imageSegmenter = await ImageSegmenter.createFromModelPath(vision,
|
|
118
|
-
"
|
|
118
|
+
"https://storage.googleapis.com/mediapipe-tasks/image_segmenter/selfie_segmentation.tflite"
|
|
119
119
|
);
|
|
120
120
|
const image = document.getElementById("image") as HTMLImageElement;
|
|
121
121
|
imageSegmenter.segment(image, (masks, width, height) => {
|
|
@@ -133,7 +133,8 @@ const vision = await FilesetResolver.forVisionTasks(
|
|
|
133
133
|
"https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
|
|
134
134
|
);
|
|
135
135
|
const interactiveSegmenter = await InteractiveSegmenter.createFromModelPath(
|
|
136
|
-
vision,
|
|
136
|
+
vision,
|
|
137
|
+
"https://storage.googleapis.com/mediapipe-tasks/interactive_segmenter/ptm_512_hdt_ptm_woid.tflite"
|
|
137
138
|
);
|
|
138
139
|
const image = document.getElementById("image") as HTMLImageElement;
|
|
139
140
|
interactiveSegmenter.segment(image, { keypoint: { x: 0.1, y: 0.2 } },
|
package/package.json
CHANGED
package/vision.d.ts
CHANGED
|
@@ -533,7 +533,7 @@ export declare interface FaceLandmarkerResult {
|
|
|
533
533
|
}
|
|
534
534
|
|
|
535
535
|
/**
|
|
536
|
-
* A class containing the
|
|
536
|
+
* A class containing the pairs of landmark indices to be rendered with
|
|
537
537
|
* connections.
|
|
538
538
|
*/
|
|
539
539
|
export declare class FaceLandmarksConnections {
|
|
@@ -593,10 +593,6 @@ export declare class FaceStylizer extends VisionTaskRunner {
|
|
|
593
593
|
* synchronously once the callback returns. Only use this method when the
|
|
594
594
|
* FaceStylizer is created with the image running mode.
|
|
595
595
|
*
|
|
596
|
-
* The input image can be of any size. To ensure that the output image has
|
|
597
|
-
* reasonable quality, the stylized output image size is determined by the
|
|
598
|
-
* model output size.
|
|
599
|
-
*
|
|
600
596
|
* @param image An image to process.
|
|
601
597
|
* @param callback The callback that is invoked with the stylized image. The
|
|
602
598
|
* lifetime of the returned data is only guaranteed for the duration of the
|
|
@@ -617,11 +613,6 @@ export declare class FaceStylizer extends VisionTaskRunner {
|
|
|
617
613
|
* If both are specified, the crop around the region-of-interest is extracted
|
|
618
614
|
* first, then the specified rotation is applied to the crop.
|
|
619
615
|
*
|
|
620
|
-
* The input image can be of any size. To ensure that the output image has
|
|
621
|
-
* reasonable quality, the stylized output image size is the smaller of the
|
|
622
|
-
* model output size and the size of the 'regionOfInterest' specified in
|
|
623
|
-
* 'imageProcessingOptions'.
|
|
624
|
-
*
|
|
625
616
|
* @param image An image to process.
|
|
626
617
|
* @param imageProcessingOptions the `ImageProcessingOptions` specifying how
|
|
627
618
|
* to process the input image before running inference.
|
|
@@ -638,9 +629,6 @@ export declare class FaceStylizer extends VisionTaskRunner {
|
|
|
638
629
|
* frame's timestamp (in milliseconds). The input timestamps must be
|
|
639
630
|
* monotonically increasing.
|
|
640
631
|
*
|
|
641
|
-
* To ensure that the output image has reasonable quality, the stylized
|
|
642
|
-
* output image size is determined by the model output size.
|
|
643
|
-
*
|
|
644
632
|
* @param videoFrame A video frame to process.
|
|
645
633
|
* @param timestamp The timestamp of the current frame, in ms.
|
|
646
634
|
* @param callback The callback that is invoked with the stylized image. The
|
|
@@ -665,10 +653,6 @@ export declare class FaceStylizer extends VisionTaskRunner {
|
|
|
665
653
|
* frame's timestamp (in milliseconds). The input timestamps must be
|
|
666
654
|
* monotonically increasing.
|
|
667
655
|
*
|
|
668
|
-
* To ensure that the output image has reasonable quality, the stylized
|
|
669
|
-
* output image size is the smaller of the model output size and the size of
|
|
670
|
-
* the 'regionOfInterest' specified in 'imageProcessingOptions'.
|
|
671
|
-
*
|
|
672
656
|
* @param videoFrame A video frame to process.
|
|
673
657
|
* @param imageProcessingOptions the `ImageProcessingOptions` specifying how
|
|
674
658
|
* to process the input image before running inference.
|
|
@@ -738,6 +722,11 @@ export declare class FilesetResolver {
|
|
|
738
722
|
|
|
739
723
|
/** Performs hand gesture recognition on images. */
|
|
740
724
|
export declare class GestureRecognizer extends VisionTaskRunner {
|
|
725
|
+
/**
|
|
726
|
+
* An array containing the pairs of hand landmark indices to be rendered with
|
|
727
|
+
* connections.
|
|
728
|
+
*/
|
|
729
|
+
static HAND_CONNECTIONS: Connection[];
|
|
741
730
|
/**
|
|
742
731
|
* Initializes the Wasm runtime and creates a new gesture recognizer from the
|
|
743
732
|
* provided options.
|
|
@@ -857,6 +846,11 @@ export declare interface GestureRecognizerResult {
|
|
|
857
846
|
|
|
858
847
|
/** Performs hand landmarks detection on images. */
|
|
859
848
|
export declare class HandLandmarker extends VisionTaskRunner {
|
|
849
|
+
/**
|
|
850
|
+
* An array containing the pairs of hand landmark indices to be rendered with
|
|
851
|
+
* connections.
|
|
852
|
+
*/
|
|
853
|
+
static HAND_CONNECTIONS: Connection[];
|
|
860
854
|
/**
|
|
861
855
|
* Initializes the Wasm runtime and creates a new `HandLandmarker` from the
|
|
862
856
|
* provided options.
|
|
@@ -1219,7 +1213,7 @@ export declare class ImageSegmenter extends VisionTaskRunner {
|
|
|
1219
1213
|
* lifetime of the returned data is only guaranteed for the duration of the
|
|
1220
1214
|
* callback.
|
|
1221
1215
|
*/
|
|
1222
|
-
segment(image: ImageSource, callback:
|
|
1216
|
+
segment(image: ImageSource, callback: ImageSegmenterCallack): void;
|
|
1223
1217
|
/**
|
|
1224
1218
|
* Performs image segmentation on the provided single image and invokes the
|
|
1225
1219
|
* callback with the response. The method returns synchronously once the
|
|
@@ -1233,19 +1227,7 @@ export declare class ImageSegmenter extends VisionTaskRunner {
|
|
|
1233
1227
|
* lifetime of the returned data is only guaranteed for the duration of the
|
|
1234
1228
|
* callback.
|
|
1235
1229
|
*/
|
|
1236
|
-
segment(image: ImageSource, imageProcessingOptions: ImageProcessingOptions, callback:
|
|
1237
|
-
/**
|
|
1238
|
-
* Get the category label list of the ImageSegmenter can recognize. For
|
|
1239
|
-
* `CATEGORY_MASK` type, the index in the category mask corresponds to the
|
|
1240
|
-
* category in the label list. For `CONFIDENCE_MASK` type, the output mask
|
|
1241
|
-
* list at index corresponds to the category in the label list.
|
|
1242
|
-
*
|
|
1243
|
-
* If there is no labelmap provided in the model file, empty label array is
|
|
1244
|
-
* returned.
|
|
1245
|
-
*
|
|
1246
|
-
* @return The labels used by the current model.
|
|
1247
|
-
*/
|
|
1248
|
-
getLabels(): string[];
|
|
1230
|
+
segment(image: ImageSource, imageProcessingOptions: ImageProcessingOptions, callback: ImageSegmenterCallack): void;
|
|
1249
1231
|
/**
|
|
1250
1232
|
* Performs image segmentation on the provided video frame and invokes the
|
|
1251
1233
|
* callback with the response. The method returns synchronously once the
|
|
@@ -1258,7 +1240,7 @@ export declare class ImageSegmenter extends VisionTaskRunner {
|
|
|
1258
1240
|
* lifetime of the returned data is only guaranteed for the duration of the
|
|
1259
1241
|
* callback.
|
|
1260
1242
|
*/
|
|
1261
|
-
segmentForVideo(videoFrame: ImageSource, timestamp: number, callback:
|
|
1243
|
+
segmentForVideo(videoFrame: ImageSource, timestamp: number, callback: ImageSegmenterCallack): void;
|
|
1262
1244
|
/**
|
|
1263
1245
|
* Performs image segmentation on the provided video frame and invokes the
|
|
1264
1246
|
* callback with the response. The method returns synchronously once the
|
|
@@ -1273,9 +1255,29 @@ export declare class ImageSegmenter extends VisionTaskRunner {
|
|
|
1273
1255
|
* lifetime of the returned data is only guaranteed for the duration of the
|
|
1274
1256
|
* callback.
|
|
1275
1257
|
*/
|
|
1276
|
-
segmentForVideo(videoFrame: ImageSource, imageProcessingOptions: ImageProcessingOptions, timestamp: number, callback:
|
|
1258
|
+
segmentForVideo(videoFrame: ImageSource, imageProcessingOptions: ImageProcessingOptions, timestamp: number, callback: ImageSegmenterCallack): void;
|
|
1259
|
+
/**
|
|
1260
|
+
* Get the category label list that the ImageSegmenter can recognize. For
|
|
1261
|
+
* `CATEGORY_MASK` type, the index in the category mask corresponds to the
|
|
1262
|
+
* category in the label list. For `CONFIDENCE_MASK` type, the output mask
|
|
1263
|
+
* list at index corresponds to the category in the label list.
|
|
1264
|
+
*
|
|
1265
|
+
* If there is no labelmap provided in the model file, an empty label array is
|
|
1266
|
+
* returned.
|
|
1267
|
+
*
|
|
1268
|
+
* @return The labels used by the current model.
|
|
1269
|
+
*/
|
|
1270
|
+
getLabels(): string[];
|
|
1277
1271
|
}
|
|
1278
1272
|
|
|
1273
|
+
/**
|
|
1274
|
+
* A callback that receives the computed masks from the image segmenter. The
|
|
1275
|
+
* returned data is only valid for the duration of the callback. If
|
|
1276
|
+
* asynchronous processing is needed, all data needs to be copied before the
|
|
1277
|
+
* callback returns.
|
|
1278
|
+
*/
|
|
1279
|
+
export declare type ImageSegmenterCallack = (result: ImageSegmenterResult) => void;
|
|
1280
|
+
|
|
1279
1281
|
/** Options to configure the MediaPipe Image Segmenter Task */
|
|
1280
1282
|
export declare interface ImageSegmenterOptions extends VisionTaskOptions {
|
|
1281
1283
|
/**
|
|
@@ -1283,20 +1285,44 @@ export declare interface ImageSegmenterOptions extends VisionTaskOptions {
|
|
|
1283
1285
|
* Metadata, if any. Defaults to English.
|
|
1284
1286
|
*/
|
|
1285
1287
|
displayNamesLocale?: string | undefined;
|
|
1288
|
+
/** Whether to output confidence masks. Defaults to true. */
|
|
1289
|
+
outputConfidenceMasks?: boolean | undefined;
|
|
1290
|
+
/** Whether to output the category masks. Defaults to false. */
|
|
1291
|
+
outputCategoryMask?: boolean | undefined;
|
|
1292
|
+
}
|
|
1293
|
+
|
|
1294
|
+
/**
|
|
1295
|
+
* Copyright 2023 The MediaPipe Authors. All Rights Reserved.
|
|
1296
|
+
*
|
|
1297
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
1298
|
+
* you may not use this file except in compliance with the License.
|
|
1299
|
+
* You may obtain a copy of the License at
|
|
1300
|
+
*
|
|
1301
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
1302
|
+
*
|
|
1303
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
1304
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
1305
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
1306
|
+
* See the License for the specific language governing permissions and
|
|
1307
|
+
* limitations under the License.
|
|
1308
|
+
*/
|
|
1309
|
+
/** The output result of ImageSegmenter. */
|
|
1310
|
+
export declare interface ImageSegmenterResult {
|
|
1286
1311
|
/**
|
|
1287
|
-
*
|
|
1288
|
-
*
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
*
|
|
1293
|
-
*
|
|
1294
|
-
*
|
|
1295
|
-
* confidence, usually in the [0.0, 0.1] range.
|
|
1296
|
-
*
|
|
1297
|
-
* Defaults to `CATEGORY_MASK`.
|
|
1312
|
+
* Multiple masks as Float32Arrays or WebGLTextures where, for each mask, each
|
|
1313
|
+
* pixel represents the prediction confidence, usually in the [0, 1] range.
|
|
1314
|
+
*/
|
|
1315
|
+
confidenceMasks?: Float32Array[] | WebGLTexture[];
|
|
1316
|
+
/**
|
|
1317
|
+
* A category mask as a Uint8ClampedArray or WebGLTexture where each
|
|
1318
|
+
* pixel represents the class which the pixel in the original image was
|
|
1319
|
+
* predicted to belong to.
|
|
1298
1320
|
*/
|
|
1299
|
-
|
|
1321
|
+
categoryMask?: Uint8ClampedArray | WebGLTexture;
|
|
1322
|
+
/** The width of the masks. */
|
|
1323
|
+
width: number;
|
|
1324
|
+
/** The height of the masks. */
|
|
1325
|
+
height: number;
|
|
1300
1326
|
}
|
|
1301
1327
|
|
|
1302
1328
|
/**
|
|
@@ -1387,7 +1413,7 @@ export declare class InteractiveSegmenter extends VisionTaskRunner {
|
|
|
1387
1413
|
* lifetime of the returned data is only guaranteed for the duration of the
|
|
1388
1414
|
* callback.
|
|
1389
1415
|
*/
|
|
1390
|
-
segment(image: ImageSource, roi: RegionOfInterest, callback:
|
|
1416
|
+
segment(image: ImageSource, roi: RegionOfInterest, callback: InteractiveSegmenterCallack): void;
|
|
1391
1417
|
/**
|
|
1392
1418
|
* Performs interactive segmentation on the provided single image and invokes
|
|
1393
1419
|
* the callback with the response. The `roi` parameter is used to represent a
|
|
@@ -1413,25 +1439,57 @@ export declare class InteractiveSegmenter extends VisionTaskRunner {
|
|
|
1413
1439
|
* lifetime of the returned data is only guaranteed for the duration of the
|
|
1414
1440
|
* callback.
|
|
1415
1441
|
*/
|
|
1416
|
-
segment(image: ImageSource, roi: RegionOfInterest, imageProcessingOptions: ImageProcessingOptions, callback:
|
|
1442
|
+
segment(image: ImageSource, roi: RegionOfInterest, imageProcessingOptions: ImageProcessingOptions, callback: InteractiveSegmenterCallack): void;
|
|
1417
1443
|
}
|
|
1418
1444
|
|
|
1445
|
+
/**
|
|
1446
|
+
* A callback that receives the computed masks from the interactive segmenter.
|
|
1447
|
+
* The returned data is only valid for the duration of the callback. If
|
|
1448
|
+
* asynchronous processing is needed, all data needs to be copied before the
|
|
1449
|
+
* callback returns.
|
|
1450
|
+
*/
|
|
1451
|
+
export declare type InteractiveSegmenterCallack = (result: InteractiveSegmenterResult) => void;
|
|
1452
|
+
|
|
1419
1453
|
/** Options to configure the MediaPipe Interactive Segmenter Task */
|
|
1420
1454
|
export declare interface InteractiveSegmenterOptions extends TaskRunnerOptions {
|
|
1455
|
+
/** Whether to output confidence masks. Defaults to true. */
|
|
1456
|
+
outputConfidenceMasks?: boolean | undefined;
|
|
1457
|
+
/** Whether to output the category masks. Defaults to false. */
|
|
1458
|
+
outputCategoryMask?: boolean | undefined;
|
|
1459
|
+
}
|
|
1460
|
+
|
|
1461
|
+
/**
|
|
1462
|
+
* Copyright 2023 The MediaPipe Authors. All Rights Reserved.
|
|
1463
|
+
*
|
|
1464
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
1465
|
+
* you may not use this file except in compliance with the License.
|
|
1466
|
+
* You may obtain a copy of the License at
|
|
1467
|
+
*
|
|
1468
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
1469
|
+
*
|
|
1470
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
1471
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
1472
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
1473
|
+
* See the License for the specific language governing permissions and
|
|
1474
|
+
* limitations under the License.
|
|
1475
|
+
*/
|
|
1476
|
+
/** The output result of InteractiveSegmenter. */
|
|
1477
|
+
export declare interface InteractiveSegmenterResult {
|
|
1421
1478
|
/**
|
|
1422
|
-
*
|
|
1423
|
-
*
|
|
1424
|
-
|
|
1425
|
-
|
|
1426
|
-
|
|
1427
|
-
*
|
|
1428
|
-
*
|
|
1429
|
-
*
|
|
1430
|
-
* confidence, usually in the [0.0, 0.1] range.
|
|
1431
|
-
*
|
|
1432
|
-
* Defaults to `CATEGORY_MASK`.
|
|
1479
|
+
* Multiple masks as Float32Arrays or WebGLTextures where, for each mask, each
|
|
1480
|
+
* pixel represents the prediction confidence, usually in the [0, 1] range.
|
|
1481
|
+
*/
|
|
1482
|
+
confidenceMasks?: Float32Array[] | WebGLTexture[];
|
|
1483
|
+
/**
|
|
1484
|
+
* A category mask as a Uint8ClampedArray or WebGLTexture where each
|
|
1485
|
+
* pixel represents the class which the pixel in the original image was
|
|
1486
|
+
* predicted to belong to.
|
|
1433
1487
|
*/
|
|
1434
|
-
|
|
1488
|
+
categoryMask?: Uint8ClampedArray | WebGLTexture;
|
|
1489
|
+
/** The width of the masks. */
|
|
1490
|
+
width: number;
|
|
1491
|
+
/** The height of the masks. */
|
|
1492
|
+
height: number;
|
|
1435
1493
|
}
|
|
1436
1494
|
|
|
1437
1495
|
/**
|
|
@@ -1646,16 +1704,6 @@ declare type RunningMode = "IMAGE" | "VIDEO";
|
|
|
1646
1704
|
*/
|
|
1647
1705
|
export declare type SegmentationMask = Uint8ClampedArray | Float32Array | WebGLTexture;
|
|
1648
1706
|
|
|
1649
|
-
/**
|
|
1650
|
-
* A callback that receives the computed masks from the segmentation tasks. The
|
|
1651
|
-
* callback either receives a single element array with a category mask (as a
|
|
1652
|
-
* `[Uint8ClampedArray]`) or multiple confidence masks (as a `Float32Array[]`).
|
|
1653
|
-
* The returned data is only valid for the duration of the callback. If
|
|
1654
|
-
* asynchronous processing is needed, all data needs to be copied before the
|
|
1655
|
-
* callback returns.
|
|
1656
|
-
*/
|
|
1657
|
-
export declare type SegmentationMaskCallback = (masks: SegmentationMask[], width: number, height: number) => void;
|
|
1658
|
-
|
|
1659
1707
|
/** Base class for all MediaPipe Tasks. */
|
|
1660
1708
|
declare abstract class TaskRunner {
|
|
1661
1709
|
protected constructor();
|