@mediapipe/tasks-vision 0.1.0-alpha-11 → 0.1.0-alpha-12

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -12,7 +12,7 @@ const vision = await FilesetResolver.forVisionTasks(
12
12
  "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
13
13
  );
14
14
  const faceDetector = await FaceDetector.createFromModelPath(vision,
15
- "https://storage.googleapis.com/mediapipe-tasks/object_detector/efficientdet_lite0_uint8.tflite"
15
+ "https://storage.googleapis.com/mediapipe-tasks/face_detector/face_detection_short_range.tflite"
16
16
  );
17
17
  const image = document.getElementById("image") as HTMLImageElement;
18
18
  const detections = faceDetector.detect(image);
@@ -29,7 +29,7 @@ const vision = await FilesetResolver.forVisionTasks(
29
29
  "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
30
30
  );
31
31
  const faceLandmarker = await FaceLandmarker.createFromModelPath(vision,
32
- "model.task"
32
+ "https://storage.googleapis.com/mediapipe-tasks/face_landmarker/face_landmarker.task"
33
33
  );
34
34
  const image = document.getElementById("image") as HTMLImageElement;
35
35
  const landmarks = faceLandmarker.detect(image);
@@ -44,7 +44,7 @@ const vision = await FilesetResolver.forVisionTasks(
44
44
  "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
45
45
  );
46
46
  const faceStylizer = await FaceStylizer.createFromModelPath(vision,
47
- "model.tflite"
47
+ "https://storage.googleapis.com/mediapipe-tasks/face_stylizer/face_stylizer_with_metadata.tflite"
48
48
  );
49
49
  const image = document.getElementById("image") as HTMLImageElement;
50
50
  const stylizedImage = faceStylizer.stylize(image);
@@ -115,7 +115,7 @@ const vision = await FilesetResolver.forVisionTasks(
115
115
  "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
116
116
  );
117
117
  const imageSegmenter = await ImageSegmenter.createFromModelPath(vision,
118
- "model.tflite"
118
+ "https://storage.googleapis.com/mediapipe-tasks/image_segmenter/selfie_segmentation.tflite"
119
119
  );
120
120
  const image = document.getElementById("image") as HTMLImageElement;
121
121
  imageSegmenter.segment(image, (masks, width, height) => {
@@ -133,7 +133,8 @@ const vision = await FilesetResolver.forVisionTasks(
133
133
  "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
134
134
  );
135
135
  const interactiveSegmenter = await InteractiveSegmenter.createFromModelPath(
136
- vision, "model.tflite"
136
+ vision,
137
+ "https://storage.googleapis.com/mediapipe-tasks/interactive_segmenter/ptm_512_hdt_ptm_woid.tflite"
137
138
  );
138
139
  const image = document.getElementById("image") as HTMLImageElement;
139
140
  interactiveSegmenter.segment(image, { keypoint: { x: 0.1, y: 0.2 } },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mediapipe/tasks-vision",
3
- "version": "0.1.0-alpha-11",
3
+ "version": "0.1.0-alpha-12",
4
4
  "description": "MediaPipe Vision Tasks",
5
5
  "main": "vision_bundle.js",
6
6
  "author": "mediapipe@google.com",
package/vision.d.ts CHANGED
@@ -533,7 +533,7 @@ export declare interface FaceLandmarkerResult {
533
533
  }
534
534
 
535
535
  /**
536
- * A class containing the Pairs of landmark indices to be rendered with
536
+ * A class containing the pairs of landmark indices to be rendered with
537
537
  * connections.
538
538
  */
539
539
  export declare class FaceLandmarksConnections {
@@ -593,10 +593,6 @@ export declare class FaceStylizer extends VisionTaskRunner {
593
593
  * synchronously once the callback returns. Only use this method when the
594
594
  * FaceStylizer is created with the image running mode.
595
595
  *
596
- * The input image can be of any size. To ensure that the output image has
597
- * reasonable quality, the stylized output image size is determined by the
598
- * model output size.
599
- *
600
596
  * @param image An image to process.
601
597
  * @param callback The callback that is invoked with the stylized image. The
602
598
  * lifetime of the returned data is only guaranteed for the duration of the
@@ -617,11 +613,6 @@ export declare class FaceStylizer extends VisionTaskRunner {
617
613
  * If both are specified, the crop around the region-of-interest is extracted
618
614
  * first, then the specified rotation is applied to the crop.
619
615
  *
620
- * The input image can be of any size. To ensure that the output image has
621
- * reasonable quality, the stylized output image size is the smaller of the
622
- * model output size and the size of the 'regionOfInterest' specified in
623
- * 'imageProcessingOptions'.
624
- *
625
616
  * @param image An image to process.
626
617
  * @param imageProcessingOptions the `ImageProcessingOptions` specifying how
627
618
  * to process the input image before running inference.
@@ -638,9 +629,6 @@ export declare class FaceStylizer extends VisionTaskRunner {
638
629
  * frame's timestamp (in milliseconds). The input timestamps must be
639
630
  * monotonically increasing.
640
631
  *
641
- * To ensure that the output image has reasonable quality, the stylized
642
- * output image size is determined by the model output size.
643
- *
644
632
  * @param videoFrame A video frame to process.
645
633
  * @param timestamp The timestamp of the current frame, in ms.
646
634
  * @param callback The callback that is invoked with the stylized image. The
@@ -665,10 +653,6 @@ export declare class FaceStylizer extends VisionTaskRunner {
665
653
  * frame's timestamp (in milliseconds). The input timestamps must be
666
654
  * monotonically increasing.
667
655
  *
668
- * To ensure that the output image has reasonable quality, the stylized
669
- * output image size is the smaller of the model output size and the size of
670
- * the 'regionOfInterest' specified in 'imageProcessingOptions'.
671
- *
672
656
  * @param videoFrame A video frame to process.
673
657
  * @param imageProcessingOptions the `ImageProcessingOptions` specifying how
674
658
  * to process the input image before running inference.
@@ -738,6 +722,11 @@ export declare class FilesetResolver {
738
722
 
739
723
  /** Performs hand gesture recognition on images. */
740
724
  export declare class GestureRecognizer extends VisionTaskRunner {
725
+ /**
726
+ * An array containing the pairs of hand landmark indices to be rendered with
727
+ * connections.
728
+ */
729
+ static HAND_CONNECTIONS: Connection[];
741
730
  /**
742
731
  * Initializes the Wasm runtime and creates a new gesture recognizer from the
743
732
  * provided options.
@@ -857,6 +846,11 @@ export declare interface GestureRecognizerResult {
857
846
 
858
847
  /** Performs hand landmarks detection on images. */
859
848
  export declare class HandLandmarker extends VisionTaskRunner {
849
+ /**
850
+ * An array containing the pairs of hand landmark indices to be rendered with
851
+ * connections.
852
+ */
853
+ static HAND_CONNECTIONS: Connection[];
860
854
  /**
861
855
  * Initializes the Wasm runtime and creates a new `HandLandmarker` from the
862
856
  * provided options.
@@ -1219,7 +1213,7 @@ export declare class ImageSegmenter extends VisionTaskRunner {
1219
1213
  * lifetime of the returned data is only guaranteed for the duration of the
1220
1214
  * callback.
1221
1215
  */
1222
- segment(image: ImageSource, callback: SegmentationMaskCallback): void;
1216
+ segment(image: ImageSource, callback: ImageSegmenterCallack): void;
1223
1217
  /**
1224
1218
  * Performs image segmentation on the provided single image and invokes the
1225
1219
  * callback with the response. The method returns synchronously once the
@@ -1233,19 +1227,7 @@ export declare class ImageSegmenter extends VisionTaskRunner {
1233
1227
  * lifetime of the returned data is only guaranteed for the duration of the
1234
1228
  * callback.
1235
1229
  */
1236
- segment(image: ImageSource, imageProcessingOptions: ImageProcessingOptions, callback: SegmentationMaskCallback): void;
1237
- /**
1238
- * Get the category label list of the ImageSegmenter can recognize. For
1239
- * `CATEGORY_MASK` type, the index in the category mask corresponds to the
1240
- * category in the label list. For `CONFIDENCE_MASK` type, the output mask
1241
- * list at index corresponds to the category in the label list.
1242
- *
1243
- * If there is no labelmap provided in the model file, empty label array is
1244
- * returned.
1245
- *
1246
- * @return The labels used by the current model.
1247
- */
1248
- getLabels(): string[];
1230
+ segment(image: ImageSource, imageProcessingOptions: ImageProcessingOptions, callback: ImageSegmenterCallack): void;
1249
1231
  /**
1250
1232
  * Performs image segmentation on the provided video frame and invokes the
1251
1233
  * callback with the response. The method returns synchronously once the
@@ -1258,7 +1240,7 @@ export declare class ImageSegmenter extends VisionTaskRunner {
1258
1240
  * lifetime of the returned data is only guaranteed for the duration of the
1259
1241
  * callback.
1260
1242
  */
1261
- segmentForVideo(videoFrame: ImageSource, timestamp: number, callback: SegmentationMaskCallback): void;
1243
+ segmentForVideo(videoFrame: ImageSource, timestamp: number, callback: ImageSegmenterCallack): void;
1262
1244
  /**
1263
1245
  * Performs image segmentation on the provided video frame and invokes the
1264
1246
  * callback with the response. The method returns synchronously once the
@@ -1273,9 +1255,29 @@ export declare class ImageSegmenter extends VisionTaskRunner {
1273
1255
  * lifetime of the returned data is only guaranteed for the duration of the
1274
1256
  * callback.
1275
1257
  */
1276
- segmentForVideo(videoFrame: ImageSource, imageProcessingOptions: ImageProcessingOptions, timestamp: number, callback: SegmentationMaskCallback): void;
1258
+ segmentForVideo(videoFrame: ImageSource, imageProcessingOptions: ImageProcessingOptions, timestamp: number, callback: ImageSegmenterCallack): void;
1259
+ /**
1260
+ * Get the category label list the ImageSegmenter can recognize. For
1261
+ * `CATEGORY_MASK` type, the index in the category mask corresponds to the
1262
+ * category in the label list. For `CONFIDENCE_MASK` type, the output mask
1263
+ * list at index corresponds to the category in the label list.
1264
+ *
1265
+ * If there is no labelmap provided in the model file, an empty label array is
1266
+ * returned.
1267
+ *
1268
+ * @return The labels used by the current model.
1269
+ */
1270
+ getLabels(): string[];
1277
1271
  }
1278
1272
 
1273
+ /**
1274
+ * A callback that receives the computed masks from the image segmenter. The
1275
+ * returned data is only valid for the duration of the callback. If
1276
+ * asynchronous processing is needed, all data needs to be copied before the
1277
+ * callback returns.
1278
+ */
1279
+ export declare type ImageSegmenterCallack = (result: ImageSegmenterResult) => void;
1280
+
1279
1281
  /** Options to configure the MediaPipe Image Segmenter Task */
1280
1282
  export declare interface ImageSegmenterOptions extends VisionTaskOptions {
1281
1283
  /**
@@ -1283,20 +1285,44 @@ export declare interface ImageSegmenterOptions extends VisionTaskOptions {
1283
1285
  * Metadata, if any. Defaults to English.
1284
1286
  */
1285
1287
  displayNamesLocale?: string | undefined;
1288
+ /** Whether to output confidence masks. Defaults to true. */
1289
+ outputConfidenceMasks?: boolean | undefined;
1290
+ /** Whether to output the category masks. Defaults to false. */
1291
+ outputCategoryMask?: boolean | undefined;
1292
+ }
1293
+
1294
+ /**
1295
+ * Copyright 2023 The MediaPipe Authors. All Rights Reserved.
1296
+ *
1297
+ * Licensed under the Apache License, Version 2.0 (the "License");
1298
+ * you may not use this file except in compliance with the License.
1299
+ * You may obtain a copy of the License at
1300
+ *
1301
+ * http://www.apache.org/licenses/LICENSE-2.0
1302
+ *
1303
+ * Unless required by applicable law or agreed to in writing, software
1304
+ * distributed under the License is distributed on an "AS IS" BASIS,
1305
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1306
+ * See the License for the specific language governing permissions and
1307
+ * limitations under the License.
1308
+ */
1309
+ /** The output result of ImageSegmenter. */
1310
+ export declare interface ImageSegmenterResult {
1286
1311
  /**
1287
- * The output type of segmentation results.
1288
- *
1289
- * The two supported modes are:
1290
- * - Category Mask: Gives a single output mask where each pixel represents
1291
- * the class which the pixel in the original image was
1292
- * predicted to belong to.
1293
- * - Confidence Mask: Gives a list of output masks (one for each class). For
1294
- * each mask, the pixel represents the prediction
1295
- * confidence, usually in the [0.0, 0.1] range.
1296
- *
1297
- * Defaults to `CATEGORY_MASK`.
1312
+ * Multiple masks as Float32Arrays or WebGLTextures where, for each mask, each
1313
+ * pixel represents the prediction confidence, usually in the [0, 1] range.
1314
+ */
1315
+ confidenceMasks?: Float32Array[] | WebGLTexture[];
1316
+ /**
1317
+ * A category mask as a Uint8ClampedArray or WebGLTexture where each
1318
+ * pixel represents the class which the pixel in the original image was
1319
+ * predicted to belong to.
1298
1320
  */
1299
- outputType?: "CATEGORY_MASK" | "CONFIDENCE_MASK" | undefined;
1321
+ categoryMask?: Uint8ClampedArray | WebGLTexture;
1322
+ /** The width of the masks. */
1323
+ width: number;
1324
+ /** The height of the masks. */
1325
+ height: number;
1300
1326
  }
1301
1327
 
1302
1328
  /**
@@ -1387,7 +1413,7 @@ export declare class InteractiveSegmenter extends VisionTaskRunner {
1387
1413
  * lifetime of the returned data is only guaranteed for the duration of the
1388
1414
  * callback.
1389
1415
  */
1390
- segment(image: ImageSource, roi: RegionOfInterest, callback: SegmentationMaskCallback): void;
1416
+ segment(image: ImageSource, roi: RegionOfInterest, callback: InteractiveSegmenterCallack): void;
1391
1417
  /**
1392
1418
  * Performs interactive segmentation on the provided single image and invokes
1393
1419
  * the callback with the response. The `roi` parameter is used to represent a
@@ -1413,25 +1439,57 @@ export declare class InteractiveSegmenter extends VisionTaskRunner {
1413
1439
  * lifetime of the returned data is only guaranteed for the duration of the
1414
1440
  * callback.
1415
1441
  */
1416
- segment(image: ImageSource, roi: RegionOfInterest, imageProcessingOptions: ImageProcessingOptions, callback: SegmentationMaskCallback): void;
1442
+ segment(image: ImageSource, roi: RegionOfInterest, imageProcessingOptions: ImageProcessingOptions, callback: InteractiveSegmenterCallack): void;
1417
1443
  }
1418
1444
 
1445
+ /**
1446
+ * A callback that receives the computed masks from the interactive segmenter.
1447
+ * The returned data is only valid for the duration of the callback. If
1448
+ * asynchronous processing is needed, all data needs to be copied before the
1449
+ * callback returns.
1450
+ */
1451
+ export declare type InteractiveSegmenterCallack = (result: InteractiveSegmenterResult) => void;
1452
+
1419
1453
  /** Options to configure the MediaPipe Interactive Segmenter Task */
1420
1454
  export declare interface InteractiveSegmenterOptions extends TaskRunnerOptions {
1455
+ /** Whether to output confidence masks. Defaults to true. */
1456
+ outputConfidenceMasks?: boolean | undefined;
1457
+ /** Whether to output the category masks. Defaults to false. */
1458
+ outputCategoryMask?: boolean | undefined;
1459
+ }
1460
+
1461
+ /**
1462
+ * Copyright 2023 The MediaPipe Authors. All Rights Reserved.
1463
+ *
1464
+ * Licensed under the Apache License, Version 2.0 (the "License");
1465
+ * you may not use this file except in compliance with the License.
1466
+ * You may obtain a copy of the License at
1467
+ *
1468
+ * http://www.apache.org/licenses/LICENSE-2.0
1469
+ *
1470
+ * Unless required by applicable law or agreed to in writing, software
1471
+ * distributed under the License is distributed on an "AS IS" BASIS,
1472
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1473
+ * See the License for the specific language governing permissions and
1474
+ * limitations under the License.
1475
+ */
1476
+ /** The output result of InteractiveSegmenter. */
1477
+ export declare interface InteractiveSegmenterResult {
1421
1478
  /**
1422
- * The output type of segmentation results.
1423
- *
1424
- * The two supported modes are:
1425
- * - Category Mask: Gives a single output mask where each pixel represents
1426
- * the class which the pixel in the original image was
1427
- * predicted to belong to.
1428
- * - Confidence Mask: Gives a list of output masks (one for each class). For
1429
- * each mask, the pixel represents the prediction
1430
- * confidence, usually in the [0.0, 0.1] range.
1431
- *
1432
- * Defaults to `CATEGORY_MASK`.
1479
+ * Multiple masks as Float32Arrays or WebGLTextures where, for each mask, each
1480
+ * pixel represents the prediction confidence, usually in the [0, 1] range.
1481
+ */
1482
+ confidenceMasks?: Float32Array[] | WebGLTexture[];
1483
+ /**
1484
+ * A category mask as a Uint8ClampedArray or WebGLTexture where each
1485
+ * pixel represents the class which the pixel in the original image was
1486
+ * predicted to belong to.
1433
1487
  */
1434
- outputType?: "CATEGORY_MASK" | "CONFIDENCE_MASK" | undefined;
1488
+ categoryMask?: Uint8ClampedArray | WebGLTexture;
1489
+ /** The width of the masks. */
1490
+ width: number;
1491
+ /** The height of the masks. */
1492
+ height: number;
1435
1493
  }
1436
1494
 
1437
1495
  /**
@@ -1646,16 +1704,6 @@ declare type RunningMode = "IMAGE" | "VIDEO";
1646
1704
  */
1647
1705
  export declare type SegmentationMask = Uint8ClampedArray | Float32Array | WebGLTexture;
1648
1706
 
1649
- /**
1650
- * A callback that receives the computed masks from the segmentation tasks. The
1651
- * callback either receives a single element array with a category mask (as a
1652
- * `[Uint8ClampedArray]`) or multiple confidence masks (as a `Float32Array[]`).
1653
- * The returned data is only valid for the duration of the callback. If
1654
- * asynchronous processing is needed, all data needs to be copied before the
1655
- * callback returns.
1656
- */
1657
- export declare type SegmentationMaskCallback = (masks: SegmentationMask[], width: number, height: number) => void;
1658
-
1659
1707
  /** Base class for all MediaPipe Tasks. */
1660
1708
  declare abstract class TaskRunner {
1661
1709
  protected constructor();