@mediapipe/tasks-vision 0.1.0-alpha-6 → 0.1.0-alpha-8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,6 +2,22 @@
2
2
 
3
3
  This package contains the vision tasks for MediaPipe.
4
4
 
5
+ ## Face Detection
6
+
7
+ The MediaPipe Face Detector task lets you detect the presence and location of
8
+ faces within images or videos.
9
+
10
+ ```
11
+ const vision = await FilesetResolver.forVisionTasks(
12
+ "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@latest/wasm"
13
+ );
14
+ const faceDetector = await FaceDetector.createFromModelPath(vision,
15
+ "https://storage.googleapis.com/mediapipe-tasks/object_detector/efficientdet_lite0_uint8.tflite"
16
+ );
17
+ const image = document.getElementById("image") as HTMLImageElement;
18
+ const detections = faceDetector.detect(image);
19
+ ```
20
+
5
21
  ## Face Landmark Detection
6
22
 
7
23
  The MediaPipe Face Landmarker task lets you detect the landmarks of faces in
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mediapipe/tasks-vision",
3
- "version": "0.1.0-alpha-6",
3
+ "version": "0.1.0-alpha-8",
4
4
  "description": "MediaPipe Vision Tasks",
5
5
  "main": "vision_bundle.js",
6
6
  "author": "mediapipe@google.com",
package/vision.d.ts CHANGED
@@ -29,6 +29,21 @@ declare interface BaseOptions_2 {
29
29
  delegate?: "CPU" | "GPU" | undefined;
30
30
  }
31
31
 
32
+ /**
33
+ * Copyright 2023 The MediaPipe Authors. All Rights Reserved.
34
+ *
35
+ * Licensed under the Apache License, Version 2.0 (the "License");
36
+ * you may not use this file except in compliance with the License.
37
+ * You may obtain a copy of the License at
38
+ *
39
+ * http://www.apache.org/licenses/LICENSE-2.0
40
+ *
41
+ * Unless required by applicable law or agreed to in writing, software
42
+ * distributed under the License is distributed on an "AS IS" BASIS,
43
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
44
+ * See the License for the specific language governing permissions and
45
+ * limitations under the License.
46
+ */
32
47
  /** An integer bounding box, axis aligned. */
33
48
  export declare interface BoundingBox {
34
49
  /** The X coordinate of the top-left corner, in pixels. */
@@ -138,14 +153,30 @@ declare interface ClassifierOptions {
138
153
  categoryDenylist?: string[] | undefined;
139
154
  }
140
155
 
141
- /** Represents one object detected by the `ObjectDetector`. */
156
+ /** Represents one detection by a detection task. */
142
157
  export declare interface Detection {
143
158
  /** A list of `Category` objects. */
144
159
  categories: Category[];
145
160
  /** The bounding box of the detected objects. */
146
161
  boundingBox?: BoundingBox;
162
+ /**
163
+ * Optional list of keypoints associated with the detection. Keypoints
164
+ * represent interesting points related to the detection. For example, the
165
+ * keypoints represent the eye, ear and mouth from a face detection model. Or
166
+ * in the template matching detection, e.g. KNIFT, they can represent the
167
+ * feature points for template matching.
168
+ */
169
+ keypoints?: NormalizedKeypoint[];
147
170
  }
148
171
 
172
+ /** Detection results of a model. */
173
+ declare interface DetectionResult {
174
+ /** A list of Detections. */
175
+ detections: Detection[];
176
+ }
177
+ export { DetectionResult as FaceDetectorResult }
178
+ export { DetectionResult as ObjectDetectorResult }
179
+
149
180
  /**
150
181
  * Copyright 2022 The MediaPipe Authors. All Rights Reserved.
151
182
  *
@@ -224,6 +255,200 @@ export declare interface Embedding {
224
255
  headName: string;
225
256
  }
226
257
 
258
+ /** Performs face detection on images. */
259
+ export declare class FaceDetector extends VisionTaskRunner {
260
+ /**
261
+ * Initializes the Wasm runtime and creates a new face detector from the
262
+ * provided options.
263
+ * @param wasmFileset A configuration object that provides the location of the
264
+ * Wasm binary and its loader.
265
+ * @param faceDetectorOptions The options for the FaceDetector. Note that
266
+ * either a path to the model asset or a model buffer needs to be
267
+ * provided (via `baseOptions`).
268
+ */
269
+ static createFromOptions(wasmFileset: WasmFileset, faceDetectorOptions: FaceDetectorOptions): Promise<FaceDetector>;
270
+ /**
271
+ * Initializes the Wasm runtime and creates a new face detector based on the
272
+ * provided model asset buffer.
273
+ * @param wasmFileset A configuration object that provides the location of the
274
+ * Wasm binary and its loader.
275
+ * @param modelAssetBuffer A binary representation of the model.
276
+ */
277
+ static createFromModelBuffer(wasmFileset: WasmFileset, modelAssetBuffer: Uint8Array): Promise<FaceDetector>;
278
+ /**
279
+ * Initializes the Wasm runtime and creates a new face detector based on the
280
+ * path to the model asset.
281
+ * @param wasmFileset A configuration object that provides the location of the
282
+ * Wasm binary and its loader.
283
+ * @param modelAssetPath The path to the model asset.
284
+ */
285
+ static createFromModelPath(wasmFileset: WasmFileset, modelAssetPath: string): Promise<FaceDetector>;
286
+ private constructor();
287
+ /**
288
+ * Sets new options for the FaceDetector.
289
+ *
290
+ * Calling `setOptions()` with a subset of options only affects those options.
291
+ * You can reset an option back to its default value by explicitly setting it
292
+ * to `undefined`.
293
+ *
294
+ * @param options The options for the FaceDetector.
295
+ */
296
+ setOptions(options: FaceDetectorOptions): Promise<void>;
297
+ /**
298
+ * Performs face detection on the provided single image and waits
299
+ * synchronously for the response. Only use this method when the
300
+ * FaceDetector is created with running mode `image`.
301
+ *
302
+ * @param image An image to process.
303
+ * @param imageProcessingOptions the `ImageProcessingOptions` specifying how
304
+ * to process the input image before running inference.
305
+ * @return A result containing the list of detected faces.
306
+ */
307
+ detect(image: ImageSource, imageProcessingOptions?: ImageProcessingOptions): DetectionResult;
308
+ /**
309
+ * Performs face detection on the provided video frame and waits
310
+ * synchronously for the response. Only use this method when the
311
+ * FaceDetector is created with running mode `video`.
312
+ *
313
+ * @param videoFrame A video frame to process.
314
+ * @param timestamp The timestamp of the current frame, in ms.
315
+ * @param imageProcessingOptions the `ImageProcessingOptions` specifying how
316
+ * to process the input image before running inference.
317
+ * @return A result containing the list of detected faces.
318
+ */
319
+ detectForVideo(videoFrame: ImageSource, timestamp: number, imageProcessingOptions?: ImageProcessingOptions): DetectionResult;
320
+ }
321
+
322
+ /** Options to configure the MediaPipe Face Detector Task */
323
+ export declare interface FaceDetectorOptions extends VisionTaskOptions {
324
+ /**
325
+ * The minimum confidence score for the face detection to be considered
326
+ * successful. Defaults to 0.5.
327
+ */
328
+ minDetectionConfidence?: number | undefined;
329
+ /**
330
+ * The minimum non-maximum-suppression threshold for face detection to be
331
+ * considered overlapped. Defaults to 0.3.
332
+ */
333
+ minSuppressionThreshold?: number | undefined;
334
+ }
335
+
336
+ /**
337
+ * Performs face landmarks detection on images.
338
+ *
339
+ * This API expects a pre-trained face landmarker model asset bundle.
340
+ */
341
+ export declare class FaceLandmarker extends VisionTaskRunner {
342
+ /**
343
+ * Initializes the Wasm runtime and creates a new `FaceLandmarker` from the
344
+ * provided options.
345
+ * @param wasmFileset A configuration object that provides the location of the
346
+ * Wasm binary and its loader.
347
+ * @param faceLandmarkerOptions The options for the FaceLandmarker.
348
+ * Note that either a path to the model asset or a model buffer needs to
349
+ * be provided (via `baseOptions`).
350
+ */
351
+ static createFromOptions(wasmFileset: WasmFileset, faceLandmarkerOptions: FaceLandmarkerOptions): Promise<FaceLandmarker>;
352
+ /**
353
+ * Initializes the Wasm runtime and creates a new `FaceLandmarker` based on
354
+ * the provided model asset buffer.
355
+ * @param wasmFileset A configuration object that provides the location of the
356
+ * Wasm binary and its loader.
357
+ * @param modelAssetBuffer A binary representation of the model.
358
+ */
359
+ static createFromModelBuffer(wasmFileset: WasmFileset, modelAssetBuffer: Uint8Array): Promise<FaceLandmarker>;
360
+ /**
361
+ * Initializes the Wasm runtime and creates a new `FaceLandmarker` based on
362
+ * the path to the model asset.
363
+ * @param wasmFileset A configuration object that provides the location of the
364
+ * Wasm binary and its loader.
365
+ * @param modelAssetPath The path to the model asset.
366
+ */
367
+ static createFromModelPath(wasmFileset: WasmFileset, modelAssetPath: string): Promise<FaceLandmarker>;
368
+ private constructor();
369
+ /**
370
+ * Sets new options for this `FaceLandmarker`.
371
+ *
372
+ * Calling `setOptions()` with a subset of options only affects those options.
373
+ * You can reset an option back to its default value by explicitly setting it
374
+ * to `undefined`.
375
+ *
376
+ * @param options The options for the face landmarker.
377
+ */
378
+ setOptions(options: FaceLandmarkerOptions): Promise<void>;
379
+ /**
380
+ * Performs face landmarks detection on the provided single image and waits
381
+ * synchronously for the response. Only use this method when the
382
+ * FaceLandmarker is created with running mode `image`.
383
+ *
384
+ * @param image An image to process.
385
+ * @param imageProcessingOptions the `ImageProcessingOptions` specifying how
386
+ * to process the input image before running inference.
387
+ * @return The detected face landmarks.
388
+ */
389
+ detect(image: ImageSource, imageProcessingOptions?: ImageProcessingOptions): FaceLandmarkerResult;
390
+ /**
391
+ * Performs face landmarks detection on the provided video frame and waits
392
+ * synchronously for the response. Only use this method when the
393
+ * FaceLandmarker is created with running mode `video`.
394
+ *
395
+ * @param videoFrame A video frame to process.
396
+ * @param timestamp The timestamp of the current frame, in ms.
397
+ * @param imageProcessingOptions the `ImageProcessingOptions` specifying how
398
+ * to process the input image before running inference.
399
+ * @return The detected face landmarks.
400
+ */
401
+ detectForVideo(videoFrame: ImageSource, timestamp: number, imageProcessingOptions?: ImageProcessingOptions): FaceLandmarkerResult;
402
+ }
403
+
404
+ /** Options to configure the MediaPipe FaceLandmarker Task */
405
+ export declare interface FaceLandmarkerOptions extends VisionTaskOptions {
406
+ /**
407
+ * The maximum number of faces that can be detected by the FaceLandmarker.
408
+ * Defaults to 1.
409
+ */
410
+ numFaces?: number | undefined;
411
+ /**
412
+ * The minimum confidence score for the face detection to be considered
413
+ * successful. Defaults to 0.5.
414
+ */
415
+ minFaceDetectionConfidence?: number | undefined;
416
+ /**
417
+ * The minimum confidence score for face presence in the face landmark
418
+ * detection. Defaults to 0.5.
419
+ */
420
+ minFacePresenceConfidence?: number | undefined;
421
+ /**
422
+ * The minimum confidence score for the face tracking to be considered
423
+ * successful. Defaults to 0.5.
424
+ */
425
+ minTrackingConfidence?: number | undefined;
426
+ /**
427
+ * Whether FaceLandmarker outputs face blendshapes classification. Face
428
+ * blendshapes are used for rendering the 3D face model.
429
+ */
430
+ outputFaceBlendshapes?: boolean | undefined;
431
+ /**
432
+ * Whether FaceLandmarker outputs the facial transformation matrix. Facial
433
+ * transformation matrix is used to transform the face landmarks in canonical
434
+ * face to the detected face, so that users can apply face effects on the
435
+ * detected landmarks.
436
+ */
437
+ outputFacialTransformationMatrixes?: boolean | undefined;
438
+ }
439
+
440
+ /**
441
+ * Represents the face landmarks detection results generated by `FaceLandmarker`.
442
+ */
443
+ export declare interface FaceLandmarkerResult {
444
+ /** Detected face landmarks in normalized image coordinates. */
445
+ faceLandmarks: NormalizedLandmark[][];
446
+ /** Optional face blendshapes results. */
447
+ faceBlendshapes?: Classifications[];
448
+ /** Optional facial transformation matrix. */
449
+ facialTransformationMatrixes?: Matrix[];
450
+ }
451
+
227
452
  /** Performs face stylization on images. */
228
453
  export declare class FaceStylizer extends VisionTaskRunner {
229
454
  /**
@@ -1123,6 +1348,31 @@ export declare interface Landmark {
1123
1348
  z: number;
1124
1349
  }
1125
1350
 
1351
+ /**
1352
+ * Copyright 2023 The MediaPipe Authors. All Rights Reserved.
1353
+ *
1354
+ * Licensed under the Apache License, Version 2.0 (the "License");
1355
+ * you may not use this file except in compliance with the License.
1356
+ * You may obtain a copy of the License at
1357
+ *
1358
+ * http://www.apache.org/licenses/LICENSE-2.0
1359
+ *
1360
+ * Unless required by applicable law or agreed to in writing, software
1361
+ * distributed under the License is distributed on an "AS IS" BASIS,
1362
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1363
+ * See the License for the specific language governing permissions and
1364
+ * limitations under the License.
1365
+ */
1366
+ /** A two-dimensional matrix. */
1367
+ declare interface Matrix {
1368
+ /** The number of rows. */
1369
+ rows: number;
1370
+ /** The number of columns. */
1371
+ columns: number;
1372
+ /** The values as a flattened one-dimensional array. */
1373
+ data: number[];
1374
+ }
1375
+
1126
1376
  /**
1127
1377
  * Copyright 2023 The MediaPipe Authors. All Rights Reserved.
1128
1378
  *
@@ -1231,9 +1481,9 @@ export declare class ObjectDetector extends VisionTaskRunner {
1231
1481
  * @param image An image to process.
1232
1482
  * @param imageProcessingOptions the `ImageProcessingOptions` specifying how
1233
1483
  * to process the input image before running inference.
1234
- * @return The list of detected objects
1484
+ * @return A result containing a list of detected objects.
1235
1485
  */
1236
- detect(image: ImageSource, imageProcessingOptions?: ImageProcessingOptions): Detection[];
1486
+ detect(image: ImageSource, imageProcessingOptions?: ImageProcessingOptions): DetectionResult;
1237
1487
  /**
1238
1488
  * Performs object detection on the provided video frame and waits
1239
1489
  * synchronously for the response. Only use this method when the
@@ -1243,9 +1493,9 @@ export declare class ObjectDetector extends VisionTaskRunner {
1243
1493
  * @param timestamp The timestamp of the current frame, in ms.
1244
1494
  * @param imageProcessingOptions the `ImageProcessingOptions` specifying how
1245
1495
  * to process the input image before running inference.
1246
- * @return The list of detected objects
1496
+ * @return A result containing a list of detected objects.
1247
1497
  */
1248
- detectForVideo(videoFrame: ImageSource, timestamp: number, imageProcessingOptions?: ImageProcessingOptions): Detection[];
1498
+ detectForVideo(videoFrame: ImageSource, timestamp: number, imageProcessingOptions?: ImageProcessingOptions): DetectionResult;
1249
1499
  }
1250
1500
 
1251
1501
  /** Options to configure the MediaPipe Object Detector Task */
@@ -1283,7 +1533,7 @@ declare type RunningMode = "IMAGE" | "VIDEO";
1283
1533
  /**
1284
1534
  * The segmentation tasks return the segmentation either as a WebGLTexture (when
1285
1535
  * the output is on GPU) or as a typed JavaScript arrays for CPU-based
1286
- * category or confidence masks. `Uint8ClampedArray`s are used to represend
1536
+ * category or confidence masks. `Uint8ClampedArray`s are used to represent
1287
1537
  * CPU-based category masks and `Float32Array`s are used for CPU-based
1288
1538
  * confidence masks.
1289
1539
  */
@@ -1316,7 +1566,7 @@ declare interface TaskRunnerOptions {
1316
1566
  declare interface VisionTaskOptions extends TaskRunnerOptions {
1317
1567
  /**
1318
1568
  * The canvas element to bind textures to. This has to be set for GPU
1319
- * processing. The task will initialize a WebGL context and throw an eror if
1569
+ * processing. The task will initialize a WebGL context and throw an error if
1320
1570
  * this fails (e.g. if you have already initialized a different type of
1321
1571
  * context).
1322
1572
  */