PyPI - mediapipe-nightly - Versions diffs - 0.10.21.post20241223__cp311-cp311-manylinux_2_28_x86_64.whl - Mend

mediapipe-nightly 0.10.21.post20241223__cp311-cp311-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (593) hide show

mediapipe/python/solutions/pose.py ADDED Viewed

@@ -0,0 +1,192 @@
+# Copyright 2020-2021 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""MediaPipe Pose."""
+import enum
+from typing import NamedTuple
+import numpy as np
+# The following imports are needed because python pb2 silently discards
+# unknown protobuf fields.
+# pylint: disable=unused-import
+from mediapipe.calculators.core import constant_side_packet_calculator_pb2
+from mediapipe.calculators.core import gate_calculator_pb2
+from mediapipe.calculators.core import split_vector_calculator_pb2
+from mediapipe.calculators.image import warp_affine_calculator_pb2
+from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
+from mediapipe.calculators.tensor import inference_calculator_pb2
+from mediapipe.calculators.tensor import tensors_to_classification_calculator_pb2
+from mediapipe.calculators.tensor import tensors_to_detections_calculator_pb2
+from mediapipe.calculators.tensor import tensors_to_landmarks_calculator_pb2
+from mediapipe.calculators.tensor import tensors_to_segmentation_calculator_pb2
+from mediapipe.calculators.tflite import ssd_anchors_calculator_pb2
+from mediapipe.calculators.util import detections_to_rects_calculator_pb2
+from mediapipe.calculators.util import landmarks_smoothing_calculator_pb2
+from mediapipe.calculators.util import local_file_contents_calculator_pb2
+from mediapipe.calculators.util import logic_calculator_pb2
+from mediapipe.calculators.util import non_max_suppression_calculator_pb2
+from mediapipe.calculators.util import rect_transformation_calculator_pb2
+from mediapipe.calculators.util import thresholding_calculator_pb2
+from mediapipe.calculators.util import visibility_smoothing_calculator_pb2
+from mediapipe.framework.tool import switch_container_pb2
+# pylint: enable=unused-import
+from mediapipe.python.solution_base import SolutionBase
+from mediapipe.python.solutions import download_utils
+# pylint: disable=unused-import
+from mediapipe.python.solutions.pose_connections import POSE_CONNECTIONS
+# pylint: enable=unused-import
+class PoseLandmark(enum.IntEnum):
+  """The 33 pose landmarks.
+  Each member maps a landmark name to its integer id, numbered 0 through 32.
+  Left/right members alternate, so a left landmark's id is followed by the id
+  of its right counterpart from LEFT_EAR onwards.
+  """
+  # Face.
+  NOSE = 0
+  LEFT_EYE_INNER = 1
+  LEFT_EYE = 2
+  LEFT_EYE_OUTER = 3
+  RIGHT_EYE_INNER = 4
+  RIGHT_EYE = 5
+  RIGHT_EYE_OUTER = 6
+  LEFT_EAR = 7
+  RIGHT_EAR = 8
+  MOUTH_LEFT = 9
+  MOUTH_RIGHT = 10
+  # Arms.
+  LEFT_SHOULDER = 11
+  RIGHT_SHOULDER = 12
+  LEFT_ELBOW = 13
+  RIGHT_ELBOW = 14
+  LEFT_WRIST = 15
+  RIGHT_WRIST = 16
+  # Hands.
+  LEFT_PINKY = 17
+  RIGHT_PINKY = 18
+  LEFT_INDEX = 19
+  RIGHT_INDEX = 20
+  LEFT_THUMB = 21
+  RIGHT_THUMB = 22
+  # Legs.
+  LEFT_HIP = 23
+  RIGHT_HIP = 24
+  LEFT_KNEE = 25
+  RIGHT_KNEE = 26
+  LEFT_ANKLE = 27
+  RIGHT_ANKLE = 28
+  # Feet.
+  LEFT_HEEL = 29
+  RIGHT_HEEL = 30
+  LEFT_FOOT_INDEX = 31
+  RIGHT_FOOT_INDEX = 32
+# Binary graph that the Pose solution hands to SolutionBase.
+_BINARYPB_FILE_PATH = 'mediapipe/modules/pose_landmark/pose_landmark_cpu.binarypb'
+def _download_oss_pose_landmark_model(model_complexity):
+  """Fetches the lite or heavy pose landmark model when it is requested.
+  Args:
+    model_complexity: 0 requests the lite model and 2 requests the heavy
+      model. For any other value nothing is downloaded.
+  """
+  # (complexity, model path) pairs, checked in order with `==`.
+  downloadable_models = (
+      (0, 'mediapipe/modules/pose_landmark/pose_landmark_lite.tflite'),
+      (2, 'mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite'),
+  )
+  for complexity, model_path in downloadable_models:
+    if model_complexity == complexity:
+      download_utils.download_oss_model(model_path)
+      return
+class Pose(SolutionBase):
+  """MediaPipe Pose solution.
+  Runs the pose landmark graph on an RGB image and reports the pose landmarks
+  of the most prominent person detected.
+  Please refer to https://solutions.mediapipe.dev/pose#python-solution-api for
+  usage examples.
+  """
+  def __init__(self,
+               static_image_mode=False,
+               model_complexity=1,
+               smooth_landmarks=True,
+               enable_segmentation=False,
+               smooth_segmentation=True,
+               min_detection_confidence=0.5,
+               min_tracking_confidence=0.5):
+    """Builds the pose graph with the requested configuration.
+    Args:
+      static_image_mode: Whether to treat the input images as a batch of static
+        and possibly unrelated images, or a video stream. See details in
+        https://solutions.mediapipe.dev/pose#static_image_mode.
+      model_complexity: Complexity of the pose landmark model: 0, 1 or 2. See
+        details in https://solutions.mediapipe.dev/pose#model_complexity.
+      smooth_landmarks: Whether to filter landmarks across different input
+        images to reduce jitter. Has no effect in static image mode. See
+        details in https://solutions.mediapipe.dev/pose#smooth_landmarks.
+      enable_segmentation: Whether to predict segmentation mask. See details in
+        https://solutions.mediapipe.dev/pose#enable_segmentation.
+      smooth_segmentation: Whether to filter segmentation across different
+        input images to reduce jitter. Has no effect in static image mode. See
+        details in https://solutions.mediapipe.dev/pose#smooth_segmentation.
+      min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for person
+        detection to be considered successful. See details in
+        https://solutions.mediapipe.dev/pose#min_detection_confidence.
+      min_tracking_confidence: Minimum confidence value ([0.0, 1.0]) for the
+        pose landmarks to be considered tracked successfully. See details in
+        https://solutions.mediapipe.dev/pose#min_tracking_confidence.
+    """
+    _download_oss_pose_landmark_model(model_complexity)
+    # Smoothing and reuse of previous landmarks are only switched on when the
+    # input is treated as a video stream.
+    is_video_stream = not static_image_mode
+    graph_side_inputs = {
+        'model_complexity': model_complexity,
+        'smooth_landmarks': smooth_landmarks and is_video_stream,
+        'enable_segmentation': enable_segmentation,
+        'smooth_segmentation': smooth_segmentation and is_video_stream,
+        'use_prev_landmarks': is_video_stream,
+    }
+    graph_calculator_params = {
+        'posedetectioncpu__TensorsToDetectionsCalculator.min_score_thresh':
+            min_detection_confidence,
+        'poselandmarkbyroicpu__tensorstoposelandmarksandsegmentation__ThresholdingCalculator.threshold':
+            min_tracking_confidence,
+    }
+    super().__init__(
+        binary_graph_path=_BINARYPB_FILE_PATH,
+        side_inputs=graph_side_inputs,
+        calculator_params=graph_calculator_params,
+        outputs=['pose_landmarks', 'pose_world_landmarks', 'segmentation_mask'])
+  def process(self, image: np.ndarray) -> NamedTuple:
+    """Runs pose estimation on one RGB image.
+    Args:
+      image: An RGB image represented as a numpy ndarray.
+    Raises:
+      RuntimeError: If the underlying graph throws any error.
+      ValueError: If the input image is not three channel RGB.
+    Returns:
+      A NamedTuple with fields describing the landmarks on the most prominent
+      person detected:
+        1) "pose_landmarks" field that contains the pose landmarks.
+        2) "pose_world_landmarks" field that contains the pose landmarks in
+           real-world 3D coordinates that are in meters with the origin at the
+           center between hips.
+        3) "segmentation_mask" field that contains the segmentation mask if
+           "enable_segmentation" is set to true.
+      The "presence" field is cleared on every returned landmark.
+    """
+    results = super().process(input_data={'image': image})
+    # Both landmark outputs get the same treatment: drop "presence" from each
+    # landmark when the output is populated.
+    for field_name in ('pose_landmarks', 'pose_world_landmarks'):
+      landmark_list = getattr(results, field_name)
+      if not landmark_list:
+        continue
+      for landmark in landmark_list.landmark:
+        landmark.ClearField('presence')
+    return results

mediapipe/python/solutions/pose_connections.py ADDED Viewed

@@ -0,0 +1,22 @@
+# Copyright 2021 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""MediaPipe Pose connections."""
+# Pairs of landmark ids that are joined by a line when a pose is drawn.
+POSE_CONNECTIONS = frozenset([
+    # Face: nose to each eye and ear, plus the mouth.
+    (0, 1), (1, 2), (2, 3), (3, 7),
+    (0, 4), (4, 5), (5, 6), (6, 8),
+    (9, 10),
+    # Shoulder line.
+    (11, 12),
+    # Left arm and hand (odd ids).
+    (11, 13), (13, 15), (15, 17), (15, 19), (15, 21), (17, 19),
+    # Right arm and hand (even ids).
+    (12, 14), (14, 16), (16, 18), (16, 20), (16, 22), (18, 20),
+    # Torso.
+    (11, 23), (12, 24), (23, 24),
+    # Legs.
+    (23, 25), (24, 26), (25, 27), (26, 28),
+    # Feet.
+    (27, 29), (28, 30), (29, 31), (30, 32), (27, 31), (28, 32),
+])

mediapipe/python/solutions/pose_test.py ADDED Viewed

@@ -0,0 +1,262 @@
+# Copyright 2020 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for mediapipe.python.solutions.pose."""
+import json
+import os
+# pylint: disable=unused-import
+import tempfile
+# pylint: enable=unused-import
+from typing import NamedTuple
+from absl.testing import absltest
+from absl.testing import parameterized
+import cv2
+import numpy as np
+import numpy.testing as npt
+from PIL import Image
+# resources dependency
+# undeclared dependency
+from mediapipe.python.solutions import drawing_styles
+from mediapipe.python.solutions import drawing_utils as mp_drawing
+from mediapipe.python.solutions import pose as mp_pose
+# NOTE(review): not referenced by any test in this file; the tests build
+# their paths from os.path.dirname(__file__) instead.
+TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
+# Maximum per-coordinate deviation allowed in test_on_image.
+DIFF_THRESHOLD = 15  # pixels
+# Expected (x, y) pixel position of each of the 33 landmarks on
+# testdata/pose.jpg, one row per landmark.
+EXPECTED_POSE_LANDMARKS = np.array([[460, 283], [467, 273], [471, 273],
+                                    [474, 273], [465, 273], [465, 273],
+                                    [466, 273], [491, 277], [480, 277],
+                                    [470, 294], [465, 294], [545, 319],
+                                    [453, 329], [622, 323], [375, 316],
+                                    [696, 316], [299, 307], [719, 316],
+                                    [278, 306], [721, 311], [274, 304],
+                                    [713, 313], [283, 306], [520, 476],
+                                    [467, 471], [612, 550], [358, 490],
+                                    [701, 613], [349, 611], [709, 624],
+                                    [363, 630], [730, 633], [303, 628]])
+# Maximum per-coordinate deviation allowed for world landmarks in
+# test_on_image.
+WORLD_DIFF_THRESHOLD = 0.2  # meters
+# Expected (x, y, z) world coordinates of each of the 33 landmarks on
+# testdata/pose.jpg, one row per landmark.
+EXPECTED_POSE_WORLD_LANDMARKS = np.array([
+    [-0.11, -0.59, -0.15], [-0.09, -0.64, -0.16], [-0.09, -0.64, -0.16],
+    [-0.09, -0.64, -0.16], [-0.11, -0.64, -0.14], [-0.11, -0.64, -0.14],
+    [-0.11, -0.64, -0.14], [0.01, -0.65, -0.15], [-0.06, -0.64, -0.05],
+    [-0.07, -0.57, -0.15], [-0.09, -0.57, -0.12], [0.18, -0.49, -0.09],
+    [-0.14, -0.5, -0.03], [0.41, -0.48, -0.11], [-0.42, -0.5, -0.02],
+    [0.64, -0.49, -0.17], [-0.63, -0.51, -0.13], [0.7, -0.5, -0.19],
+    [-0.71, -0.53, -0.15], [0.72, -0.51, -0.23], [-0.69, -0.54, -0.19],
+    [0.66, -0.49, -0.19], [-0.64, -0.52, -0.15], [0.09, 0., -0.04],
+    [-0.09, -0., 0.03], [0.41, 0.23, -0.09], [-0.43, 0.1, -0.11],
+    [0.69, 0.49, -0.04], [-0.48, 0.47, -0.02], [0.72, 0.52, -0.04],
+    [-0.48, 0.51, -0.02], [0.8, 0.5, -0.14], [-0.59, 0.52, -0.11],
+])
+# Minimum intersection-over-union between expected and actual segmentation.
+IOU_THRESHOLD = 0.85  # ratio in [0, 1], not a percentage
+class PoseTest(parameterized.TestCase):
+  """Tests the Pose solution on a still image and on a video."""
+  def _landmarks_list_to_array(self, landmark_list, image_shape):
+    """Converts a landmark list to an array of (x, y, z) in pixel units.
+    x and z are scaled by the image width and y by the image height, both
+    taken from image_shape (rows, cols, channels).
+    """
+    rows, cols, _ = image_shape
+    return np.asarray([(lmk.x * cols, lmk.y * rows, lmk.z * cols)
+                       for lmk in landmark_list.landmark])
+  def _world_landmarks_list_to_array(self, landmark_list):
+    """Converts a landmark list to an array of unscaled (x, y, z) rows."""
+    return np.asarray([(lmk.x, lmk.y, lmk.z)
+                       for lmk in landmark_list.landmark])
+  def _assert_diff_less(self, array1, array2, threshold):
+    """Asserts that every element-wise absolute difference is below threshold."""
+    npt.assert_array_less(np.abs(array1 - array2), threshold)
+  def _get_output_path(self, name):
+    """Returns a temp-dir path made of the current test method name plus name."""
+    return os.path.join(tempfile.gettempdir(), self.id().split('.')[-1] + name)
+  def _annotate(self, frame: np.ndarray, results: NamedTuple, idx: int):
+    """Draws the pose landmarks on frame and writes it out as a PNG.
+    Note that frame is drawn on in place.
+    """
+    mp_drawing.draw_landmarks(
+        frame,
+        results.pose_landmarks,
+        mp_pose.POSE_CONNECTIONS,
+        landmark_drawing_spec=drawing_styles.get_default_pose_landmarks_style())
+    path = self._get_output_path('_frame_{}.png'.format(idx))
+    cv2.imwrite(path, frame)
+  def _annotate_segmentation(self, segmentation, expected_segmentation,
+                             idx: int):
+    """Saves the predicted mask and its diff against the expected mask."""
+    path = self._get_output_path('_segmentation_{}.png'.format(idx))
+    self._segmentation_to_rgb(segmentation).save(path)
+    path = self._get_output_path('_segmentation_diff_{}.png'.format(idx))
+    self._segmentation_diff_to_rgb(
+        expected_segmentation, segmentation).save(path)
+  def _rgb_to_segmentation(self, img, back_color=(255, 0, 0),
+                           front_color=(0, 0, 255)):
+    """Converts a two-color RGB image into a 0/1 uint8 mask.
+    Pixels equal to front_color become 1 and pixels equal to back_color
+    become 0. The test fails if any pixel has a different color.
+    """
+    img = np.array(img)
+    # Check all pixels are either front or back. The result of this check
+    # used to be discarded; assert it so a malformed mask is reported.
+    is_back = (img == back_color).all(axis=2)
+    is_front = (img == front_color).all(axis=2)
+    self.assertTrue(
+        np.logical_or(is_back, is_front).all(),
+        'Segmentation image contains pixels that are neither the back nor '
+        'the front color.')
+    segm = np.zeros(img.shape[:2], dtype=np.uint8)
+    segm[is_front] = 1
+    return segm
+  def _segmentation_to_rgb(self, segm, back_color=(255, 0, 0),
+                           front_color=(0, 0, 255)):
+    """Renders a 0/1 mask as an RGB PIL image using the two given colors."""
+    height, width = segm.shape
+    img = np.zeros((height, width, 3), dtype=np.uint8)
+    img[:, :] = back_color
+    img[segm == 1] = front_color
+    return Image.fromarray(img)
+  def _segmentation_iou(self, segm_expected, segm_actual):
+    """Returns intersection-over-union of two masks as a ratio.
+    A float32 epsilon in the denominator avoids division by zero when both
+    masks are empty.
+    """
+    intersection = segm_expected * segm_actual
+    expected_dot = segm_expected * segm_expected
+    actual_dot = segm_actual * segm_actual
+    eps = np.finfo(np.float32).eps
+    result = intersection.sum() / (expected_dot.sum() +
+                                   actual_dot.sum() -
+                                   intersection.sum() + eps)
+    return result
+  def _segmentation_diff_to_rgb(self, segm_expected, segm_actual,
+                                expected_color=(0, 255, 0),
+                                actual_color=(255, 0, 0)):
+    """Renders the disagreement between two masks as an RGB PIL image.
+    Pixels set only in the expected mask get expected_color, pixels set only
+    in the actual mask get actual_color, and all other pixels stay black.
+    """
+    height, width = segm_expected.shape
+    img = np.zeros((height, width, 3), dtype=np.uint8)
+    img[np.logical_and(segm_expected == 1, segm_actual == 0)] = expected_color
+    img[np.logical_and(segm_expected == 0, segm_actual == 1)] = actual_color
+    return Image.fromarray(img)
+  def test_invalid_image_shape(self):
+    """A four-channel input must be rejected with a ValueError."""
+    with mp_pose.Pose() as pose:
+      with self.assertRaisesRegex(
+          ValueError, 'Input image must contain three channel rgb data.'):
+        pose.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))
+  def test_blank_image(self):
+    """An all-white image yields neither landmarks nor a segmentation mask."""
+    with mp_pose.Pose(enable_segmentation=True) as pose:
+      image = np.zeros([100, 100, 3], dtype=np.uint8)
+      image.fill(255)
+      results = pose.process(image)
+      self.assertIsNone(results.pose_landmarks)
+      self.assertIsNone(results.segmentation_mask)
+  @parameterized.named_parameters(('static_lite', True, 0, 3),
+                                  ('static_full', True, 1, 3),
+                                  ('static_heavy', True, 2, 3),
+                                  ('video_lite', False, 0, 3),
+                                  ('video_full', False, 1, 3),
+                                  ('video_heavy', False, 2, 3))
+  def test_on_image(self, static_image_mode, model_complexity, num_frames):
+    """Checks landmarks, world landmarks and segmentation on one image.
+    The same image is processed num_frames times and every result is
+    compared against the expected module-level values.
+    """
+    image_path = os.path.join(os.path.dirname(__file__), 'testdata/pose.jpg')
+    expected_segmentation_path = os.path.join(
+        os.path.dirname(__file__), 'testdata/pose_segmentation.png')
+    image = cv2.imread(image_path)
+    expected_segmentation = self._rgb_to_segmentation(
+        Image.open(expected_segmentation_path).convert('RGB'))
+    with mp_pose.Pose(static_image_mode=static_image_mode,
+                      model_complexity=model_complexity,
+                      enable_segmentation=True) as pose:
+      for idx in range(num_frames):
+        results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+        segmentation = results.segmentation_mask.round().astype(np.uint8)
+        # TODO: Add rendering of world 3D when supported.
+        self._annotate(image.copy(), results, idx)
+        self._annotate_segmentation(segmentation, expected_segmentation, idx)
+        self._assert_diff_less(
+            self._landmarks_list_to_array(results.pose_landmarks,
+                                          image.shape)[:, :2],
+            EXPECTED_POSE_LANDMARKS, DIFF_THRESHOLD)
+        self._assert_diff_less(
+            self._world_landmarks_list_to_array(results.pose_world_landmarks),
+            EXPECTED_POSE_WORLD_LANDMARKS, WORLD_DIFF_THRESHOLD)
+        self.assertGreaterEqual(
+            self._segmentation_iou(expected_segmentation, segmentation),
+            IOU_THRESHOLD)
+  @parameterized.named_parameters(
+      ('full', 1, 'pose_squats.full.npz'))
+  def test_on_video(self, model_complexity, expected_name):
+    """Tests pose models on a video.
+    Per-frame predictions are dumped to the temp directory as .npz and .json
+    and then compared against the expected .npz in testdata.
+    """
+    # Set threshold for comparing actual and expected predictions in pixels.
+    diff_threshold = 15
+    world_diff_threshold = 0.1
+    video_path = os.path.join(os.path.dirname(__file__),
+                              'testdata/pose_squats.mp4')
+    expected_path = os.path.join(os.path.dirname(__file__),
+                                 'testdata/{}'.format(expected_name))
+    # Predict pose landmarks for each frame.
+    video_cap = cv2.VideoCapture(video_path)
+    actual_per_frame = []
+    actual_world_per_frame = []
+    frame_idx = 0
+    try:
+      with mp_pose.Pose(static_image_mode=False,
+                        model_complexity=model_complexity) as pose:
+        while True:
+          # Get next frame of the video.
+          success, input_frame = video_cap.read()
+          if not success:
+            break
+          # Run pose tracker.
+          input_frame = cv2.cvtColor(input_frame, cv2.COLOR_BGR2RGB)
+          result = pose.process(image=input_frame)
+          pose_landmarks = self._landmarks_list_to_array(result.pose_landmarks,
+                                                         input_frame.shape)
+          pose_world_landmarks = self._world_landmarks_list_to_array(
+              result.pose_world_landmarks)
+          actual_per_frame.append(pose_landmarks)
+          actual_world_per_frame.append(pose_world_landmarks)
+          input_frame = cv2.cvtColor(input_frame, cv2.COLOR_RGB2BGR)
+          self._annotate(input_frame, result, frame_idx)
+          frame_idx += 1
+    finally:
+      # Release the capture handle even when processing a frame fails.
+      video_cap.release()
+    actual = np.array(actual_per_frame)
+    actual_world = np.array(actual_world_per_frame)
+    # Dump actual .npz.
+    npz_path = self._get_output_path(expected_name)
+    np.savez(npz_path, predictions=actual, predictions_world=actual_world)
+    # Dump actual JSON.
+    json_path = self._get_output_path(expected_name.replace('.npz', '.json'))
+    with open(json_path, 'w') as fl:
+      dump_data = {
+          'predictions': np.around(actual, 3).tolist(),
+          'predictions_world': np.around(actual_world, 3).tolist()
+      }
+      fl.write(json.dumps(dump_data, indent=2, separators=(',', ': ')))
+    # Read both expected arrays from a single open of the archive and close
+    # it again afterwards.
+    with np.load(expected_path) as expected_data:
+      expected = expected_data['predictions']
+      expected_world = expected_data['predictions_world']
+    # Validate actual vs. expected landmarks.
+    self.assertEqual(
+        actual.shape, expected.shape,
+        'Unexpected shape of predictions: {} instead of {}'.format(
+            actual.shape, expected.shape))
+    self._assert_diff_less(
+        actual[..., :2], expected[..., :2], threshold=diff_threshold)
+    # Validate actual vs. expected world landmarks.
+    self.assertEqual(
+        actual_world.shape, expected_world.shape,
+        'Unexpected shape of world predictions: {} instead of {}'.format(
+            actual_world.shape, expected_world.shape))
+    self._assert_diff_less(
+        actual_world, expected_world, threshold=world_diff_threshold)
+if __name__ == '__main__':
+  absltest.main()

mediapipe/python/solutions/selfie_segmentation.py ADDED Viewed

@@ -0,0 +1,76 @@
+# Copyright 2021 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""MediaPipe Selfie Segmentation."""
+from typing import NamedTuple
+import numpy as np
+# The following imports are needed because python pb2 silently discards
+# unknown protobuf fields.
+# pylint: disable=unused-import
+from mediapipe.calculators.core import constant_side_packet_calculator_pb2
+from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
+from mediapipe.calculators.tensor import inference_calculator_pb2
+from mediapipe.calculators.tensor import tensors_to_segmentation_calculator_pb2
+from mediapipe.calculators.util import local_file_contents_calculator_pb2
+from mediapipe.framework.tool import switch_container_pb2
+# pylint: enable=unused-import
+from mediapipe.python.solution_base import SolutionBase
+# Binary graph that the SelfieSegmentation solution hands to SolutionBase.
+_BINARYPB_FILE_PATH = 'mediapipe/modules/selfie_segmentation/selfie_segmentation_cpu.binarypb'
+class SelfieSegmentation(SolutionBase):
+  """MediaPipe Selfie Segmentation solution.
+  Takes an RGB image and produces a segmentation mask for it.
+  Please refer to
+  https://solutions.mediapipe.dev/selfie_segmentation#python-solution-api for
+  usage examples.
+  """
+  def __init__(self, model_selection=0):
+    """Builds the selfie segmentation graph.
+    Args:
+      model_selection: 0 or 1. 0 to select a general-purpose model, and 1 to
+        select a model more optimized for landscape images. See details in
+        https://solutions.mediapipe.dev/selfie_segmentation#model_selection.
+    """
+    graph_side_inputs = {'model_selection': model_selection}
+    graph_outputs = ['segmentation_mask']
+    super().__init__(
+        binary_graph_path=_BINARYPB_FILE_PATH,
+        side_inputs=graph_side_inputs,
+        outputs=graph_outputs)
+  def process(self, image: np.ndarray) -> NamedTuple:
+    """Runs segmentation on one RGB image.
+    Args:
+      image: An RGB image represented as a numpy ndarray.
+    Raises:
+      RuntimeError: If the underlying graph throws any error.
+      ValueError: If the input image is not three channel RGB.
+    Returns:
+      A NamedTuple object with a "segmentation_mask" field that contains a float
+      type 2d np array representing the mask.
+    """
+    graph_inputs = {'image': image}
+    results = super().process(input_data=graph_inputs)
+    return results

mediapipe/python/solutions/selfie_segmentation_test.py ADDED Viewed

@@ -0,0 +1,68 @@
+# Copyright 2021 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for mediapipe.python.solutions.selfie_segmentation."""
+import os
+from absl.testing import absltest
+from absl.testing import parameterized
+import cv2
+import numpy as np
+# resources dependency
+# undeclared dependency
+from mediapipe.python.solutions import selfie_segmentation as mp_selfie_segmentation
+# NOTE(review): not referenced by any test in this file; the tests build
+# their paths from os.path.dirname(__file__) instead.
+TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
+class SelfieSegmentationTest(parameterized.TestCase):
+  """Tests the SelfieSegmentation solution on invalid, blank and real images."""
+  def _draw(self, frame: np.ndarray, mask: np.ndarray):
+    """Writes frame, clamped by mask, as a PNG into the temp directory.
+    Args:
+      frame: Image with three channels in its last axis.
+      mask: 2d mask; it is repeated across three channels and used as a
+        per-pixel upper bound on frame.
+    """
+    # tempfile is not among this module's imports, so the gettempdir() call
+    # below raised NameError. Import it locally to keep this fix
+    # self-contained.
+    import tempfile  # pylint: disable=g-import-not-at-top
+    frame = np.minimum(frame, np.stack((mask,) * 3, axis=-1))
+    path = os.path.join(tempfile.gettempdir(), self.id().split('.')[-1] + '.png')
+    cv2.imwrite(path, frame)
+  def test_invalid_image_shape(self):
+    """A four-channel input must be rejected with a ValueError."""
+    with mp_selfie_segmentation.SelfieSegmentation() as selfie_segmentation:
+      with self.assertRaisesRegex(
+          ValueError, 'Input image must contain three channel rgb data.'):
+        selfie_segmentation.process(
+            np.arange(36, dtype=np.uint8).reshape(3, 3, 4))
+  def test_blank_image(self):
+    """An all-white image yields a mask whose scaled integer values are all below 1."""
+    with mp_selfie_segmentation.SelfieSegmentation() as selfie_segmentation:
+      image = np.zeros([100, 100, 3], dtype=np.uint8)
+      image.fill(255)
+      results = selfie_segmentation.process(image)
+      normalized_segmentation_mask = (results.segmentation_mask *
+                                      255).astype(int)
+      self.assertLess(np.amax(normalized_segmentation_mask), 1)
+  @parameterized.named_parameters(('general', 0), ('landscape', 1))
+  def test_segmentation(self, model_selection):
+    """Runs each model on the portrait image and dumps the masked frame.
+    This only checks that processing and drawing complete; the mask values
+    themselves are not asserted.
+    """
+    image_path = os.path.join(os.path.dirname(__file__),
+                              'testdata/portrait.jpg')
+    image = cv2.imread(image_path)
+    with mp_selfie_segmentation.SelfieSegmentation(
+        model_selection=model_selection) as selfie_segmentation:
+      results = selfie_segmentation.process(
+          cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+      normalized_segmentation_mask = (results.segmentation_mask *
+                                      255).astype(int)
+      self._draw(image.copy(), normalized_segmentation_mask)
+if __name__ == '__main__':
+  absltest.main()