mediapipe-nightly 0.10.21.post20241223__cp312-cp312-macosx_11_0_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mediapipe/__init__.py +26 -0
- mediapipe/calculators/__init__.py +0 -0
- mediapipe/calculators/audio/__init__.py +0 -0
- mediapipe/calculators/audio/mfcc_mel_calculators_pb2.py +33 -0
- mediapipe/calculators/audio/rational_factor_resample_calculator_pb2.py +33 -0
- mediapipe/calculators/audio/spectrogram_calculator_pb2.py +37 -0
- mediapipe/calculators/audio/stabilized_log_calculator_pb2.py +31 -0
- mediapipe/calculators/audio/time_series_framer_calculator_pb2.py +33 -0
- mediapipe/calculators/core/__init__.py +0 -0
- mediapipe/calculators/core/bypass_calculator_pb2.py +31 -0
- mediapipe/calculators/core/clip_vector_size_calculator_pb2.py +31 -0
- mediapipe/calculators/core/concatenate_vector_calculator_pb2.py +31 -0
- mediapipe/calculators/core/constant_side_packet_calculator_pb2.py +39 -0
- mediapipe/calculators/core/dequantize_byte_array_calculator_pb2.py +31 -0
- mediapipe/calculators/core/flow_limiter_calculator_pb2.py +32 -0
- mediapipe/calculators/core/gate_calculator_pb2.py +33 -0
- mediapipe/calculators/core/get_vector_item_calculator_pb2.py +31 -0
- mediapipe/calculators/core/graph_profile_calculator_pb2.py +31 -0
- mediapipe/calculators/core/packet_cloner_calculator_pb2.py +31 -0
- mediapipe/calculators/core/packet_resampler_calculator_pb2.py +33 -0
- mediapipe/calculators/core/packet_thinner_calculator_pb2.py +33 -0
- mediapipe/calculators/core/quantize_float_vector_calculator_pb2.py +31 -0
- mediapipe/calculators/core/sequence_shift_calculator_pb2.py +31 -0
- mediapipe/calculators/core/split_vector_calculator_pb2.py +33 -0
- mediapipe/calculators/image/__init__.py +0 -0
- mediapipe/calculators/image/bilateral_filter_calculator_pb2.py +31 -0
- mediapipe/calculators/image/feature_detector_calculator_pb2.py +31 -0
- mediapipe/calculators/image/image_clone_calculator_pb2.py +31 -0
- mediapipe/calculators/image/image_cropping_calculator_pb2.py +33 -0
- mediapipe/calculators/image/image_transformation_calculator_pb2.py +38 -0
- mediapipe/calculators/image/mask_overlay_calculator_pb2.py +33 -0
- mediapipe/calculators/image/opencv_encoded_image_to_image_frame_calculator_pb2.py +31 -0
- mediapipe/calculators/image/opencv_image_encoder_calculator_pb2.py +35 -0
- mediapipe/calculators/image/recolor_calculator_pb2.py +34 -0
- mediapipe/calculators/image/rotation_mode_pb2.py +29 -0
- mediapipe/calculators/image/scale_image_calculator_pb2.py +34 -0
- mediapipe/calculators/image/segmentation_smoothing_calculator_pb2.py +31 -0
- mediapipe/calculators/image/set_alpha_calculator_pb2.py +31 -0
- mediapipe/calculators/image/warp_affine_calculator_pb2.py +36 -0
- mediapipe/calculators/internal/__init__.py +0 -0
- mediapipe/calculators/internal/callback_packet_calculator_pb2.py +33 -0
- mediapipe/calculators/tensor/__init__.py +0 -0
- mediapipe/calculators/tensor/audio_to_tensor_calculator_pb2.py +35 -0
- mediapipe/calculators/tensor/bert_preprocessor_calculator_pb2.py +31 -0
- mediapipe/calculators/tensor/feedback_tensors_calculator_pb2.py +37 -0
- mediapipe/calculators/tensor/image_to_tensor_calculator_pb2.py +40 -0
- mediapipe/calculators/tensor/inference_calculator_pb2.py +63 -0
- mediapipe/calculators/tensor/landmarks_to_tensor_calculator_pb2.py +33 -0
- mediapipe/calculators/tensor/regex_preprocessor_calculator_pb2.py +31 -0
- mediapipe/calculators/tensor/tensor_converter_calculator_pb2.py +34 -0
- mediapipe/calculators/tensor/tensor_to_joints_calculator_pb2.py +31 -0
- mediapipe/calculators/tensor/tensors_readback_calculator_pb2.py +35 -0
- mediapipe/calculators/tensor/tensors_to_audio_calculator_pb2.py +33 -0
- mediapipe/calculators/tensor/tensors_to_classification_calculator_pb2.py +44 -0
- mediapipe/calculators/tensor/tensors_to_detections_calculator_pb2.py +39 -0
- mediapipe/calculators/tensor/tensors_to_floats_calculator_pb2.py +33 -0
- mediapipe/calculators/tensor/tensors_to_landmarks_calculator_pb2.py +33 -0
- mediapipe/calculators/tensor/tensors_to_segmentation_calculator_pb2.py +34 -0
- mediapipe/calculators/tensor/vector_to_tensor_calculator_pb2.py +27 -0
- mediapipe/calculators/tflite/__init__.py +0 -0
- mediapipe/calculators/tflite/ssd_anchors_calculator_pb2.py +32 -0
- mediapipe/calculators/tflite/tflite_converter_calculator_pb2.py +33 -0
- mediapipe/calculators/tflite/tflite_custom_op_resolver_calculator_pb2.py +31 -0
- mediapipe/calculators/tflite/tflite_inference_calculator_pb2.py +49 -0
- mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator_pb2.py +31 -0
- mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator_pb2.py +31 -0
- mediapipe/calculators/tflite/tflite_tensors_to_landmarks_calculator_pb2.py +33 -0
- mediapipe/calculators/tflite/tflite_tensors_to_segmentation_calculator_pb2.py +31 -0
- mediapipe/calculators/util/__init__.py +0 -0
- mediapipe/calculators/util/align_hand_to_pose_in_world_calculator_pb2.py +31 -0
- mediapipe/calculators/util/annotation_overlay_calculator_pb2.py +32 -0
- mediapipe/calculators/util/association_calculator_pb2.py +31 -0
- mediapipe/calculators/util/collection_has_min_size_calculator_pb2.py +31 -0
- mediapipe/calculators/util/combine_joints_calculator_pb2.py +36 -0
- mediapipe/calculators/util/detection_label_id_to_text_calculator_pb2.py +36 -0
- mediapipe/calculators/util/detections_to_rects_calculator_pb2.py +33 -0
- mediapipe/calculators/util/detections_to_render_data_calculator_pb2.py +33 -0
- mediapipe/calculators/util/face_to_rect_calculator_pb2.py +26 -0
- mediapipe/calculators/util/filter_detections_calculator_pb2.py +31 -0
- mediapipe/calculators/util/flat_color_image_calculator_pb2.py +32 -0
- mediapipe/calculators/util/labels_to_render_data_calculator_pb2.py +34 -0
- mediapipe/calculators/util/landmark_projection_calculator_pb2.py +31 -0
- mediapipe/calculators/util/landmarks_refinement_calculator_pb2.py +41 -0
- mediapipe/calculators/util/landmarks_smoothing_calculator_pb2.py +33 -0
- mediapipe/calculators/util/landmarks_to_detection_calculator_pb2.py +31 -0
- mediapipe/calculators/util/landmarks_to_floats_calculator_pb2.py +31 -0
- mediapipe/calculators/util/landmarks_to_render_data_calculator_pb2.py +32 -0
- mediapipe/calculators/util/landmarks_transformation_calculator_pb2.py +37 -0
- mediapipe/calculators/util/latency_pb2.py +26 -0
- mediapipe/calculators/util/local_file_contents_calculator_pb2.py +31 -0
- mediapipe/calculators/util/logic_calculator_pb2.py +34 -0
- mediapipe/calculators/util/non_max_suppression_calculator_pb2.py +35 -0
- mediapipe/calculators/util/packet_frequency_calculator_pb2.py +31 -0
- mediapipe/calculators/util/packet_frequency_pb2.py +26 -0
- mediapipe/calculators/util/packet_latency_calculator_pb2.py +31 -0
- mediapipe/calculators/util/rect_to_render_data_calculator_pb2.py +32 -0
- mediapipe/calculators/util/rect_to_render_scale_calculator_pb2.py +31 -0
- mediapipe/calculators/util/rect_transformation_calculator_pb2.py +31 -0
- mediapipe/calculators/util/refine_landmarks_from_heatmap_calculator_pb2.py +31 -0
- mediapipe/calculators/util/resource_provider_calculator_pb2.py +28 -0
- mediapipe/calculators/util/set_joints_visibility_calculator_pb2.py +41 -0
- mediapipe/calculators/util/thresholding_calculator_pb2.py +31 -0
- mediapipe/calculators/util/timed_box_list_id_to_label_calculator_pb2.py +31 -0
- mediapipe/calculators/util/timed_box_list_to_render_data_calculator_pb2.py +32 -0
- mediapipe/calculators/util/top_k_scores_calculator_pb2.py +31 -0
- mediapipe/calculators/util/visibility_copy_calculator_pb2.py +27 -0
- mediapipe/calculators/util/visibility_smoothing_calculator_pb2.py +31 -0
- mediapipe/calculators/video/__init__.py +0 -0
- mediapipe/calculators/video/box_detector_calculator_pb2.py +32 -0
- mediapipe/calculators/video/box_tracker_calculator_pb2.py +32 -0
- mediapipe/calculators/video/flow_packager_calculator_pb2.py +32 -0
- mediapipe/calculators/video/flow_to_image_calculator_pb2.py +31 -0
- mediapipe/calculators/video/motion_analysis_calculator_pb2.py +42 -0
- mediapipe/calculators/video/opencv_video_encoder_calculator_pb2.py +31 -0
- mediapipe/calculators/video/tool/__init__.py +0 -0
- mediapipe/calculators/video/tool/flow_quantizer_model_pb2.py +26 -0
- mediapipe/calculators/video/tracked_detection_manager_calculator_pb2.py +32 -0
- mediapipe/calculators/video/video_pre_stream_calculator_pb2.py +35 -0
- mediapipe/examples/__init__.py +14 -0
- mediapipe/examples/desktop/__init__.py +14 -0
- mediapipe/framework/__init__.py +0 -0
- mediapipe/framework/calculator_options_pb2.py +29 -0
- mediapipe/framework/calculator_pb2.py +59 -0
- mediapipe/framework/calculator_profile_pb2.py +48 -0
- mediapipe/framework/deps/__init__.py +0 -0
- mediapipe/framework/deps/proto_descriptor_pb2.py +29 -0
- mediapipe/framework/formats/__init__.py +0 -0
- mediapipe/framework/formats/affine_transform_data_pb2.py +28 -0
- mediapipe/framework/formats/annotation/__init__.py +0 -0
- mediapipe/framework/formats/annotation/locus_pb2.py +32 -0
- mediapipe/framework/formats/annotation/rasterization_pb2.py +29 -0
- mediapipe/framework/formats/body_rig_pb2.py +28 -0
- mediapipe/framework/formats/classification_pb2.py +31 -0
- mediapipe/framework/formats/detection_pb2.py +36 -0
- mediapipe/framework/formats/image_file_properties_pb2.py +26 -0
- mediapipe/framework/formats/image_format_pb2.py +29 -0
- mediapipe/framework/formats/landmark_pb2.py +37 -0
- mediapipe/framework/formats/location_data_pb2.py +38 -0
- mediapipe/framework/formats/matrix_data_pb2.py +31 -0
- mediapipe/framework/formats/motion/__init__.py +0 -0
- mediapipe/framework/formats/motion/optical_flow_field_data_pb2.py +30 -0
- mediapipe/framework/formats/object_detection/__init__.py +0 -0
- mediapipe/framework/formats/object_detection/anchor_pb2.py +26 -0
- mediapipe/framework/formats/rect_pb2.py +29 -0
- mediapipe/framework/formats/time_series_header_pb2.py +28 -0
- mediapipe/framework/graph_runtime_info_pb2.py +31 -0
- mediapipe/framework/mediapipe_options_pb2.py +27 -0
- mediapipe/framework/packet_factory_pb2.py +31 -0
- mediapipe/framework/packet_generator_pb2.py +33 -0
- mediapipe/framework/status_handler_pb2.py +28 -0
- mediapipe/framework/stream_handler/__init__.py +0 -0
- mediapipe/framework/stream_handler/default_input_stream_handler_pb2.py +27 -0
- mediapipe/framework/stream_handler/fixed_size_input_stream_handler_pb2.py +27 -0
- mediapipe/framework/stream_handler/sync_set_input_stream_handler_pb2.py +29 -0
- mediapipe/framework/stream_handler/timestamp_align_input_stream_handler_pb2.py +27 -0
- mediapipe/framework/stream_handler_pb2.py +30 -0
- mediapipe/framework/test_calculators_pb2.py +31 -0
- mediapipe/framework/thread_pool_executor_pb2.py +29 -0
- mediapipe/framework/tool/__init__.py +0 -0
- mediapipe/framework/tool/calculator_graph_template_pb2.py +44 -0
- mediapipe/framework/tool/field_data_pb2.py +28 -0
- mediapipe/framework/tool/node_chain_subgraph_pb2.py +31 -0
- mediapipe/framework/tool/packet_generator_wrapper_calculator_pb2.py +28 -0
- mediapipe/framework/tool/source_pb2.py +33 -0
- mediapipe/framework/tool/switch_container_pb2.py +32 -0
- mediapipe/gpu/__init__.py +0 -0
- mediapipe/gpu/copy_calculator_pb2.py +33 -0
- mediapipe/gpu/gl_animation_overlay_calculator_pb2.py +31 -0
- mediapipe/gpu/gl_context_options_pb2.py +31 -0
- mediapipe/gpu/gl_scaler_calculator_pb2.py +32 -0
- mediapipe/gpu/gl_surface_sink_calculator_pb2.py +32 -0
- mediapipe/gpu/gpu_origin_pb2.py +29 -0
- mediapipe/gpu/scale_mode_pb2.py +28 -0
- mediapipe/model_maker/__init__.py +27 -0
- mediapipe/model_maker/setup.py +107 -0
- mediapipe/modules/__init__.py +0 -0
- mediapipe/modules/face_detection/__init__.py +0 -0
- mediapipe/modules/face_detection/face_detection_full_range_cpu.binarypb +0 -0
- mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite +0 -0
- mediapipe/modules/face_detection/face_detection_pb2.py +30 -0
- mediapipe/modules/face_detection/face_detection_short_range.tflite +0 -0
- mediapipe/modules/face_detection/face_detection_short_range_cpu.binarypb +0 -0
- mediapipe/modules/face_geometry/__init__.py +0 -0
- mediapipe/modules/face_geometry/data/__init__.py +0 -0
- mediapipe/modules/face_geometry/effect_renderer_calculator_pb2.py +27 -0
- mediapipe/modules/face_geometry/env_generator_calculator_pb2.py +28 -0
- mediapipe/modules/face_geometry/geometry_pipeline_calculator_pb2.py +27 -0
- mediapipe/modules/face_geometry/libs/__init__.py +0 -0
- mediapipe/modules/face_geometry/protos/__init__.py +0 -0
- mediapipe/modules/face_geometry/protos/environment_pb2.py +31 -0
- mediapipe/modules/face_geometry/protos/face_geometry_pb2.py +29 -0
- mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata_pb2.py +32 -0
- mediapipe/modules/face_geometry/protos/mesh_3d_pb2.py +31 -0
- mediapipe/modules/face_landmark/__init__.py +0 -0
- mediapipe/modules/face_landmark/face_landmark.tflite +0 -0
- mediapipe/modules/face_landmark/face_landmark_front_cpu.binarypb +0 -0
- mediapipe/modules/face_landmark/face_landmark_with_attention.tflite +0 -0
- mediapipe/modules/hand_landmark/__init__.py +0 -0
- mediapipe/modules/hand_landmark/calculators/__init__.py +0 -0
- mediapipe/modules/hand_landmark/hand_landmark_full.tflite +0 -0
- mediapipe/modules/hand_landmark/hand_landmark_lite.tflite +0 -0
- mediapipe/modules/hand_landmark/hand_landmark_tracking_cpu.binarypb +0 -0
- mediapipe/modules/hand_landmark/handedness.txt +2 -0
- mediapipe/modules/holistic_landmark/__init__.py +0 -0
- mediapipe/modules/holistic_landmark/calculators/__init__.py +0 -0
- mediapipe/modules/holistic_landmark/calculators/roi_tracking_calculator_pb2.py +37 -0
- mediapipe/modules/holistic_landmark/hand_recrop.tflite +0 -0
- mediapipe/modules/holistic_landmark/holistic_landmark_cpu.binarypb +0 -0
- mediapipe/modules/iris_landmark/__init__.py +0 -0
- mediapipe/modules/iris_landmark/iris_landmark.tflite +0 -0
- mediapipe/modules/objectron/__init__.py +0 -0
- mediapipe/modules/objectron/calculators/__init__.py +0 -0
- mediapipe/modules/objectron/calculators/a_r_capture_metadata_pb2.py +102 -0
- mediapipe/modules/objectron/calculators/annotation_data_pb2.py +38 -0
- mediapipe/modules/objectron/calculators/belief_decoder_config_pb2.py +28 -0
- mediapipe/modules/objectron/calculators/camera_parameters_pb2.py +30 -0
- mediapipe/modules/objectron/calculators/filter_detection_calculator_pb2.py +35 -0
- mediapipe/modules/objectron/calculators/frame_annotation_to_rect_calculator_pb2.py +31 -0
- mediapipe/modules/objectron/calculators/frame_annotation_tracker_calculator_pb2.py +31 -0
- mediapipe/modules/objectron/calculators/lift_2d_frame_annotation_to_3d_calculator_pb2.py +32 -0
- mediapipe/modules/objectron/calculators/object_pb2.py +38 -0
- mediapipe/modules/objectron/calculators/tensors_to_objects_calculator_pb2.py +32 -0
- mediapipe/modules/objectron/calculators/tflite_tensors_to_objects_calculator_pb2.py +32 -0
- mediapipe/modules/objectron/object_detection_oidv4_labelmap.txt +24 -0
- mediapipe/modules/objectron/objectron_cpu.binarypb +0 -0
- mediapipe/modules/palm_detection/__init__.py +0 -0
- mediapipe/modules/palm_detection/palm_detection_full.tflite +0 -0
- mediapipe/modules/palm_detection/palm_detection_lite.tflite +0 -0
- mediapipe/modules/pose_detection/__init__.py +0 -0
- mediapipe/modules/pose_detection/pose_detection.tflite +0 -0
- mediapipe/modules/pose_landmark/__init__.py +0 -0
- mediapipe/modules/pose_landmark/pose_landmark_cpu.binarypb +0 -0
- mediapipe/modules/pose_landmark/pose_landmark_full.tflite +0 -0
- mediapipe/modules/selfie_segmentation/__init__.py +0 -0
- mediapipe/modules/selfie_segmentation/selfie_segmentation.tflite +0 -0
- mediapipe/modules/selfie_segmentation/selfie_segmentation_cpu.binarypb +0 -0
- mediapipe/modules/selfie_segmentation/selfie_segmentation_landscape.tflite +0 -0
- mediapipe/python/__init__.py +29 -0
- mediapipe/python/_framework_bindings/arm64.cpython-312-darwin.so +0 -0
- mediapipe/python/_framework_bindings.cpython-312-darwin.so +0 -0
- mediapipe/python/calculator_graph_test.py +251 -0
- mediapipe/python/image_frame_test.py +194 -0
- mediapipe/python/image_test.py +218 -0
- mediapipe/python/packet_creator.py +275 -0
- mediapipe/python/packet_getter.py +120 -0
- mediapipe/python/packet_test.py +533 -0
- mediapipe/python/solution_base.py +604 -0
- mediapipe/python/solution_base_test.py +396 -0
- mediapipe/python/solutions/__init__.py +27 -0
- mediapipe/python/solutions/download_utils.py +37 -0
- mediapipe/python/solutions/drawing_styles.py +249 -0
- mediapipe/python/solutions/drawing_utils.py +320 -0
- mediapipe/python/solutions/drawing_utils_test.py +258 -0
- mediapipe/python/solutions/face_detection.py +105 -0
- mediapipe/python/solutions/face_detection_test.py +92 -0
- mediapipe/python/solutions/face_mesh.py +125 -0
- mediapipe/python/solutions/face_mesh_connections.py +500 -0
- mediapipe/python/solutions/face_mesh_test.py +170 -0
- mediapipe/python/solutions/hands.py +153 -0
- mediapipe/python/solutions/hands_connections.py +32 -0
- mediapipe/python/solutions/hands_test.py +219 -0
- mediapipe/python/solutions/holistic.py +167 -0
- mediapipe/python/solutions/holistic_test.py +142 -0
- mediapipe/python/solutions/objectron.py +288 -0
- mediapipe/python/solutions/objectron_test.py +81 -0
- mediapipe/python/solutions/pose.py +192 -0
- mediapipe/python/solutions/pose_connections.py +22 -0
- mediapipe/python/solutions/pose_test.py +262 -0
- mediapipe/python/solutions/selfie_segmentation.py +76 -0
- mediapipe/python/solutions/selfie_segmentation_test.py +68 -0
- mediapipe/python/timestamp_test.py +78 -0
- mediapipe/tasks/__init__.py +14 -0
- mediapipe/tasks/cc/__init__.py +0 -0
- mediapipe/tasks/cc/audio/__init__.py +0 -0
- mediapipe/tasks/cc/audio/audio_classifier/__init__.py +0 -0
- mediapipe/tasks/cc/audio/audio_classifier/proto/__init__.py +0 -0
- mediapipe/tasks/cc/audio/audio_classifier/proto/audio_classifier_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/audio/audio_embedder/__init__.py +0 -0
- mediapipe/tasks/cc/audio/audio_embedder/proto/__init__.py +0 -0
- mediapipe/tasks/cc/audio/audio_embedder/proto/audio_embedder_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/audio/core/__init__.py +0 -0
- mediapipe/tasks/cc/audio/utils/__init__.py +0 -0
- mediapipe/tasks/cc/components/__init__.py +0 -0
- mediapipe/tasks/cc/components/calculators/__init__.py +0 -0
- mediapipe/tasks/cc/components/calculators/classification_aggregation_calculator_pb2.py +31 -0
- mediapipe/tasks/cc/components/calculators/score_calibration_calculator_pb2.py +35 -0
- mediapipe/tasks/cc/components/calculators/tensors_to_embeddings_calculator_pb2.py +32 -0
- mediapipe/tasks/cc/components/containers/__init__.py +0 -0
- mediapipe/tasks/cc/components/containers/proto/__init__.py +0 -0
- mediapipe/tasks/cc/components/containers/proto/classifications_pb2.py +30 -0
- mediapipe/tasks/cc/components/containers/proto/embeddings_pb2.py +35 -0
- mediapipe/tasks/cc/components/containers/proto/landmarks_detection_result_pb2.py +32 -0
- mediapipe/tasks/cc/components/processors/__init__.py +0 -0
- mediapipe/tasks/cc/components/processors/proto/__init__.py +0 -0
- mediapipe/tasks/cc/components/processors/proto/classification_postprocessing_graph_options_pb2.py +38 -0
- mediapipe/tasks/cc/components/processors/proto/classifier_options_pb2.py +27 -0
- mediapipe/tasks/cc/components/processors/proto/detection_postprocessing_graph_options_pb2.py +36 -0
- mediapipe/tasks/cc/components/processors/proto/detector_options_pb2.py +27 -0
- mediapipe/tasks/cc/components/processors/proto/embedder_options_pb2.py +27 -0
- mediapipe/tasks/cc/components/processors/proto/embedding_postprocessing_graph_options_pb2.py +32 -0
- mediapipe/tasks/cc/components/processors/proto/image_preprocessing_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/components/processors/proto/text_model_type_pb2.py +28 -0
- mediapipe/tasks/cc/components/processors/proto/text_preprocessing_graph_options_pb2.py +32 -0
- mediapipe/tasks/cc/components/utils/__init__.py +0 -0
- mediapipe/tasks/cc/core/__init__.py +0 -0
- mediapipe/tasks/cc/core/proto/__init__.py +0 -0
- mediapipe/tasks/cc/core/proto/acceleration_pb2.py +28 -0
- mediapipe/tasks/cc/core/proto/base_options_pb2.py +30 -0
- mediapipe/tasks/cc/core/proto/external_file_pb2.py +31 -0
- mediapipe/tasks/cc/core/proto/inference_subgraph_pb2.py +32 -0
- mediapipe/tasks/cc/core/proto/model_resources_calculator_pb2.py +32 -0
- mediapipe/tasks/cc/genai/__init__.py +0 -0
- mediapipe/tasks/cc/genai/inference/__init__.py +0 -0
- mediapipe/tasks/cc/genai/inference/c/__init__.py +0 -0
- mediapipe/tasks/cc/genai/inference/calculators/__init__.py +0 -0
- mediapipe/tasks/cc/genai/inference/calculators/detokenizer_calculator_pb2.py +27 -0
- mediapipe/tasks/cc/genai/inference/calculators/llm_gpu_calculator_pb2.py +32 -0
- mediapipe/tasks/cc/genai/inference/calculators/model_data_calculator_pb2.py +27 -0
- mediapipe/tasks/cc/genai/inference/calculators/tokenizer_calculator_pb2.py +29 -0
- mediapipe/tasks/cc/genai/inference/common/__init__.py +0 -0
- mediapipe/tasks/cc/genai/inference/proto/__init__.py +0 -0
- mediapipe/tasks/cc/genai/inference/proto/llm_file_metadata_pb2.py +32 -0
- mediapipe/tasks/cc/genai/inference/proto/llm_params_pb2.py +33 -0
- mediapipe/tasks/cc/genai/inference/proto/prompt_template_pb2.py +27 -0
- mediapipe/tasks/cc/genai/inference/proto/sampler_params_pb2.py +29 -0
- mediapipe/tasks/cc/genai/inference/proto/transformer_params_pb2.py +45 -0
- mediapipe/tasks/cc/genai/inference/utils/__init__.py +0 -0
- mediapipe/tasks/cc/genai/inference/utils/llm_utils/__init__.py +0 -0
- mediapipe/tasks/cc/genai/inference/utils/xnn_utils/__init__.py +0 -0
- mediapipe/tasks/cc/metadata/__init__.py +0 -0
- mediapipe/tasks/cc/metadata/python/__init__.py +0 -0
- mediapipe/tasks/cc/metadata/python/_pywrap_metadata_version/arm64.cpython-312-darwin.so +0 -0
- mediapipe/tasks/cc/metadata/python/_pywrap_metadata_version.cpython-312-darwin.so +0 -0
- mediapipe/tasks/cc/metadata/tests/__init__.py +0 -0
- mediapipe/tasks/cc/metadata/utils/__init__.py +0 -0
- mediapipe/tasks/cc/text/__init__.py +0 -0
- mediapipe/tasks/cc/text/custom_ops/__init__.py +0 -0
- mediapipe/tasks/cc/text/custom_ops/ragged/__init__.py +0 -0
- mediapipe/tasks/cc/text/custom_ops/sentencepiece/__init__.py +0 -0
- mediapipe/tasks/cc/text/custom_ops/sentencepiece/testdata/__init__.py +0 -0
- mediapipe/tasks/cc/text/language_detector/__init__.py +0 -0
- mediapipe/tasks/cc/text/language_detector/custom_ops/__init__.py +0 -0
- mediapipe/tasks/cc/text/language_detector/custom_ops/utils/__init__.py +0 -0
- mediapipe/tasks/cc/text/language_detector/custom_ops/utils/hash/__init__.py +0 -0
- mediapipe/tasks/cc/text/language_detector/custom_ops/utils/utf/__init__.py +0 -0
- mediapipe/tasks/cc/text/text_classifier/__init__.py +0 -0
- mediapipe/tasks/cc/text/text_classifier/proto/__init__.py +0 -0
- mediapipe/tasks/cc/text/text_classifier/proto/text_classifier_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/text/text_embedder/__init__.py +0 -0
- mediapipe/tasks/cc/text/text_embedder/proto/__init__.py +0 -0
- mediapipe/tasks/cc/text/text_embedder/proto/text_embedder_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/text/tokenizers/__init__.py +0 -0
- mediapipe/tasks/cc/text/utils/__init__.py +0 -0
- mediapipe/tasks/cc/vision/__init__.py +0 -0
- mediapipe/tasks/cc/vision/core/__init__.py +0 -0
- mediapipe/tasks/cc/vision/custom_ops/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_detector/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_detector/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_detector/proto/face_detector_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/face_geometry/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_geometry/calculators/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_geometry/calculators/env_generator_calculator_pb2.py +28 -0
- mediapipe/tasks/cc/vision/face_geometry/calculators/geometry_pipeline_calculator_pb2.py +29 -0
- mediapipe/tasks/cc/vision/face_geometry/data/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_geometry/libs/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_geometry/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_geometry/proto/environment_pb2.py +31 -0
- mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry_graph_options_pb2.py +29 -0
- mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry_pb2.py +29 -0
- mediapipe/tasks/cc/vision/face_geometry/proto/geometry_pipeline_metadata_pb2.py +32 -0
- mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d_pb2.py +31 -0
- mediapipe/tasks/cc/vision/face_landmarker/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_landmarker/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_landmarker/proto/face_blendshapes_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarker_graph_options_pb2.py +37 -0
- mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarks_detector_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/vision/face_landmarker/proto/tensors_to_face_landmarks_graph_options_pb2.py +32 -0
- mediapipe/tasks/cc/vision/face_stylizer/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_stylizer/calculators/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_stylizer/calculators/tensors_to_image_calculator_pb2.py +36 -0
- mediapipe/tasks/cc/vision/face_stylizer/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_stylizer/proto/face_stylizer_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/__init__.py +0 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/calculators/__init__.py +0 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/calculators/combined_prediction_calculator_pb2.py +33 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator_pb2.py +31 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_classifier_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_embedder_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_recognizer_graph_options_pb2.py +36 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/proto/hand_gesture_recognizer_graph_options_pb2.py +36 -0
- mediapipe/tasks/cc/vision/hand_detector/__init__.py +0 -0
- mediapipe/tasks/cc/vision/hand_detector/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_result_pb2.py +30 -0
- mediapipe/tasks/cc/vision/hand_landmarker/__init__.py +0 -0
- mediapipe/tasks/cc/vision/hand_landmarker/calculators/__init__.py +0 -0
- mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_association_calculator_pb2.py +31 -0
- mediapipe/tasks/cc/vision/hand_landmarker/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options_pb2.py +36 -0
- mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_roi_refinement_graph_options_pb2.py +28 -0
- mediapipe/tasks/cc/vision/holistic_landmarker/__init__.py +0 -0
- mediapipe/tasks/cc/vision/holistic_landmarker/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/holistic_landmarker/proto/holistic_landmarker_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/holistic_landmarker/proto/holistic_result_pb2.py +29 -0
- mediapipe/tasks/cc/vision/image_classifier/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_classifier/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_classifier/proto/image_classifier_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/vision/image_embedder/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_embedder/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_embedder/proto/image_embedder_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/vision/image_generator/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_generator/diffuser/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_generator/diffuser/stable_diffusion_iterate_calculator_pb2.py +40 -0
- mediapipe/tasks/cc/vision/image_generator/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_generator/proto/conditioned_image_graph_options_pb2.py +40 -0
- mediapipe/tasks/cc/vision/image_generator/proto/control_plugin_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/image_generator/proto/image_generator_graph_options_pb2.py +30 -0
- mediapipe/tasks/cc/vision/image_segmenter/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_segmenter/calculators/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_segmenter/calculators/tensors_to_segmentation_calculator_pb2.py +34 -0
- mediapipe/tasks/cc/vision/image_segmenter/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_segmenter/proto/image_segmenter_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/vision/image_segmenter/proto/segmenter_options_pb2.py +33 -0
- mediapipe/tasks/cc/vision/interactive_segmenter/__init__.py +0 -0
- mediapipe/tasks/cc/vision/object_detector/__init__.py +0 -0
- mediapipe/tasks/cc/vision/object_detector/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/object_detector/proto/object_detector_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/pose_detector/__init__.py +0 -0
- mediapipe/tasks/cc/vision/pose_detector/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/pose_detector/proto/pose_detector_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/pose_landmarker/__init__.py +0 -0
- mediapipe/tasks/cc/vision/pose_landmarker/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/pose_landmarker/proto/pose_landmarker_graph_options_pb2.py +36 -0
- mediapipe/tasks/cc/vision/pose_landmarker/proto/pose_landmarks_detector_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/utils/__init__.py +0 -0
- mediapipe/tasks/cc/vision/utils/ghum/__init__.py +0 -0
- mediapipe/tasks/metadata/image_segmenter_metadata_schema.fbs +59 -0
- mediapipe/tasks/metadata/image_segmenter_metadata_schema_py_generated.py +108 -0
- mediapipe/tasks/metadata/metadata_schema.fbs +732 -0
- mediapipe/tasks/metadata/metadata_schema_py_generated.py +3251 -0
- mediapipe/tasks/metadata/object_detector_metadata_schema.fbs +98 -0
- mediapipe/tasks/metadata/object_detector_metadata_schema_py_generated.py +674 -0
- mediapipe/tasks/metadata/schema_py_generated.py +18438 -0
- mediapipe/tasks/python/__init__.py +27 -0
- mediapipe/tasks/python/audio/__init__.py +33 -0
- mediapipe/tasks/python/audio/audio_classifier.py +324 -0
- mediapipe/tasks/python/audio/audio_embedder.py +285 -0
- mediapipe/tasks/python/audio/core/__init__.py +16 -0
- mediapipe/tasks/python/audio/core/audio_record.py +125 -0
- mediapipe/tasks/python/audio/core/audio_task_running_mode.py +29 -0
- mediapipe/tasks/python/audio/core/base_audio_task_api.py +181 -0
- mediapipe/tasks/python/benchmark/__init__.py +13 -0
- mediapipe/tasks/python/benchmark/benchmark_utils.py +70 -0
- mediapipe/tasks/python/benchmark/vision/__init__.py +13 -0
- mediapipe/tasks/python/benchmark/vision/benchmark.py +99 -0
- mediapipe/tasks/python/benchmark/vision/core/__init__.py +14 -0
- mediapipe/tasks/python/benchmark/vision/core/base_vision_benchmark_api.py +40 -0
- mediapipe/tasks/python/components/__init__.py +13 -0
- mediapipe/tasks/python/components/containers/__init__.py +53 -0
- mediapipe/tasks/python/components/containers/audio_data.py +137 -0
- mediapipe/tasks/python/components/containers/bounding_box.py +73 -0
- mediapipe/tasks/python/components/containers/category.py +78 -0
- mediapipe/tasks/python/components/containers/classification_result.py +111 -0
- mediapipe/tasks/python/components/containers/detections.py +181 -0
- mediapipe/tasks/python/components/containers/embedding_result.py +89 -0
- mediapipe/tasks/python/components/containers/keypoint.py +77 -0
- mediapipe/tasks/python/components/containers/landmark.py +122 -0
- mediapipe/tasks/python/components/containers/landmark_detection_result.py +106 -0
- mediapipe/tasks/python/components/containers/rect.py +109 -0
- mediapipe/tasks/python/components/processors/__init__.py +23 -0
- mediapipe/tasks/python/components/processors/classifier_options.py +86 -0
- mediapipe/tasks/python/components/utils/__init__.py +13 -0
- mediapipe/tasks/python/components/utils/cosine_similarity.py +68 -0
- mediapipe/tasks/python/core/__init__.py +13 -0
- mediapipe/tasks/python/core/base_options.py +121 -0
- mediapipe/tasks/python/core/optional_dependencies.py +25 -0
- mediapipe/tasks/python/core/task_info.py +139 -0
- mediapipe/tasks/python/genai/__init__.py +14 -0
- mediapipe/tasks/python/genai/bundler/__init__.py +23 -0
- mediapipe/tasks/python/genai/bundler/llm_bundler.py +130 -0
- mediapipe/tasks/python/genai/bundler/llm_bundler_test.py +168 -0
- mediapipe/tasks/python/genai/converter/__init__.py +24 -0
- mediapipe/tasks/python/genai/converter/converter_base.py +179 -0
- mediapipe/tasks/python/genai/converter/converter_factory.py +79 -0
- mediapipe/tasks/python/genai/converter/llm_converter.py +374 -0
- mediapipe/tasks/python/genai/converter/llm_converter_test.py +63 -0
- mediapipe/tasks/python/genai/converter/pytorch_converter.py +318 -0
- mediapipe/tasks/python/genai/converter/pytorch_converter_test.py +86 -0
- mediapipe/tasks/python/genai/converter/quantization_util.py +516 -0
- mediapipe/tasks/python/genai/converter/quantization_util_test.py +259 -0
- mediapipe/tasks/python/genai/converter/safetensors_converter.py +580 -0
- mediapipe/tasks/python/genai/converter/safetensors_converter_test.py +83 -0
- mediapipe/tasks/python/genai/converter/weight_bins_writer.py +120 -0
- mediapipe/tasks/python/genai/converter/weight_bins_writer_test.py +95 -0
- mediapipe/tasks/python/metadata/__init__.py +13 -0
- mediapipe/tasks/python/metadata/flatbuffers_lib/_pywrap_flatbuffers/arm64.cpython-312-darwin.so +0 -0
- mediapipe/tasks/python/metadata/flatbuffers_lib/_pywrap_flatbuffers.cpython-312-darwin.so +0 -0
- mediapipe/tasks/python/metadata/metadata.py +928 -0
- mediapipe/tasks/python/metadata/metadata_displayer_cli.py +34 -0
- mediapipe/tasks/python/metadata/metadata_writers/__init__.py +13 -0
- mediapipe/tasks/python/metadata/metadata_writers/face_stylizer.py +138 -0
- mediapipe/tasks/python/metadata/metadata_writers/image_classifier.py +71 -0
- mediapipe/tasks/python/metadata/metadata_writers/image_segmenter.py +170 -0
- mediapipe/tasks/python/metadata/metadata_writers/metadata_info.py +1166 -0
- mediapipe/tasks/python/metadata/metadata_writers/metadata_writer.py +845 -0
- mediapipe/tasks/python/metadata/metadata_writers/model_asset_bundle_utils.py +71 -0
- mediapipe/tasks/python/metadata/metadata_writers/object_detector.py +331 -0
- mediapipe/tasks/python/metadata/metadata_writers/text_classifier.py +119 -0
- mediapipe/tasks/python/metadata/metadata_writers/writer_utils.py +91 -0
- mediapipe/tasks/python/test/__init__.py +13 -0
- mediapipe/tasks/python/test/audio/__init__.py +13 -0
- mediapipe/tasks/python/test/audio/audio_classifier_test.py +387 -0
- mediapipe/tasks/python/test/audio/audio_embedder_test.py +297 -0
- mediapipe/tasks/python/test/test_utils.py +196 -0
- mediapipe/tasks/python/test/text/__init__.py +13 -0
- mediapipe/tasks/python/test/text/language_detector_test.py +228 -0
- mediapipe/tasks/python/test/text/text_classifier_test.py +235 -0
- mediapipe/tasks/python/test/text/text_embedder_test.py +326 -0
- mediapipe/tasks/python/test/vision/__init__.py +13 -0
- mediapipe/tasks/python/test/vision/face_aligner_test.py +190 -0
- mediapipe/tasks/python/test/vision/face_detector_test.py +523 -0
- mediapipe/tasks/python/test/vision/face_landmarker_test.py +565 -0
- mediapipe/tasks/python/test/vision/face_stylizer_test.py +191 -0
- mediapipe/tasks/python/test/vision/hand_landmarker_test.py +437 -0
- mediapipe/tasks/python/test/vision/holistic_landmarker_test.py +544 -0
- mediapipe/tasks/python/test/vision/image_classifier_test.py +657 -0
- mediapipe/tasks/python/test/vision/image_embedder_test.py +423 -0
- mediapipe/tasks/python/test/vision/image_segmenter_test.py +512 -0
- mediapipe/tasks/python/test/vision/interactive_segmenter_test.py +341 -0
- mediapipe/tasks/python/test/vision/object_detector_test.py +493 -0
- mediapipe/tasks/python/test/vision/pose_landmarker_test.py +518 -0
- mediapipe/tasks/python/text/__init__.py +35 -0
- mediapipe/tasks/python/text/core/__init__.py +16 -0
- mediapipe/tasks/python/text/core/base_text_task_api.py +54 -0
- mediapipe/tasks/python/text/language_detector.py +220 -0
- mediapipe/tasks/python/text/text_classifier.py +187 -0
- mediapipe/tasks/python/text/text_embedder.py +188 -0
- mediapipe/tasks/python/vision/__init__.py +90 -0
- mediapipe/tasks/python/vision/core/__init__.py +14 -0
- mediapipe/tasks/python/vision/core/base_vision_task_api.py +226 -0
- mediapipe/tasks/python/vision/core/image_processing_options.py +39 -0
- mediapipe/tasks/python/vision/core/vision_task_running_mode.py +31 -0
- mediapipe/tasks/python/vision/face_aligner.py +158 -0
- mediapipe/tasks/python/vision/face_detector.py +332 -0
- mediapipe/tasks/python/vision/face_landmarker.py +3244 -0
- mediapipe/tasks/python/vision/face_stylizer.py +158 -0
- mediapipe/tasks/python/vision/gesture_recognizer.py +480 -0
- mediapipe/tasks/python/vision/hand_landmarker.py +504 -0
- mediapipe/tasks/python/vision/holistic_landmarker.py +576 -0
- mediapipe/tasks/python/vision/image_classifier.py +358 -0
- mediapipe/tasks/python/vision/image_embedder.py +362 -0
- mediapipe/tasks/python/vision/image_segmenter.py +433 -0
- mediapipe/tasks/python/vision/interactive_segmenter.py +285 -0
- mediapipe/tasks/python/vision/object_detector.py +389 -0
- mediapipe/tasks/python/vision/pose_landmarker.py +455 -0
- mediapipe/util/__init__.py +0 -0
- mediapipe/util/analytics/__init__.py +0 -0
- mediapipe/util/analytics/mediapipe_log_extension_pb2.py +44 -0
- mediapipe/util/analytics/mediapipe_logging_enums_pb2.py +37 -0
- mediapipe/util/audio_decoder_pb2.py +33 -0
- mediapipe/util/color_pb2.py +33 -0
- mediapipe/util/label_map_pb2.py +27 -0
- mediapipe/util/render_data_pb2.py +58 -0
- mediapipe/util/sequence/__init__.py +14 -0
- mediapipe/util/sequence/media_sequence.py +716 -0
- mediapipe/util/sequence/media_sequence_test.py +290 -0
- mediapipe/util/sequence/media_sequence_util.py +800 -0
- mediapipe/util/sequence/media_sequence_util_test.py +389 -0
- mediapipe/util/tracking/__init__.py +0 -0
- mediapipe/util/tracking/box_detector_pb2.py +39 -0
- mediapipe/util/tracking/box_tracker_pb2.py +32 -0
- mediapipe/util/tracking/camera_motion_pb2.py +31 -0
- mediapipe/util/tracking/flow_packager_pb2.py +60 -0
- mediapipe/util/tracking/frame_selection_pb2.py +35 -0
- mediapipe/util/tracking/frame_selection_solution_evaluator_pb2.py +28 -0
- mediapipe/util/tracking/motion_analysis_pb2.py +35 -0
- mediapipe/util/tracking/motion_estimation_pb2.py +66 -0
- mediapipe/util/tracking/motion_models_pb2.py +42 -0
- mediapipe/util/tracking/motion_saliency_pb2.py +26 -0
- mediapipe/util/tracking/push_pull_filtering_pb2.py +26 -0
- mediapipe/util/tracking/region_flow_computation_pb2.py +59 -0
- mediapipe/util/tracking/region_flow_pb2.py +49 -0
- mediapipe/util/tracking/tone_estimation_pb2.py +45 -0
- mediapipe/util/tracking/tone_models_pb2.py +32 -0
- mediapipe/util/tracking/tracked_detection_manager_config_pb2.py +26 -0
- mediapipe/util/tracking/tracking_pb2.py +73 -0
- mediapipe/version.txt +1 -0
- mediapipe_nightly-0.10.21.post20241223.dist-info/LICENSE +218 -0
- mediapipe_nightly-0.10.21.post20241223.dist-info/METADATA +199 -0
- mediapipe_nightly-0.10.21.post20241223.dist-info/RECORD +593 -0
- mediapipe_nightly-0.10.21.post20241223.dist-info/WHEEL +5 -0
- mediapipe_nightly-0.10.21.post20241223.dist-info/top_level.txt +4 -0
@@ -0,0 +1,167 @@
|
|
1
|
+
# Copyright 2020-2021 The MediaPipe Authors.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
"""MediaPipe Holistic."""
|
15
|
+
|
16
|
+
from typing import NamedTuple
|
17
|
+
|
18
|
+
import numpy as np
|
19
|
+
|
20
|
+
# The following imports are needed because python pb2 silently discards
|
21
|
+
# unknown protobuf fields.
|
22
|
+
# pylint: disable=unused-import
|
23
|
+
from mediapipe.calculators.core import constant_side_packet_calculator_pb2
|
24
|
+
from mediapipe.calculators.core import gate_calculator_pb2
|
25
|
+
from mediapipe.calculators.core import split_vector_calculator_pb2
|
26
|
+
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
|
27
|
+
from mediapipe.calculators.tensor import inference_calculator_pb2
|
28
|
+
from mediapipe.calculators.tensor import tensors_to_classification_calculator_pb2
|
29
|
+
from mediapipe.calculators.tensor import tensors_to_floats_calculator_pb2
|
30
|
+
from mediapipe.calculators.tensor import tensors_to_landmarks_calculator_pb2
|
31
|
+
from mediapipe.calculators.tflite import ssd_anchors_calculator_pb2
|
32
|
+
from mediapipe.calculators.util import detections_to_rects_calculator_pb2
|
33
|
+
from mediapipe.calculators.util import landmark_projection_calculator_pb2
|
34
|
+
from mediapipe.calculators.util import local_file_contents_calculator_pb2
|
35
|
+
from mediapipe.calculators.util import non_max_suppression_calculator_pb2
|
36
|
+
from mediapipe.calculators.util import rect_transformation_calculator_pb2
|
37
|
+
from mediapipe.framework.tool import switch_container_pb2
|
38
|
+
from mediapipe.modules.holistic_landmark.calculators import roi_tracking_calculator_pb2
|
39
|
+
# pylint: enable=unused-import
|
40
|
+
|
41
|
+
from mediapipe.python.solution_base import SolutionBase
|
42
|
+
from mediapipe.python.solutions import download_utils
|
43
|
+
# pylint: disable=unused-import
|
44
|
+
from mediapipe.python.solutions.face_mesh_connections import FACEMESH_CONTOURS
|
45
|
+
from mediapipe.python.solutions.face_mesh_connections import FACEMESH_TESSELATION
|
46
|
+
from mediapipe.python.solutions.hands import HandLandmark
|
47
|
+
from mediapipe.python.solutions.hands_connections import HAND_CONNECTIONS
|
48
|
+
from mediapipe.python.solutions.pose import PoseLandmark
|
49
|
+
from mediapipe.python.solutions.pose_connections import POSE_CONNECTIONS
|
50
|
+
# pylint: enable=unused-import
|
51
|
+
|
52
|
+
_BINARYPB_FILE_PATH = 'mediapipe/modules/holistic_landmark/holistic_landmark_cpu.binarypb'
|
53
|
+
|
54
|
+
|
55
|
+
def _download_oss_pose_landmark_model(model_complexity):
  """Downloads the pose landmark lite/heavy model from the MediaPipe Github repo if it doesn't exist in the package."""
  # Only complexities 0 (lite) and 2 (heavy) are fetched on demand; any other
  # value (notably 1, the default) downloads nothing, matching the original
  # behavior.
  _MODEL_BY_COMPLEXITY = {
      0: 'mediapipe/modules/pose_landmark/pose_landmark_lite.tflite',
      2: 'mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite',
  }
  model_path = _MODEL_BY_COMPLEXITY.get(model_complexity)
  if model_path is not None:
    download_utils.download_oss_model(model_path)
|
64
|
+
|
65
|
+
|
66
|
+
class Holistic(SolutionBase):
  """MediaPipe Holistic.

  MediaPipe Holistic processes an RGB image and returns pose landmarks, left
  and right hand landmarks, and face mesh landmarks on the most prominent
  person detected.

  Please refer to https://solutions.mediapipe.dev/holistic#python-solution-api
  for usage examples.
  """

  def __init__(self,
               static_image_mode=False,
               model_complexity=1,
               smooth_landmarks=True,
               enable_segmentation=False,
               smooth_segmentation=True,
               refine_face_landmarks=False,
               min_detection_confidence=0.5,
               min_tracking_confidence=0.5):
    """Initializes a MediaPipe Holistic object.

    Args:
      static_image_mode: Whether to treat the input images as a batch of static
        and possibly unrelated images, or a video stream. See details in
        https://solutions.mediapipe.dev/holistic#static_image_mode.
      model_complexity: Complexity of the pose landmark model: 0, 1 or 2. See
        details in https://solutions.mediapipe.dev/holistic#model_complexity.
      smooth_landmarks: Whether to filter landmarks across different input
        images to reduce jitter. See details in
        https://solutions.mediapipe.dev/holistic#smooth_landmarks.
      enable_segmentation: Whether to predict segmentation mask. See details in
        https://solutions.mediapipe.dev/holistic#enable_segmentation.
      smooth_segmentation: Whether to filter segmentation across different
        input images to reduce jitter. See details in
        https://solutions.mediapipe.dev/holistic#smooth_segmentation.
      refine_face_landmarks: Whether to further refine the landmark coordinates
        around the eyes and lips, and output additional landmarks around the
        irises. Default to False. See details in
        https://solutions.mediapipe.dev/holistic#refine_face_landmarks.
      min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for
        person detection to be considered successful. See details in
        https://solutions.mediapipe.dev/holistic#min_detection_confidence.
      min_tracking_confidence: Minimum confidence value ([0.0, 1.0]) for the
        pose landmarks to be considered tracked successfully. See details in
        https://solutions.mediapipe.dev/holistic#min_tracking_confidence.
    """
    _download_oss_pose_landmark_model(model_complexity)
    # Temporal smoothing and landmark reuse only make sense on a video stream;
    # both are forced off when inputs are treated as unrelated static images.
    is_video = not static_image_mode
    super().__init__(
        binary_graph_path=_BINARYPB_FILE_PATH,
        side_inputs={
            'model_complexity': model_complexity,
            'smooth_landmarks': smooth_landmarks and is_video,
            'enable_segmentation': enable_segmentation,
            'smooth_segmentation': smooth_segmentation and is_video,
            'refine_face_landmarks': refine_face_landmarks,
            'use_prev_landmarks': is_video,
        },
        calculator_params={
            'poselandmarkcpu__posedetectioncpu__TensorsToDetectionsCalculator.min_score_thresh':
                min_detection_confidence,
            'poselandmarkcpu__poselandmarkbyroicpu__tensorstoposelandmarksandsegmentation__ThresholdingCalculator.threshold':
                min_tracking_confidence,
        },
        outputs=[
            'pose_landmarks', 'pose_world_landmarks', 'left_hand_landmarks',
            'right_hand_landmarks', 'face_landmarks', 'segmentation_mask'
        ])

  def process(self, image: np.ndarray) -> NamedTuple:
    """Processes an RGB image and returns the pose landmarks, left and right hand landmarks, and face landmarks on the most prominent person detected.

    Args:
      image: An RGB image represented as a numpy ndarray.

    Raises:
      RuntimeError: If the underlying graph throws any error.
      ValueError: If the input image is not three channel RGB.

    Returns:
      A NamedTuple with fields describing the landmarks on the most prominent
      person detected:
        1) "pose_landmarks" field that contains the pose landmarks.
        2) "pose_world_landmarks" field that contains the pose landmarks in
        real-world 3D coordinates that are in meters with the origin at the
        center between hips.
        3) "left_hand_landmarks" field that contains the left-hand landmarks.
        4) "right_hand_landmarks" field that contains the right-hand landmarks.
        5) "face_landmarks" field that contains the face landmarks.
        6) "segmentation_mask" field that contains the segmentation mask if
        "enable_segmentation" is set to true.
    """
    results = super().process(input_data={'image': image})
    # Strip the 'presence' field from both pose landmark lists before handing
    # the results to the caller.
    for field_name in ('pose_landmarks', 'pose_world_landmarks'):
      landmark_list = getattr(results, field_name)  # pytype: disable=attribute-error
      if landmark_list:
        for landmark in landmark_list.landmark:
          landmark.ClearField('presence')
    return results
|
@@ -0,0 +1,142 @@
|
|
1
|
+
# Copyright 2020 The MediaPipe Authors.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
"""Tests for mediapipe.python.solutions.pose."""
|
15
|
+
|
16
|
+
import os
|
17
|
+
import tempfile # pylint: disable=unused-import
|
18
|
+
from typing import NamedTuple
|
19
|
+
|
20
|
+
from absl.testing import absltest
|
21
|
+
from absl.testing import parameterized
|
22
|
+
import cv2
|
23
|
+
import numpy as np
|
24
|
+
import numpy.testing as npt
|
25
|
+
|
26
|
+
# resources dependency
|
27
|
+
# undeclared dependency
|
28
|
+
from mediapipe.python.solutions import drawing_styles
|
29
|
+
from mediapipe.python.solutions import drawing_utils as mp_drawing
|
30
|
+
from mediapipe.python.solutions import holistic as mp_holistic
|
31
|
+
|
32
|
+
# NOTE(review): TEST_IMAGE_PATH appears unused below (test_on_image builds its
# path from __file__) — retained for parity with sibling solution tests; confirm.
TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
# Maximum allowed per-coordinate deviation from the golden values below.
POSE_DIFF_THRESHOLD = 30  # pixels
HAND_DIFF_THRESHOLD = 30  # pixels
# Golden (x, y) pixel coordinates of the 33 pose landmarks detected in
# testdata/holistic.jpg (presumably in PoseLandmark enum order — confirm).
EXPECTED_POSE_LANDMARKS = np.array([[782, 243], [791, 232], [796, 233],
                                    [801, 233], [773, 231], [766, 231],
                                    [759, 232], [802, 242], [751, 239],
                                    [791, 258], [766, 258], [830, 301],
                                    [708, 298], [910, 248], [635, 234],
                                    [954, 161], [593, 136], [961, 137],
                                    [583, 110], [952, 132], [592, 106],
                                    [950, 141], [596, 115], [793, 500],
                                    [724, 502], [874, 626], [640, 629],
                                    [965, 756], [542, 760], [962, 779],
                                    [533, 781], [1025, 797], [487, 803]])
# Golden (x, y) pixel coordinates of the 21 left-hand landmarks.
EXPECTED_LEFT_HAND_LANDMARKS = np.array([[958, 167], [950, 161], [945, 151],
                                         [945, 141], [947, 134], [945, 136],
                                         [939, 122], [935, 113], [931, 106],
                                         [951, 134], [946, 118], [942, 108],
                                         [938, 100], [957, 135], [954, 120],
                                         [951, 111], [948, 103], [964, 138],
                                         [964, 128], [965, 122], [965, 117]])
# Golden (x, y) pixel coordinates of the 21 right-hand landmarks.
EXPECTED_RIGHT_HAND_LANDMARKS = np.array([[590, 135], [602, 125], [609, 114],
                                          [613, 103], [617, 96], [596, 100],
                                          [595, 84], [594, 74], [593, 68],
                                          [588, 100], [586, 84], [585, 73],
                                          [584, 65], [581, 103], [579, 89],
                                          [579, 79], [579, 72], [575, 109],
                                          [571, 99], [570, 93], [569, 87]])
|
60
|
+
|
61
|
+
|
62
|
+
class PoseTest(parameterized.TestCase):
  """End-to-end tests for the MediaPipe Holistic solution."""

  def _landmarks_list_to_array(self, landmark_list, image_shape):
    """Converts a normalized landmark list to an (N, 2) pixel-coord array."""
    rows, cols, _ = image_shape
    # Landmark x/y are normalized to the image size; scale back to pixels.
    return np.asarray([(lmk.x * cols, lmk.y * rows)
                       for lmk in landmark_list.landmark])

  def _assert_diff_less(self, array1, array2, threshold):
    """Asserts every element-wise absolute difference is strictly < threshold."""
    npt.assert_array_less(np.abs(array1 - array2), threshold)

  def _annotate(self, frame: np.ndarray, results: NamedTuple, idx: int):
    """Draws face and pose landmarks on `frame` and saves it to the temp dir.

    The written PNG is a debugging artifact only; no assertions depend on it.
    """
    mp_drawing.draw_landmarks(
        frame,
        results.face_landmarks,
        mp_holistic.FACEMESH_TESSELATION,
        landmark_drawing_spec=None,
        connection_drawing_spec=drawing_styles
        .get_default_face_mesh_tesselation_style())
    mp_drawing.draw_landmarks(
        frame,
        results.pose_landmarks,
        mp_holistic.POSE_CONNECTIONS,
        landmark_drawing_spec=drawing_styles.get_default_pose_landmarks_style())
    # File name derives from the running test id, e.g. "test_on_image_frame_0.png".
    path = os.path.join(tempfile.gettempdir(), self.id().split('.')[-1] +
                        '_frame_{}.png'.format(idx))
    cv2.imwrite(path, frame)

  def test_invalid_image_shape(self):
    # A 4-channel input must be rejected with a descriptive ValueError.
    with mp_holistic.Holistic() as holistic:
      with self.assertRaisesRegex(
          ValueError, 'Input image must contain three channel rgb data.'):
        holistic.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))

  def test_blank_image(self):
    # A solid white image contains no person, so no pose landmarks are expected.
    with mp_holistic.Holistic() as holistic:
      image = np.zeros([100, 100, 3], dtype=np.uint8)
      image.fill(255)
      results = holistic.process(image)
      self.assertIsNone(results.pose_landmarks)

  # Parameters: (name, static_image_mode, model_complexity,
  #              refine_face_landmarks, num_frames).
  @parameterized.named_parameters(('static_lite', True, 0, False, 3),
                                  ('static_full', True, 1, False, 3),
                                  ('static_heavy', True, 2, False, 3),
                                  ('video_lite', False, 0, False, 3),
                                  ('video_full', False, 1, False, 3),
                                  ('video_heavy', False, 2, False, 3),
                                  ('static_full_refine_face', True, 1, True, 3),
                                  ('video_full_refine_face', False, 1, True, 3))
  def test_on_image(self, static_image_mode, model_complexity,
                    refine_face_landmarks, num_frames):
    """Runs the solution repeatedly on one image and checks golden landmarks."""
    image_path = os.path.join(os.path.dirname(__file__),
                              'testdata/holistic.jpg')
    image = cv2.imread(image_path)
    with mp_holistic.Holistic(
        static_image_mode=static_image_mode,
        model_complexity=model_complexity,
        refine_face_landmarks=refine_face_landmarks) as holistic:
      # Processing the same frame several times also exercises the tracking
      # path when static_image_mode is False.
      for idx in range(num_frames):
        # cv2.imread yields BGR; the solution expects RGB.
        results = holistic.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        self._annotate(image.copy(), results, idx)
        self._assert_diff_less(
            self._landmarks_list_to_array(results.pose_landmarks, image.shape),
            EXPECTED_POSE_LANDMARKS,
            POSE_DIFF_THRESHOLD)
        self._assert_diff_less(
            self._landmarks_list_to_array(results.left_hand_landmarks,
                                          image.shape),
            EXPECTED_LEFT_HAND_LANDMARKS,
            HAND_DIFF_THRESHOLD)
        self._assert_diff_less(
            self._landmarks_list_to_array(results.right_hand_landmarks,
                                          image.shape),
            EXPECTED_RIGHT_HAND_LANDMARKS,
            HAND_DIFF_THRESHOLD)
        # TODO: Verify the correctness of the face landmarks.
        self.assertLen(results.face_landmarks.landmark,
                       478 if refine_face_landmarks else 468)
|
139
|
+
|
140
|
+
|
141
|
+
if __name__ == '__main__':
  # Allow running this test module directly: `python holistic_test.py`.
  absltest.main()
|
@@ -0,0 +1,288 @@
|
|
1
|
+
# Copyright 2020-2021 The MediaPipe Authors.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
"""MediaPipe Objectron."""
|
16
|
+
|
17
|
+
import enum
|
18
|
+
from typing import List, Tuple, NamedTuple, Optional
|
19
|
+
|
20
|
+
import attr
|
21
|
+
import numpy as np
|
22
|
+
|
23
|
+
# pylint: disable=unused-import
|
24
|
+
from mediapipe.calculators.core import constant_side_packet_calculator_pb2
|
25
|
+
from mediapipe.calculators.core import gate_calculator_pb2
|
26
|
+
from mediapipe.calculators.core import split_vector_calculator_pb2
|
27
|
+
from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
|
28
|
+
from mediapipe.calculators.tensor import inference_calculator_pb2
|
29
|
+
from mediapipe.calculators.tensor import tensors_to_detections_calculator_pb2
|
30
|
+
from mediapipe.calculators.tensor import tensors_to_floats_calculator_pb2
|
31
|
+
from mediapipe.calculators.tensor import tensors_to_landmarks_calculator_pb2
|
32
|
+
from mediapipe.calculators.tflite import ssd_anchors_calculator_pb2
|
33
|
+
from mediapipe.calculators.util import association_calculator_pb2
|
34
|
+
from mediapipe.calculators.util import collection_has_min_size_calculator_pb2
|
35
|
+
from mediapipe.calculators.util import detection_label_id_to_text_calculator_pb2
|
36
|
+
from mediapipe.calculators.util import detections_to_rects_calculator_pb2
|
37
|
+
from mediapipe.calculators.util import landmark_projection_calculator_pb2
|
38
|
+
from mediapipe.calculators.util import local_file_contents_calculator_pb2
|
39
|
+
from mediapipe.calculators.util import non_max_suppression_calculator_pb2
|
40
|
+
from mediapipe.calculators.util import rect_transformation_calculator_pb2
|
41
|
+
from mediapipe.calculators.util import thresholding_calculator_pb2
|
42
|
+
from mediapipe.framework.formats import landmark_pb2
|
43
|
+
from mediapipe.modules.objectron.calculators import annotation_data_pb2
|
44
|
+
from mediapipe.modules.objectron.calculators import frame_annotation_to_rect_calculator_pb2
|
45
|
+
from mediapipe.modules.objectron.calculators import lift_2d_frame_annotation_to_3d_calculator_pb2
|
46
|
+
# pylint: enable=unused-import
|
47
|
+
from mediapipe.python.solution_base import SolutionBase
|
48
|
+
from mediapipe.python.solutions import download_utils
|
49
|
+
|
50
|
+
|
51
|
+
class BoxLandmark(enum.IntEnum):
  """The 9 3D box landmarks."""
  # Landmark 0 is the box center; 1-8 are the corners, named by their
  # position along the z (BACK/FRONT), y (BOTTOM/TOP) and x (LEFT/RIGHT)
  # axes of the right-handed coordinate frame sketched below.
  #
  #       3 + + + + + + + + 7
  #       +\                +\          UP
  #       + \               + \
  #       +  \              +  \        |
  #       +   4 + + + + + + + + 8       | y
  #       +   +             +   +       |
  #       +   +             +   +       |
  #       +   +     (0)     +   +       .------- x
  #       +   +             +   +        \
  #       1 + + + + + + + + 5   +         \
  #        \  +              \  +          \ z
  #         \ +               \ +           \
  #          \+                \+
  #           2 + + + + + + + + 6
  CENTER = 0
  BACK_BOTTOM_LEFT = 1
  FRONT_BOTTOM_LEFT = 2
  BACK_TOP_LEFT = 3
  FRONT_TOP_LEFT = 4
  BACK_BOTTOM_RIGHT = 5
  FRONT_BOTTOM_RIGHT = 6
  BACK_TOP_RIGHT = 7
  FRONT_TOP_RIGHT = 8
|
77
|
+
|
78
|
+
_BINARYPB_FILE_PATH = 'mediapipe/modules/objectron/objectron_cpu.binarypb'
# The 12 edges of the 3D bounding box as (landmark, landmark) pairs; the
# center landmark (0) is not part of any edge.
_BOX_EDGE_INDEX_PAIRS = (
    (1, 2), (1, 3), (1, 5),
    (2, 4), (2, 6),
    (3, 4), (3, 7),
    (4, 8),
    (5, 6), (5, 7),
    (6, 8),
    (7, 8),
)
BOX_CONNECTIONS = frozenset(
    (BoxLandmark(start), BoxLandmark(end))
    for start, end in _BOX_EDGE_INDEX_PAIRS)
|
93
|
+
|
94
|
+
|
95
|
+
@attr.s(auto_attribs=True)
class ObjectronModel(object):
  """Pairs an Objectron box-landmark model path with its detection label."""
  model_path: str  # Package-relative path to the box-landmark TFLite model.
  label_name: str  # Detection label(s) passed to the graph as 'allowed_labels'.
|
99
|
+
|
100
|
+
|
101
|
+
@attr.s(auto_attribs=True, frozen=True)
class ShoeModel(ObjectronModel):
  """Objectron model for shoes (detection label 'Footwear')."""
  model_path: str = ('mediapipe/modules/objectron/'
                     'object_detection_3d_sneakers.tflite')
  label_name: str = 'Footwear'
|
106
|
+
|
107
|
+
|
108
|
+
@attr.s(auto_attribs=True, frozen=True)
class ChairModel(ObjectronModel):
  """Objectron model for chairs (detection label 'Chair')."""
  model_path: str = ('mediapipe/modules/objectron/'
                     'object_detection_3d_chair.tflite')
  label_name: str = 'Chair'
|
113
|
+
|
114
|
+
|
115
|
+
@attr.s(auto_attribs=True, frozen=True)
class CameraModel(ObjectronModel):
  """Objectron model for cameras (detection label 'Camera')."""
  model_path: str = ('mediapipe/modules/objectron/'
                     'object_detection_3d_camera.tflite')
  label_name: str = 'Camera'
|
120
|
+
|
121
|
+
|
122
|
+
@attr.s(auto_attribs=True, frozen=True)
class CupModel(ObjectronModel):
  """Objectron model for cups (detection labels 'Coffee cup, Mug')."""
  model_path: str = ('mediapipe/modules/objectron/'
                     'object_detection_3d_cup.tflite')
  label_name: str = 'Coffee cup, Mug'
|
127
|
+
|
128
|
+
# Registry of the supported Objectron models, keyed by the public model name
# accepted by get_model_by_name().
_MODEL_DICT = dict(
    Shoe=ShoeModel(),
    Chair=ChairModel(),
    Cup=CupModel(),
    Camera=CameraModel(),
)
|
134
|
+
|
135
|
+
|
136
|
+
def _download_oss_objectron_models(objectron_model: str):
  """Downloads the objectron models from the MediaPipe Github repo if they don't exist in the package."""
  # The 2D detection model is fetched unconditionally, then the requested
  # box-landmark model — same order as before.
  for model_path in (
      'mediapipe/modules/objectron/object_detection_ssd_mobilenetv2_oidv4_fp16.tflite',
      objectron_model,
  ):
    download_utils.download_oss_model(model_path)
|
143
|
+
|
144
|
+
|
145
|
+
def get_model_by_name(name: str) -> ObjectronModel:
  """Returns the Objectron model registered under `name`, downloading it first.

  Raises:
    ValueError: If `name` is not a supported Objectron model name.
  """
  model = _MODEL_DICT.get(name)
  if model is None:
    raise ValueError(f'{name} is not a valid model name for Objectron.')
  _download_oss_objectron_models(model.model_path)
  return model
|
150
|
+
|
151
|
+
|
152
|
+
@attr.s(auto_attribs=True)
class ObjectronOutputs(object):
  """Per-object output: 2D/3D box landmarks plus the recovered box pose."""
  landmarks_2d: landmark_pb2.NormalizedLandmarkList  # Box landmarks in normalized image coords.
  landmarks_3d: landmark_pb2.LandmarkList  # Box landmarks in 3D.
  # NOTE(review): rotation/translation/scale presumably describe the box pose
  # produced by Lift2DFrameAnnotationTo3DCalculator — confirm exact shapes.
  rotation: np.ndarray
  translation: np.ndarray
  scale: np.ndarray
|
159
|
+
|
160
|
+
|
161
|
+
class Objectron(SolutionBase):
|
162
|
+
"""MediaPipe Objectron.
|
163
|
+
|
164
|
+
MediaPipe Objectron processes an RGB image and returns the 3D box landmarks
|
165
|
+
and 2D rectangular bounding box of each detected object.
|
166
|
+
"""
|
167
|
+
|
168
|
+
def __init__(self,
             static_image_mode: bool = False,
             max_num_objects: int = 5,
             min_detection_confidence: float = 0.5,
             min_tracking_confidence: float = 0.99,
             model_name: str = 'Shoe',
             focal_length: Tuple[float, float] = (1.0, 1.0),
             principal_point: Tuple[float, float] = (0.0, 0.0),
             image_size: Optional[Tuple[int, int]] = None,
             ):
  """Initializes a MediaPipe Objectron class.

  Args:
    static_image_mode: If True, every input image is treated as an unrelated
      static image; if False, the input is treated as a video stream and
      landmarks from previous frames are reused for tracking.
    max_num_objects: Maximum number of objects to detect.
    min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for an
      object detection to be considered successful.
    min_tracking_confidence: Minimum confidence value ([0.0, 1.0]) for the
      box landmarks to be considered tracked successfully.
    model_name: Name of the box-landmark model to use; one of
      {'Shoe', 'Chair', 'Cup', 'Camera'}.
    focal_length: Camera focal length `(fx, fy)`, in NDC space by default.
      To pass pixel-space values (fx_pixel, fy_pixel), also supply
      image_size = (image_width, image_height) so the API can convert.
    principal_point: Camera principal point (px, py), in NDC space by
      default. To pass pixel-space values (px_pixel, py_pixel), also supply
      image_size = (image_width, image_height) so the API can convert.
    image_size (Optional): Size (image_width, image_height) of the input
      image; ONLY needed when focal_length and principal_point are given in
      pixel space.

  Raises:
    ConnectionError: If the objectron open source model can't be downloaded
      from the MediaPipe Github repo.
  """
  # Normalize the camera intrinsics: when an image size is supplied, the
  # caller gave pixel-space values, so convert them into NDC space.
  norm_fx, norm_fy = focal_length
  norm_px, norm_py = principal_point
  if image_size is not None:
    width_half = image_size[0] / 2.0
    height_half = image_size[1] / 2.0
    norm_fx = norm_fx / width_half
    norm_fy = norm_fy / height_half
    # NDC principal point is measured from the image center, x right / y up.
    norm_px = (width_half - norm_px) / width_half
    norm_py = (height_half - norm_py) / height_half

  # Resolve the requested category model and wire up the underlying graph.
  selected_model = get_model_by_name(model_name)
  lift_calculator = 'Lift2DFrameAnnotationTo3DCalculator'
  super().__init__(
      binary_graph_path=_BINARYPB_FILE_PATH,
      side_inputs={
          'box_landmark_model_path': selected_model.model_path,
          'allowed_labels': selected_model.label_name,
          'max_num_objects': max_num_objects,
          'use_prev_landmarks': not static_image_mode,
      },
      calculator_params={
          ('objectdetectionoidv4subgraph'
           '__TensorsToDetectionsCalculator.min_score_thresh'):
              min_detection_confidence,
          'boxlandmarksubgraph__ThresholdingCalculator.threshold':
              min_tracking_confidence,
          lift_calculator + '.normalized_focal_x': norm_fx,
          lift_calculator + '.normalized_focal_y': norm_fy,
          lift_calculator + '.normalized_principal_point_x': norm_px,
          lift_calculator + '.normalized_principal_point_y': norm_py,
      },
      outputs=['detected_objects'])
|
243
|
+
|
244
|
+
def process(self, image: np.ndarray) -> NamedTuple:
  """Processes an RGB image and returns the box landmarks and rectangular bounding box of each detected object.

  Args:
    image: An RGB image represented as a numpy ndarray.

  Raises:
    RuntimeError: If the underlying graph throws any error.
    ValueError: If the input image is not three channel RGB.

  Returns:
    A NamedTuple object with a "detected_objects" field that contains a list
    of detected 3D bounding boxes. Each detected box is represented as an
    "ObjectronOutputs" instance.
  """

  outputs = super().process(input_data={'image': image})
  raw_objects = outputs.detected_objects  # pytype: disable=attribute-error
  # Replace the raw proto output with the Python-friendly representation,
  # or None when nothing was detected.
  outputs.detected_objects = (  # pytype: disable=not-writable
      self._convert_format(raw_objects) if raw_objects else None)  # type: ignore
  return outputs
|
266
|
+
|
267
|
+
def _convert_format(
    self,
    inputs: annotation_data_pb2.FrameAnnotation) -> List[ObjectronOutputs]:
  """Converts a FrameAnnotation proto into a list of ObjectronOutputs."""
  converted = []
  for box_annotation in inputs.annotations:
    # Recover the object's 3D pose from the annotation.
    pose_rotation = np.array(box_annotation.rotation).reshape(3, 3)
    pose_translation = np.array(box_annotation.translation)
    pose_scale = np.array(box_annotation.scale)
    # Collect the 2D and 3D box landmarks.
    box_landmarks_2d = landmark_pb2.NormalizedLandmarkList()
    box_landmarks_3d = landmark_pb2.LandmarkList()
    for kp in box_annotation.keypoints:
      box_landmarks_2d.landmark.add(x=kp.point_2d.x, y=kp.point_2d.y)
      box_landmarks_3d.landmark.add(
          x=kp.point_3d.x, y=kp.point_3d.y, z=kp.point_3d.z)

    # Bundle everything into one objectron output record.
    converted.append(
        ObjectronOutputs(box_landmarks_2d, box_landmarks_3d, pose_rotation,
                         pose_translation, scale=pose_scale))
  return converted
|
@@ -0,0 +1,81 @@
|
|
1
|
+
# Copyright 2020 The MediaPipe Authors.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
"""Tests for mediapipe.python.solutions.objectron."""
|
16
|
+
|
17
|
+
import os
|
18
|
+
|
19
|
+
from absl.testing import absltest
|
20
|
+
from absl.testing import parameterized
|
21
|
+
import cv2
|
22
|
+
import numpy as np
|
23
|
+
import numpy.testing as npt
|
24
|
+
|
25
|
+
# resources dependency
|
26
|
+
from mediapipe.python.solutions import objectron as mp_objectron
|
27
|
+
|
28
|
+
# Directory that holds the test images for this suite.
TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
# Maximum allowed per-coordinate deviation between a predicted box landmark
# and the golden value below.
DIFF_THRESHOLD = 30 # pixels
# Golden (row, col) pixel coordinates of the 9 box landmarks for each of the
# two objects expected in testdata/shoes.jpg; compared against predictions in
# test_multi_objects.
EXPECTED_BOX_COORDINATES_PREDICTION = [[[236, 413], [408, 474], [135, 457],
                                        [383, 505], [80, 478], [408, 345],
                                        [130, 347], [384, 355], [72, 353]],
                                       [[241, 206], [411, 279], [131, 280],
                                        [392, 249], [78, 252], [412, 155],
                                        [140, 178], [396, 105], [89, 137]]]
|
36
|
+
|
37
|
+
|
38
|
+
class ObjectronTest(parameterized.TestCase):

  def test_invalid_image_shape(self):
    # A 4-channel array must be rejected before any graph processing.
    with mp_objectron.Objectron() as objectron:
      bad_image = np.arange(36, dtype=np.uint8).reshape(3, 3, 4)
      with self.assertRaisesRegex(
          ValueError, 'Input image must contain three channel rgb data.'):
        objectron.process(bad_image)

  def test_blank_image(self):
    # An all-white frame should produce no detections.
    with mp_objectron.Objectron() as objectron:
      blank = np.full([100, 100, 3], 255, dtype=np.uint8)
      results = objectron.process(blank)
      self.assertIsNone(results.detected_objects)

  @parameterized.named_parameters(('static_image_mode', True, 1),
                                  ('video_mode', False, 5))
  def test_multi_objects(self, static_image_mode, num_frames):
    image_path = os.path.join(os.path.dirname(__file__), 'testdata/shoes.jpg')
    frame = cv2.imread(image_path)

    with mp_objectron.Objectron(
        static_image_mode=static_image_mode,
        max_num_objects=2,
        min_detection_confidence=0.5) as objectron:
      # In video mode, run several frames so tracking kicks in.
      for _ in range(num_frames):
        results = objectron.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
      all_box_coordinates = []
      for detected_object in results.detected_objects:
        box_landmarks = detected_object.landmarks_2d
        self.assertLen(box_landmarks.landmark, 9)
        xs = [pt.x for pt in box_landmarks.landmark]
        ys = [pt.y for pt in box_landmarks.landmark]
        # Scale normalized (y, x) landmarks back to pixel coordinates.
        pixel_coords = np.transpose(np.stack((ys, xs))) * frame.shape[0:2]
        all_box_coordinates.append(pixel_coords)
      self.assertLen(all_box_coordinates, 2)
      prediction_error = np.abs(
          np.asarray(all_box_coordinates) -
          np.asarray(EXPECTED_BOX_COORDINATES_PREDICTION))
      npt.assert_array_less(prediction_error, DIFF_THRESHOLD)
|
78
|
+
|
79
|
+
|
80
|
+
# Run the absl test harness when this file is executed as a script.
if __name__ == '__main__':
  absltest.main()
|