PyPI - mediapipe-nightly - Versions diffs - 0.10.21.post20250114__cp312-cp312-manylinux_2_28_x86_64.whl - Mend

mediapipe-nightly 0.10.21.post20250114__cp312-cp312-manylinux_2_28_x86_64.whl

Files changed (593) hide show

mediapipe/python/solutions/holistic.py ADDED Viewed

@@ -0,0 +1,167 @@
+# Copyright 2020-2021 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""MediaPipe Holistic."""
+from typing import NamedTuple
+import numpy as np
+# The following imports are needed because python pb2 silently discards
+# unknown protobuf fields.
+# pylint: disable=unused-import
+from mediapipe.calculators.core import constant_side_packet_calculator_pb2
+from mediapipe.calculators.core import gate_calculator_pb2
+from mediapipe.calculators.core import split_vector_calculator_pb2
+from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
+from mediapipe.calculators.tensor import inference_calculator_pb2
+from mediapipe.calculators.tensor import tensors_to_classification_calculator_pb2
+from mediapipe.calculators.tensor import tensors_to_floats_calculator_pb2
+from mediapipe.calculators.tensor import tensors_to_landmarks_calculator_pb2
+from mediapipe.calculators.tflite import ssd_anchors_calculator_pb2
+from mediapipe.calculators.util import detections_to_rects_calculator_pb2
+from mediapipe.calculators.util import landmark_projection_calculator_pb2
+from mediapipe.calculators.util import local_file_contents_calculator_pb2
+from mediapipe.calculators.util import non_max_suppression_calculator_pb2
+from mediapipe.calculators.util import rect_transformation_calculator_pb2
+from mediapipe.framework.tool import switch_container_pb2
+from mediapipe.modules.holistic_landmark.calculators import roi_tracking_calculator_pb2
+# pylint: enable=unused-import
+from mediapipe.python.solution_base import SolutionBase
+from mediapipe.python.solutions import download_utils
+# pylint: disable=unused-import
+from mediapipe.python.solutions.face_mesh_connections import FACEMESH_CONTOURS
+from mediapipe.python.solutions.face_mesh_connections import FACEMESH_TESSELATION
+from mediapipe.python.solutions.hands import HandLandmark
+from mediapipe.python.solutions.hands_connections import HAND_CONNECTIONS
+from mediapipe.python.solutions.pose import PoseLandmark
+from mediapipe.python.solutions.pose_connections import POSE_CONNECTIONS
+# pylint: enable=unused-import
+# Path of the binary graph (.binarypb) that Holistic.__init__ hands to
+# SolutionBase as `binary_graph_path`.
+_BINARYPB_FILE_PATH = 'mediapipe/modules/holistic_landmark/holistic_landmark_cpu.binarypb'
+def _download_oss_pose_landmark_model(model_complexity):
+  """Downloads the pose landmark lite/heavy model from the MediaPipe Github repo if it doesn't exist in the package."""
+  if model_complexity == 0:
+    download_utils.download_oss_model(
+        'mediapipe/modules/pose_landmark/pose_landmark_lite.tflite')
+  elif model_complexity == 2:
+    download_utils.download_oss_model(
+        'mediapipe/modules/pose_landmark/pose_landmark_heavy.tflite')
+class Holistic(SolutionBase):
+  """MediaPipe Holistic.
+  MediaPipe Holistic processes an RGB image and returns pose landmarks, left and
+  right hand landmarks, and face mesh landmarks on the most prominent person
+  detected.
+  Please refer to https://solutions.mediapipe.dev/holistic#python-solution-api
+  for usage examples.
+  """
+  def __init__(self,
+               static_image_mode=False,
+               model_complexity=1,
+               smooth_landmarks=True,
+               enable_segmentation=False,
+               smooth_segmentation=True,
+               refine_face_landmarks=False,
+               min_detection_confidence=0.5,
+               min_tracking_confidence=0.5):
+    """Initializes a MediaPipe Holistic object.
+    Args:
+      static_image_mode: Whether to treat the input images as a batch of static
+        and possibly unrelated images, or a video stream. See details in
+        https://solutions.mediapipe.dev/holistic#static_image_mode.
+      model_complexity: Complexity of the pose landmark model: 0, 1 or 2. See
+        details in https://solutions.mediapipe.dev/holistic#model_complexity.
+      smooth_landmarks: Whether to filter landmarks across different input
+        images to reduce jitter. See details in
+        https://solutions.mediapipe.dev/holistic#smooth_landmarks.
+      enable_segmentation: Whether to predict segmentation mask. See details in
+        https://solutions.mediapipe.dev/holistic#enable_segmentation.
+      smooth_segmentation: Whether to filter segmentation across different input
+        images to reduce jitter. See details in
+        https://solutions.mediapipe.dev/holistic#smooth_segmentation.
+      refine_face_landmarks: Whether to further refine the landmark coordinates
+        around the eyes and lips, and output additional landmarks around the
+        irises. Default to False. See details in
+        https://solutions.mediapipe.dev/holistic#refine_face_landmarks.
+      min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for person
+        detection to be considered successful. See details in
+        https://solutions.mediapipe.dev/holistic#min_detection_confidence.
+      min_tracking_confidence: Minimum confidence value ([0.0, 1.0]) for the
+        pose landmarks to be considered tracked successfully. See details in
+        https://solutions.mediapipe.dev/holistic#min_tracking_confidence.
+    """
+    _download_oss_pose_landmark_model(model_complexity)
+    super().__init__(
+        binary_graph_path=_BINARYPB_FILE_PATH,
+        side_inputs={
+            'model_complexity': model_complexity,
+            'smooth_landmarks': smooth_landmarks and not static_image_mode,
+            'enable_segmentation': enable_segmentation,
+            'smooth_segmentation':
+                smooth_segmentation and not static_image_mode,
+            'refine_face_landmarks': refine_face_landmarks,
+            'use_prev_landmarks': not static_image_mode,
+        },
+        calculator_params={
+            'poselandmarkcpu__posedetectioncpu__TensorsToDetectionsCalculator.min_score_thresh':
+                min_detection_confidence,
+            'poselandmarkcpu__poselandmarkbyroicpu__tensorstoposelandmarksandsegmentation__ThresholdingCalculator.threshold':
+                min_tracking_confidence,
+        },
+        outputs=[
+            'pose_landmarks', 'pose_world_landmarks', 'left_hand_landmarks',
+            'right_hand_landmarks', 'face_landmarks', 'segmentation_mask'
+        ])
+  def process(self, image: np.ndarray) -> NamedTuple:
+    """Processes an RGB image and returns the pose landmarks, left and right hand landmarks, and face landmarks on the most prominent person detected.
+    Args:
+      image: An RGB image represented as a numpy ndarray.
+    Raises:
+      RuntimeError: If the underlying graph throws any error.
+      ValueError: If the input image is not three channel RGB.
+    Returns:
+      A NamedTuple with fields describing the landmarks on the most prominate
+      person detected:
+        1) "pose_landmarks" field that contains the pose landmarks.
+        2) "pose_world_landmarks" field that contains the pose landmarks in
+        real-world 3D coordinates that are in meters with the origin at the
+        center between hips.
+        3) "left_hand_landmarks" field that contains the left-hand landmarks.
+        4) "right_hand_landmarks" field that contains the right-hand landmarks.
+        5) "face_landmarks" field that contains the face landmarks.
+        6) "segmentation_mask" field that contains the segmentation mask if
+           "enable_segmentation" is set to true.
+    """
+    results = super().process(input_data={'image': image})
+    if results.pose_landmarks:  # pytype: disable=attribute-error
+      for landmark in results.pose_landmarks.landmark:  # pytype: disable=attribute-error
+        landmark.ClearField('presence')
+    if results.pose_world_landmarks:  # pytype: disable=attribute-error
+      for landmark in results.pose_world_landmarks.landmark:  # pytype: disable=attribute-error
+        landmark.ClearField('presence')
+    return results

mediapipe/python/solutions/holistic_test.py ADDED Viewed

@@ -0,0 +1,142 @@
+# Copyright 2020 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for mediapipe.python.solutions.holistic."""
+import os
+import tempfile  # pylint: disable=unused-import
+from typing import NamedTuple
+from absl.testing import absltest
+from absl.testing import parameterized
+import cv2
+import numpy as np
+import numpy.testing as npt
+# resources dependency
+# undeclared dependency
+from mediapipe.python.solutions import drawing_styles
+from mediapipe.python.solutions import drawing_utils as mp_drawing
+from mediapipe.python.solutions import holistic as mp_holistic
+# NOTE(review): TEST_IMAGE_PATH is not referenced by the test code visible in
+# this file; test_on_image builds its path from __file__ instead.
+TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
+# Maximum allowed absolute per-coordinate deviation from the expected values.
+POSE_DIFF_THRESHOLD = 30  # pixels
+HAND_DIFF_THRESHOLD = 30  # pixels
+# Expected (x, y) pixel position of each pose landmark on the test image; the
+# tests compare these against landmark.x * cols and landmark.y * rows.
+EXPECTED_POSE_LANDMARKS = np.array([[782, 243], [791, 232], [796, 233],
+                                    [801, 233], [773, 231], [766, 231],
+                                    [759, 232], [802, 242], [751, 239],
+                                    [791, 258], [766, 258], [830, 301],
+                                    [708, 298], [910, 248], [635, 234],
+                                    [954, 161], [593, 136], [961, 137],
+                                    [583, 110], [952, 132], [592, 106],
+                                    [950, 141], [596, 115], [793, 500],
+                                    [724, 502], [874, 626], [640, 629],
+                                    [965, 756], [542, 760], [962, 779],
+                                    [533, 781], [1025, 797], [487, 803]])
+# Expected (x, y) pixel position of each left-hand landmark.
+EXPECTED_LEFT_HAND_LANDMARKS = np.array([[958, 167], [950, 161], [945, 151],
+                                         [945, 141], [947, 134], [945, 136],
+                                         [939, 122], [935, 113], [931, 106],
+                                         [951, 134], [946, 118], [942, 108],
+                                         [938, 100], [957, 135], [954, 120],
+                                         [951, 111], [948, 103], [964, 138],
+                                         [964, 128], [965, 122], [965, 117]])
+# Expected (x, y) pixel position of each right-hand landmark.
+EXPECTED_RIGHT_HAND_LANDMARKS = np.array([[590, 135], [602, 125], [609, 114],
+                                          [613, 103], [617, 96], [596, 100],
+                                          [595, 84], [594, 74], [593, 68],
+                                          [588, 100], [586, 84], [585, 73],
+                                          [584, 65], [581, 103], [579, 89],
+                                          [579, 79], [579, 72], [575, 109],
+                                          [571, 99], [570, 93], [569, 87]])
+class PoseTest(parameterized.TestCase):
+  def _landmarks_list_to_array(self, landmark_list, image_shape):
+    rows, cols, _ = image_shape
+    return np.asarray([(lmk.x * cols, lmk.y * rows)
+                       for lmk in landmark_list.landmark])
+  def _assert_diff_less(self, array1, array2, threshold):
+    npt.assert_array_less(np.abs(array1 - array2), threshold)
+  def _annotate(self, frame: np.ndarray, results: NamedTuple, idx: int):
+    mp_drawing.draw_landmarks(
+        frame,
+        results.face_landmarks,
+        mp_holistic.FACEMESH_TESSELATION,
+        landmark_drawing_spec=None,
+        connection_drawing_spec=drawing_styles
+        .get_default_face_mesh_tesselation_style())
+    mp_drawing.draw_landmarks(
+        frame,
+        results.pose_landmarks,
+        mp_holistic.POSE_CONNECTIONS,
+        landmark_drawing_spec=drawing_styles.get_default_pose_landmarks_style())
+    path = os.path.join(tempfile.gettempdir(), self.id().split('.')[-1] +
+                                              '_frame_{}.png'.format(idx))
+    cv2.imwrite(path, frame)
+  def test_invalid_image_shape(self):
+    with mp_holistic.Holistic() as holistic:
+      with self.assertRaisesRegex(
+          ValueError, 'Input image must contain three channel rgb data.'):
+        holistic.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))
+  def test_blank_image(self):
+    with mp_holistic.Holistic() as holistic:
+      image = np.zeros([100, 100, 3], dtype=np.uint8)
+      image.fill(255)
+      results = holistic.process(image)
+      self.assertIsNone(results.pose_landmarks)
+  @parameterized.named_parameters(('static_lite', True, 0, False, 3),
+                                  ('static_full', True, 1, False, 3),
+                                  ('static_heavy', True, 2, False, 3),
+                                  ('video_lite', False, 0, False, 3),
+                                  ('video_full', False, 1, False, 3),
+                                  ('video_heavy', False, 2, False, 3),
+                                  ('static_full_refine_face', True, 1, True, 3),
+                                  ('video_full_refine_face', False, 1, True, 3))
+  def test_on_image(self, static_image_mode, model_complexity,
+                    refine_face_landmarks, num_frames):
+    image_path = os.path.join(os.path.dirname(__file__),
+                              'testdata/holistic.jpg')
+    image = cv2.imread(image_path)
+    with mp_holistic.Holistic(
+        static_image_mode=static_image_mode,
+        model_complexity=model_complexity,
+        refine_face_landmarks=refine_face_landmarks) as holistic:
+      for idx in range(num_frames):
+        results = holistic.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+        self._annotate(image.copy(), results, idx)
+        self._assert_diff_less(
+            self._landmarks_list_to_array(results.pose_landmarks, image.shape),
+            EXPECTED_POSE_LANDMARKS,
+            POSE_DIFF_THRESHOLD)
+        self._assert_diff_less(
+            self._landmarks_list_to_array(results.left_hand_landmarks,
+                                          image.shape),
+            EXPECTED_LEFT_HAND_LANDMARKS,
+            HAND_DIFF_THRESHOLD)
+        self._assert_diff_less(
+            self._landmarks_list_to_array(results.right_hand_landmarks,
+                                          image.shape),
+            EXPECTED_RIGHT_HAND_LANDMARKS,
+            HAND_DIFF_THRESHOLD)
+        # TODO: Verify the correctness of the face landmarks.
+        self.assertLen(results.face_landmarks.landmark,
+                       478 if refine_face_landmarks else 468)
+# Run the test cases through absltest when this file is executed directly.
+if __name__ == '__main__':
+  absltest.main()

mediapipe/python/solutions/objectron.py ADDED Viewed

@@ -0,0 +1,288 @@
+# Copyright 2020-2021 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""MediaPipe Objectron."""
+import enum
+from typing import List, Tuple, NamedTuple, Optional
+import attr
+import numpy as np
+# pylint: disable=unused-import
+from mediapipe.calculators.core import constant_side_packet_calculator_pb2
+from mediapipe.calculators.core import gate_calculator_pb2
+from mediapipe.calculators.core import split_vector_calculator_pb2
+from mediapipe.calculators.tensor import image_to_tensor_calculator_pb2
+from mediapipe.calculators.tensor import inference_calculator_pb2
+from mediapipe.calculators.tensor import tensors_to_detections_calculator_pb2
+from mediapipe.calculators.tensor import tensors_to_floats_calculator_pb2
+from mediapipe.calculators.tensor import tensors_to_landmarks_calculator_pb2
+from mediapipe.calculators.tflite import ssd_anchors_calculator_pb2
+from mediapipe.calculators.util import association_calculator_pb2
+from mediapipe.calculators.util import collection_has_min_size_calculator_pb2
+from mediapipe.calculators.util import detection_label_id_to_text_calculator_pb2
+from mediapipe.calculators.util import detections_to_rects_calculator_pb2
+from mediapipe.calculators.util import landmark_projection_calculator_pb2
+from mediapipe.calculators.util import local_file_contents_calculator_pb2
+from mediapipe.calculators.util import non_max_suppression_calculator_pb2
+from mediapipe.calculators.util import rect_transformation_calculator_pb2
+from mediapipe.calculators.util import thresholding_calculator_pb2
+from mediapipe.framework.formats import landmark_pb2
+from mediapipe.modules.objectron.calculators import annotation_data_pb2
+from mediapipe.modules.objectron.calculators import frame_annotation_to_rect_calculator_pb2
+from mediapipe.modules.objectron.calculators import lift_2d_frame_annotation_to_3d_calculator_pb2
+# pylint: enable=unused-import
+from mediapipe.python.solution_base import SolutionBase
+from mediapipe.python.solutions import download_utils
+class BoxLandmark(enum.IntEnum):
+  """The 9 3D box landmarks.
+  Value 0 is the box center and values 1-8 are the eight corners. Corner names
+  combine BACK/FRONT, BOTTOM/TOP and LEFT/RIGHT as laid out in the diagram
+  below, where x points right, y points up and z points toward the front.
+  """
+  #
+  #       3 + + + + + + + + 7
+  #       +\                +\          UP
+  #       + \               + \
+  #       +  \              +  \        |
+  #       +   4 + + + + + + + + 8       | y
+  #       +   +             +   +       |
+  #       +   +             +   +       |
+  #       +   +     (0)     +   +       .------- x
+  #       +   +             +   +        \
+  #       1 + + + + + + + + 5   +         \
+  #        \  +              \  +          \ z
+  #         \ +               \ +           \
+  #          \+                \+
+  #           2 + + + + + + + + 6
+  CENTER = 0
+  BACK_BOTTOM_LEFT = 1
+  FRONT_BOTTOM_LEFT = 2
+  BACK_TOP_LEFT = 3
+  FRONT_TOP_LEFT = 4
+  BACK_BOTTOM_RIGHT = 5
+  FRONT_BOTTOM_RIGHT = 6
+  BACK_TOP_RIGHT = 7
+  FRONT_TOP_RIGHT = 8
+# Path of the binary graph (.binarypb) that Objectron.__init__ hands to
+# SolutionBase as `binary_graph_path`.
+_BINARYPB_FILE_PATH = 'mediapipe/modules/objectron/objectron_cpu.binarypb'
+# The 12 corner-to-corner pairs of the box, each a (start, end) tuple of
+# BoxLandmark members. The CENTER landmark takes part in no connection.
+BOX_CONNECTIONS = frozenset([
+    (BoxLandmark.BACK_BOTTOM_LEFT, BoxLandmark.FRONT_BOTTOM_LEFT),
+    (BoxLandmark.BACK_BOTTOM_LEFT, BoxLandmark.BACK_TOP_LEFT),
+    (BoxLandmark.BACK_BOTTOM_LEFT, BoxLandmark.BACK_BOTTOM_RIGHT),
+    (BoxLandmark.FRONT_BOTTOM_LEFT, BoxLandmark.FRONT_TOP_LEFT),
+    (BoxLandmark.FRONT_BOTTOM_LEFT, BoxLandmark.FRONT_BOTTOM_RIGHT),
+    (BoxLandmark.BACK_TOP_LEFT, BoxLandmark.FRONT_TOP_LEFT),
+    (BoxLandmark.BACK_TOP_LEFT, BoxLandmark.BACK_TOP_RIGHT),
+    (BoxLandmark.FRONT_TOP_LEFT, BoxLandmark.FRONT_TOP_RIGHT),
+    (BoxLandmark.BACK_BOTTOM_RIGHT, BoxLandmark.FRONT_BOTTOM_RIGHT),
+    (BoxLandmark.BACK_BOTTOM_RIGHT, BoxLandmark.BACK_TOP_RIGHT),
+    (BoxLandmark.FRONT_BOTTOM_RIGHT, BoxLandmark.FRONT_TOP_RIGHT),
+    (BoxLandmark.BACK_TOP_RIGHT, BoxLandmark.FRONT_TOP_RIGHT),
+])
+@attr.s(auto_attribs=True)
+class ObjectronModel(object):
+  model_path: str
+  label_name: str
+@attr.s(auto_attribs=True, frozen=True)
+class ShoeModel(ObjectronModel):
+  model_path: str = ('mediapipe/modules/objectron/'
+                     'object_detection_3d_sneakers.tflite')
+  label_name: str = 'Footwear'
+@attr.s(auto_attribs=True, frozen=True)
+class ChairModel(ObjectronModel):
+  model_path: str = ('mediapipe/modules/objectron/'
+                     'object_detection_3d_chair.tflite')
+  label_name: str = 'Chair'
+@attr.s(auto_attribs=True, frozen=True)
+class CameraModel(ObjectronModel):
+  model_path: str = ('mediapipe/modules/objectron/'
+                     'object_detection_3d_camera.tflite')
+  label_name: str = 'Camera'
+@attr.s(auto_attribs=True, frozen=True)
+class CupModel(ObjectronModel):
+  model_path: str = ('mediapipe/modules/objectron/'
+                     'object_detection_3d_cup.tflite')
+  label_name: str = 'Coffee cup, Mug'
+# Maps each supported `model_name` to a model record created once at import
+# time; get_model_by_name() looks names up here.
+_MODEL_DICT = {
+    'Shoe': ShoeModel(),
+    'Chair': ChairModel(),
+    'Cup': CupModel(),
+    'Camera': CameraModel()
+}
+def _download_oss_objectron_models(objectron_model: str):
+  """Downloads the objectron models from the MediaPipe Github repo if they don't exist in the package."""
+  download_utils.download_oss_model(
+      'mediapipe/modules/objectron/object_detection_ssd_mobilenetv2_oidv4_fp16.tflite'
+  )
+  download_utils.download_oss_model(objectron_model)
+def get_model_by_name(name: str) -> ObjectronModel:
+  if name not in _MODEL_DICT:
+    raise ValueError(f'{name} is not a valid model name for Objectron.')
+  _download_oss_objectron_models(_MODEL_DICT[name].model_path)
+  return _MODEL_DICT[name]
+@attr.s(auto_attribs=True)
+class ObjectronOutputs(object):
+  """Per-object result record built by Objectron._convert_format."""
+  # One landmark per annotation keypoint, carrying the keypoint's 2D x/y.
+  landmarks_2d: landmark_pb2.NormalizedLandmarkList
+  # One landmark per annotation keypoint, carrying the keypoint's 3D x/y/z.
+  landmarks_3d: landmark_pb2.LandmarkList
+  # Annotation rotation reshaped to a 3x3 array.
+  rotation: np.ndarray
+  # Annotation translation as an array.
+  translation: np.ndarray
+  # Annotation scale as an array.
+  scale: np.ndarray
+class Objectron(SolutionBase):
+  """MediaPipe Objectron.
+  MediaPipe Objectron processes an RGB image and returns the 3D box landmarks
+  and 2D rectangular bounding box of each detected object.
+  """
+  def __init__(self,
+               static_image_mode: bool = False,
+               max_num_objects: int = 5,
+               min_detection_confidence: float = 0.5,
+               min_tracking_confidence: float = 0.99,
+               model_name: str = 'Shoe',
+               focal_length: Tuple[float, float] = (1.0, 1.0),
+               principal_point: Tuple[float, float] = (0.0, 0.0),
+               image_size: Optional[Tuple[int, int]] = None,
+               ):
+    """Initializes a MediaPipe Objectron class.
+    Args:
+      static_image_mode: Whether to treat the input images as a batch of static
+        and possibly unrelated images, or a video stream.
+      max_num_objects: Maximum number of objects to detect.
+      min_detection_confidence: Minimum confidence value ([0.0, 1.0]) for object
+        detection to be considered successful.
+      min_tracking_confidence: Minimum confidence value ([0.0, 1.0]) for the
+        box landmarks to be considered tracked successfully.
+      model_name: Name of model to use for predicting box landmarks, currently
+        support {'Shoe', 'Chair', 'Cup', 'Camera'}.
+      focal_length: Camera focal length `(fx, fy)`, by default is defined in NDC
+        space. To use focal length (fx_pixel, fy_pixel) in pixel space, users
+        should provide image_size = (image_width, image_height) to enable
+        conversions inside the API.
+      principal_point: Camera principal point (px, py), by default is defined in
+        NDC space. To use principal point (px_pixel, py_pixel) in pixel space,
+        users should provide image_size = (image_width, image_height) to enable
+        conversions inside the API.
+      image_size (Optional): size (image_width, image_height) of the input image
+        , ONLY needed when use focal_length and principal_point in pixel space.
+    Raises:
+      ConnectionError: If the objectron open source model can't be downloaded
+        from the MediaPipe Github repo.
+    """
+    # Get Camera parameters.
+    fx, fy = focal_length
+    px, py = principal_point
+    if image_size is not None:
+      half_width = image_size[0] / 2.0
+      half_height = image_size[1] / 2.0
+      fx = fx / half_width
+      fy = fy / half_height
+      px = - (px - half_width) / half_width
+      py = - (py - half_height) / half_height
+    # Create and init model.
+    model = get_model_by_name(model_name)
+    super().__init__(
+        binary_graph_path=_BINARYPB_FILE_PATH,
+        side_inputs={
+            'box_landmark_model_path': model.model_path,
+            'allowed_labels': model.label_name,
+            'max_num_objects': max_num_objects,
+            'use_prev_landmarks': not static_image_mode,
+        },
+        calculator_params={
+            ('objectdetectionoidv4subgraph'
+             '__TensorsToDetectionsCalculator.min_score_thresh'):
+                min_detection_confidence,
+            ('boxlandmarksubgraph__ThresholdingCalculator'
+             '.threshold'):
+                min_tracking_confidence,
+            ('Lift2DFrameAnnotationTo3DCalculator'
+             '.normalized_focal_x'): fx,
+            ('Lift2DFrameAnnotationTo3DCalculator'
+             '.normalized_focal_y'): fy,
+            ('Lift2DFrameAnnotationTo3DCalculator'
+             '.normalized_principal_point_x'): px,
+            ('Lift2DFrameAnnotationTo3DCalculator'
+             '.normalized_principal_point_y'): py,
+        },
+        outputs=['detected_objects'])
+  def process(self, image: np.ndarray) -> NamedTuple:
+    """Processes an RGB image and returns the box landmarks and rectangular bounding box of each detected object.
+    Args:
+      image: An RGB image represented as a numpy ndarray.
+    Raises:
+      RuntimeError: If the underlying graph throws any error.
+      ValueError: If the input image is not three channel RGB.
+    Returns:
+      A NamedTuple object with a "detected_objects" field that contains a list
+      of detected 3D bounding boxes. Each detected box is represented as an
+      "ObjectronOutputs" instance.
+    """
+    results = super().process(input_data={'image': image})
+    if results.detected_objects:  # pytype: disable=attribute-error
+      results.detected_objects = self._convert_format(results.detected_objects)  # type: ignore
+    else:
+      results.detected_objects = None  # pytype: disable=not-writable
+    return results
+  def _convert_format(
+      self,
+      inputs: annotation_data_pb2.FrameAnnotation) -> List[ObjectronOutputs]:
+    new_outputs = list()
+    for annotation in inputs.annotations:
+      # Get 3d object pose.
+      rotation = np.reshape(np.array(annotation.rotation), (3, 3))
+      translation = np.array(annotation.translation)
+      scale = np.array(annotation.scale)
+      # Get 2d/3d landmakrs.
+      landmarks_2d = landmark_pb2.NormalizedLandmarkList()
+      landmarks_3d = landmark_pb2.LandmarkList()
+      for keypoint in annotation.keypoints:
+        point_2d = keypoint.point_2d
+        landmarks_2d.landmark.add(x=point_2d.x, y=point_2d.y)
+        point_3d = keypoint.point_3d
+        landmarks_3d.landmark.add(x=point_3d.x, y=point_3d.y, z=point_3d.z)
+      # Add to objectron outputs.
+      new_outputs.append(ObjectronOutputs(landmarks_2d, landmarks_3d,
+                                          rotation, translation, scale=scale))
+    return new_outputs

mediapipe/python/solutions/objectron_test.py ADDED Viewed

@@ -0,0 +1,81 @@
+# Copyright 2020 The MediaPipe Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tests for mediapipe.python.solutions.objectron."""
+import os
+from absl.testing import absltest
+from absl.testing import parameterized
+import cv2
+import numpy as np
+import numpy.testing as npt
+# resources dependency
+from mediapipe.python.solutions import objectron as mp_objectron
+# NOTE(review): TEST_IMAGE_PATH is not referenced by the test code visible in
+# this file; test_multi_objects builds its path from __file__ instead.
+TEST_IMAGE_PATH = 'mediapipe/python/solutions/testdata'
+# Maximum allowed absolute per-coordinate deviation from the expected values.
+DIFF_THRESHOLD = 30  # pixels
+# Expected pixel coordinates for two detected boxes with 9 landmarks each.
+# Each pair is ordered (y, x): the test scales (landmark.y, landmark.x) by
+# image.shape[0:2] before comparing.
+EXPECTED_BOX_COORDINATES_PREDICTION = [[[236, 413], [408, 474], [135, 457],
+                                        [383, 505], [80, 478], [408, 345],
+                                        [130, 347], [384, 355], [72, 353]],
+                                       [[241, 206], [411, 279], [131, 280],
+                                        [392, 249], [78, 252], [412, 155],
+                                        [140, 178], [396, 105], [89, 137]]]
+class ObjectronTest(parameterized.TestCase):
+  def test_invalid_image_shape(self):
+    with mp_objectron.Objectron() as objectron:
+      with self.assertRaisesRegex(
+          ValueError, 'Input image must contain three channel rgb data.'):
+        objectron.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))
+  def test_blank_image(self):
+    with mp_objectron.Objectron() as objectron:
+      image = np.zeros([100, 100, 3], dtype=np.uint8)
+      image.fill(255)
+      results = objectron.process(image)
+      self.assertIsNone(results.detected_objects)
+  @parameterized.named_parameters(('static_image_mode', True, 1),
+                                  ('video_mode', False, 5))
+  def test_multi_objects(self, static_image_mode, num_frames):
+    image_path = os.path.join(os.path.dirname(__file__), 'testdata/shoes.jpg')
+    image = cv2.imread(image_path)
+    with mp_objectron.Objectron(
+        static_image_mode=static_image_mode,
+        max_num_objects=2,
+        min_detection_confidence=0.5) as objectron:
+      for _ in range(num_frames):
+        results = objectron.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+        multi_box_coordinates = []
+        for detected_object in results.detected_objects:
+          landmarks = detected_object.landmarks_2d
+          self.assertLen(landmarks.landmark, 9)
+          x = [landmark.x for landmark in landmarks.landmark]
+          y = [landmark.y for landmark in landmarks.landmark]
+          box_coordinates = np.transpose(np.stack((y, x))) * image.shape[0:2]
+          multi_box_coordinates.append(box_coordinates)
+        self.assertLen(multi_box_coordinates, 2)
+        prediction_error = np.abs(
+            np.asarray(multi_box_coordinates) -
+            np.asarray(EXPECTED_BOX_COORDINATES_PREDICTION))
+        npt.assert_array_less(prediction_error, DIFF_THRESHOLD)
+# Run the test cases through absltest when this file is executed directly.
+if __name__ == '__main__':
+  absltest.main()