mediapipe-nightly 0.10.21.post20241223__cp312-cp312-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (593)
  1. mediapipe/__init__.py +26 -0
  2. mediapipe/calculators/__init__.py +0 -0
  3. mediapipe/calculators/audio/__init__.py +0 -0
  4. mediapipe/calculators/audio/mfcc_mel_calculators_pb2.py +33 -0
  5. mediapipe/calculators/audio/rational_factor_resample_calculator_pb2.py +33 -0
  6. mediapipe/calculators/audio/spectrogram_calculator_pb2.py +37 -0
  7. mediapipe/calculators/audio/stabilized_log_calculator_pb2.py +31 -0
  8. mediapipe/calculators/audio/time_series_framer_calculator_pb2.py +33 -0
  9. mediapipe/calculators/core/__init__.py +0 -0
  10. mediapipe/calculators/core/bypass_calculator_pb2.py +31 -0
  11. mediapipe/calculators/core/clip_vector_size_calculator_pb2.py +31 -0
  12. mediapipe/calculators/core/concatenate_vector_calculator_pb2.py +31 -0
  13. mediapipe/calculators/core/constant_side_packet_calculator_pb2.py +39 -0
  14. mediapipe/calculators/core/dequantize_byte_array_calculator_pb2.py +31 -0
  15. mediapipe/calculators/core/flow_limiter_calculator_pb2.py +32 -0
  16. mediapipe/calculators/core/gate_calculator_pb2.py +33 -0
  17. mediapipe/calculators/core/get_vector_item_calculator_pb2.py +31 -0
  18. mediapipe/calculators/core/graph_profile_calculator_pb2.py +31 -0
  19. mediapipe/calculators/core/packet_cloner_calculator_pb2.py +31 -0
  20. mediapipe/calculators/core/packet_resampler_calculator_pb2.py +33 -0
  21. mediapipe/calculators/core/packet_thinner_calculator_pb2.py +33 -0
  22. mediapipe/calculators/core/quantize_float_vector_calculator_pb2.py +31 -0
  23. mediapipe/calculators/core/sequence_shift_calculator_pb2.py +31 -0
  24. mediapipe/calculators/core/split_vector_calculator_pb2.py +33 -0
  25. mediapipe/calculators/image/__init__.py +0 -0
  26. mediapipe/calculators/image/bilateral_filter_calculator_pb2.py +31 -0
  27. mediapipe/calculators/image/feature_detector_calculator_pb2.py +31 -0
  28. mediapipe/calculators/image/image_clone_calculator_pb2.py +31 -0
  29. mediapipe/calculators/image/image_cropping_calculator_pb2.py +33 -0
  30. mediapipe/calculators/image/image_transformation_calculator_pb2.py +38 -0
  31. mediapipe/calculators/image/mask_overlay_calculator_pb2.py +33 -0
  32. mediapipe/calculators/image/opencv_encoded_image_to_image_frame_calculator_pb2.py +31 -0
  33. mediapipe/calculators/image/opencv_image_encoder_calculator_pb2.py +35 -0
  34. mediapipe/calculators/image/recolor_calculator_pb2.py +34 -0
  35. mediapipe/calculators/image/rotation_mode_pb2.py +29 -0
  36. mediapipe/calculators/image/scale_image_calculator_pb2.py +34 -0
  37. mediapipe/calculators/image/segmentation_smoothing_calculator_pb2.py +31 -0
  38. mediapipe/calculators/image/set_alpha_calculator_pb2.py +31 -0
  39. mediapipe/calculators/image/warp_affine_calculator_pb2.py +36 -0
  40. mediapipe/calculators/internal/__init__.py +0 -0
  41. mediapipe/calculators/internal/callback_packet_calculator_pb2.py +33 -0
  42. mediapipe/calculators/tensor/__init__.py +0 -0
  43. mediapipe/calculators/tensor/audio_to_tensor_calculator_pb2.py +35 -0
  44. mediapipe/calculators/tensor/bert_preprocessor_calculator_pb2.py +31 -0
  45. mediapipe/calculators/tensor/feedback_tensors_calculator_pb2.py +37 -0
  46. mediapipe/calculators/tensor/image_to_tensor_calculator_pb2.py +40 -0
  47. mediapipe/calculators/tensor/inference_calculator_pb2.py +63 -0
  48. mediapipe/calculators/tensor/landmarks_to_tensor_calculator_pb2.py +33 -0
  49. mediapipe/calculators/tensor/regex_preprocessor_calculator_pb2.py +31 -0
  50. mediapipe/calculators/tensor/tensor_converter_calculator_pb2.py +34 -0
  51. mediapipe/calculators/tensor/tensor_to_joints_calculator_pb2.py +31 -0
  52. mediapipe/calculators/tensor/tensors_readback_calculator_pb2.py +35 -0
  53. mediapipe/calculators/tensor/tensors_to_audio_calculator_pb2.py +33 -0
  54. mediapipe/calculators/tensor/tensors_to_classification_calculator_pb2.py +44 -0
  55. mediapipe/calculators/tensor/tensors_to_detections_calculator_pb2.py +39 -0
  56. mediapipe/calculators/tensor/tensors_to_floats_calculator_pb2.py +33 -0
  57. mediapipe/calculators/tensor/tensors_to_landmarks_calculator_pb2.py +33 -0
  58. mediapipe/calculators/tensor/tensors_to_segmentation_calculator_pb2.py +34 -0
  59. mediapipe/calculators/tensor/vector_to_tensor_calculator_pb2.py +27 -0
  60. mediapipe/calculators/tflite/__init__.py +0 -0
  61. mediapipe/calculators/tflite/ssd_anchors_calculator_pb2.py +32 -0
  62. mediapipe/calculators/tflite/tflite_converter_calculator_pb2.py +33 -0
  63. mediapipe/calculators/tflite/tflite_custom_op_resolver_calculator_pb2.py +31 -0
  64. mediapipe/calculators/tflite/tflite_inference_calculator_pb2.py +49 -0
  65. mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator_pb2.py +31 -0
  66. mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator_pb2.py +31 -0
  67. mediapipe/calculators/tflite/tflite_tensors_to_landmarks_calculator_pb2.py +33 -0
  68. mediapipe/calculators/tflite/tflite_tensors_to_segmentation_calculator_pb2.py +31 -0
  69. mediapipe/calculators/util/__init__.py +0 -0
  70. mediapipe/calculators/util/align_hand_to_pose_in_world_calculator_pb2.py +31 -0
  71. mediapipe/calculators/util/annotation_overlay_calculator_pb2.py +32 -0
  72. mediapipe/calculators/util/association_calculator_pb2.py +31 -0
  73. mediapipe/calculators/util/collection_has_min_size_calculator_pb2.py +31 -0
  74. mediapipe/calculators/util/combine_joints_calculator_pb2.py +36 -0
  75. mediapipe/calculators/util/detection_label_id_to_text_calculator_pb2.py +36 -0
  76. mediapipe/calculators/util/detections_to_rects_calculator_pb2.py +33 -0
  77. mediapipe/calculators/util/detections_to_render_data_calculator_pb2.py +33 -0
  78. mediapipe/calculators/util/face_to_rect_calculator_pb2.py +26 -0
  79. mediapipe/calculators/util/filter_detections_calculator_pb2.py +31 -0
  80. mediapipe/calculators/util/flat_color_image_calculator_pb2.py +32 -0
  81. mediapipe/calculators/util/labels_to_render_data_calculator_pb2.py +34 -0
  82. mediapipe/calculators/util/landmark_projection_calculator_pb2.py +31 -0
  83. mediapipe/calculators/util/landmarks_refinement_calculator_pb2.py +41 -0
  84. mediapipe/calculators/util/landmarks_smoothing_calculator_pb2.py +33 -0
  85. mediapipe/calculators/util/landmarks_to_detection_calculator_pb2.py +31 -0
  86. mediapipe/calculators/util/landmarks_to_floats_calculator_pb2.py +31 -0
  87. mediapipe/calculators/util/landmarks_to_render_data_calculator_pb2.py +32 -0
  88. mediapipe/calculators/util/landmarks_transformation_calculator_pb2.py +37 -0
  89. mediapipe/calculators/util/latency_pb2.py +26 -0
  90. mediapipe/calculators/util/local_file_contents_calculator_pb2.py +31 -0
  91. mediapipe/calculators/util/logic_calculator_pb2.py +34 -0
  92. mediapipe/calculators/util/non_max_suppression_calculator_pb2.py +35 -0
  93. mediapipe/calculators/util/packet_frequency_calculator_pb2.py +31 -0
  94. mediapipe/calculators/util/packet_frequency_pb2.py +26 -0
  95. mediapipe/calculators/util/packet_latency_calculator_pb2.py +31 -0
  96. mediapipe/calculators/util/rect_to_render_data_calculator_pb2.py +32 -0
  97. mediapipe/calculators/util/rect_to_render_scale_calculator_pb2.py +31 -0
  98. mediapipe/calculators/util/rect_transformation_calculator_pb2.py +31 -0
  99. mediapipe/calculators/util/refine_landmarks_from_heatmap_calculator_pb2.py +31 -0
  100. mediapipe/calculators/util/resource_provider_calculator_pb2.py +28 -0
  101. mediapipe/calculators/util/set_joints_visibility_calculator_pb2.py +41 -0
  102. mediapipe/calculators/util/thresholding_calculator_pb2.py +31 -0
  103. mediapipe/calculators/util/timed_box_list_id_to_label_calculator_pb2.py +31 -0
  104. mediapipe/calculators/util/timed_box_list_to_render_data_calculator_pb2.py +32 -0
  105. mediapipe/calculators/util/top_k_scores_calculator_pb2.py +31 -0
  106. mediapipe/calculators/util/visibility_copy_calculator_pb2.py +27 -0
  107. mediapipe/calculators/util/visibility_smoothing_calculator_pb2.py +31 -0
  108. mediapipe/calculators/video/__init__.py +0 -0
  109. mediapipe/calculators/video/box_detector_calculator_pb2.py +32 -0
  110. mediapipe/calculators/video/box_tracker_calculator_pb2.py +32 -0
  111. mediapipe/calculators/video/flow_packager_calculator_pb2.py +32 -0
  112. mediapipe/calculators/video/flow_to_image_calculator_pb2.py +31 -0
  113. mediapipe/calculators/video/motion_analysis_calculator_pb2.py +42 -0
  114. mediapipe/calculators/video/opencv_video_encoder_calculator_pb2.py +31 -0
  115. mediapipe/calculators/video/tool/__init__.py +0 -0
  116. mediapipe/calculators/video/tool/flow_quantizer_model_pb2.py +26 -0
  117. mediapipe/calculators/video/tracked_detection_manager_calculator_pb2.py +32 -0
  118. mediapipe/calculators/video/video_pre_stream_calculator_pb2.py +35 -0
  119. mediapipe/examples/__init__.py +14 -0
  120. mediapipe/examples/desktop/__init__.py +14 -0
  121. mediapipe/framework/__init__.py +0 -0
  122. mediapipe/framework/calculator_options_pb2.py +29 -0
  123. mediapipe/framework/calculator_pb2.py +59 -0
  124. mediapipe/framework/calculator_profile_pb2.py +48 -0
  125. mediapipe/framework/deps/__init__.py +0 -0
  126. mediapipe/framework/deps/proto_descriptor_pb2.py +29 -0
  127. mediapipe/framework/formats/__init__.py +0 -0
  128. mediapipe/framework/formats/affine_transform_data_pb2.py +28 -0
  129. mediapipe/framework/formats/annotation/__init__.py +0 -0
  130. mediapipe/framework/formats/annotation/locus_pb2.py +32 -0
  131. mediapipe/framework/formats/annotation/rasterization_pb2.py +29 -0
  132. mediapipe/framework/formats/body_rig_pb2.py +28 -0
  133. mediapipe/framework/formats/classification_pb2.py +31 -0
  134. mediapipe/framework/formats/detection_pb2.py +36 -0
  135. mediapipe/framework/formats/image_file_properties_pb2.py +26 -0
  136. mediapipe/framework/formats/image_format_pb2.py +29 -0
  137. mediapipe/framework/formats/landmark_pb2.py +37 -0
  138. mediapipe/framework/formats/location_data_pb2.py +38 -0
  139. mediapipe/framework/formats/matrix_data_pb2.py +31 -0
  140. mediapipe/framework/formats/motion/__init__.py +0 -0
  141. mediapipe/framework/formats/motion/optical_flow_field_data_pb2.py +30 -0
  142. mediapipe/framework/formats/object_detection/__init__.py +0 -0
  143. mediapipe/framework/formats/object_detection/anchor_pb2.py +26 -0
  144. mediapipe/framework/formats/rect_pb2.py +29 -0
  145. mediapipe/framework/formats/time_series_header_pb2.py +28 -0
  146. mediapipe/framework/graph_runtime_info_pb2.py +31 -0
  147. mediapipe/framework/mediapipe_options_pb2.py +27 -0
  148. mediapipe/framework/packet_factory_pb2.py +31 -0
  149. mediapipe/framework/packet_generator_pb2.py +33 -0
  150. mediapipe/framework/status_handler_pb2.py +28 -0
  151. mediapipe/framework/stream_handler/__init__.py +0 -0
  152. mediapipe/framework/stream_handler/default_input_stream_handler_pb2.py +27 -0
  153. mediapipe/framework/stream_handler/fixed_size_input_stream_handler_pb2.py +27 -0
  154. mediapipe/framework/stream_handler/sync_set_input_stream_handler_pb2.py +29 -0
  155. mediapipe/framework/stream_handler/timestamp_align_input_stream_handler_pb2.py +27 -0
  156. mediapipe/framework/stream_handler_pb2.py +30 -0
  157. mediapipe/framework/test_calculators_pb2.py +31 -0
  158. mediapipe/framework/thread_pool_executor_pb2.py +29 -0
  159. mediapipe/framework/tool/__init__.py +0 -0
  160. mediapipe/framework/tool/calculator_graph_template_pb2.py +44 -0
  161. mediapipe/framework/tool/field_data_pb2.py +28 -0
  162. mediapipe/framework/tool/node_chain_subgraph_pb2.py +31 -0
  163. mediapipe/framework/tool/packet_generator_wrapper_calculator_pb2.py +28 -0
  164. mediapipe/framework/tool/source_pb2.py +33 -0
  165. mediapipe/framework/tool/switch_container_pb2.py +32 -0
  166. mediapipe/gpu/__init__.py +0 -0
  167. mediapipe/gpu/copy_calculator_pb2.py +33 -0
  168. mediapipe/gpu/gl_animation_overlay_calculator_pb2.py +31 -0
  169. mediapipe/gpu/gl_context_options_pb2.py +31 -0
  170. mediapipe/gpu/gl_scaler_calculator_pb2.py +32 -0
  171. mediapipe/gpu/gl_surface_sink_calculator_pb2.py +32 -0
  172. mediapipe/gpu/gpu_origin_pb2.py +29 -0
  173. mediapipe/gpu/scale_mode_pb2.py +28 -0
  174. mediapipe/model_maker/__init__.py +27 -0
  175. mediapipe/model_maker/setup.py +107 -0
  176. mediapipe/modules/__init__.py +0 -0
  177. mediapipe/modules/face_detection/__init__.py +0 -0
  178. mediapipe/modules/face_detection/face_detection_full_range_cpu.binarypb +0 -0
  179. mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite +0 -0
  180. mediapipe/modules/face_detection/face_detection_pb2.py +30 -0
  181. mediapipe/modules/face_detection/face_detection_short_range.tflite +0 -0
  182. mediapipe/modules/face_detection/face_detection_short_range_cpu.binarypb +0 -0
  183. mediapipe/modules/face_geometry/__init__.py +0 -0
  184. mediapipe/modules/face_geometry/data/__init__.py +0 -0
  185. mediapipe/modules/face_geometry/effect_renderer_calculator_pb2.py +27 -0
  186. mediapipe/modules/face_geometry/env_generator_calculator_pb2.py +28 -0
  187. mediapipe/modules/face_geometry/geometry_pipeline_calculator_pb2.py +27 -0
  188. mediapipe/modules/face_geometry/libs/__init__.py +0 -0
  189. mediapipe/modules/face_geometry/protos/__init__.py +0 -0
  190. mediapipe/modules/face_geometry/protos/environment_pb2.py +31 -0
  191. mediapipe/modules/face_geometry/protos/face_geometry_pb2.py +29 -0
  192. mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata_pb2.py +32 -0
  193. mediapipe/modules/face_geometry/protos/mesh_3d_pb2.py +31 -0
  194. mediapipe/modules/face_landmark/__init__.py +0 -0
  195. mediapipe/modules/face_landmark/face_landmark.tflite +0 -0
  196. mediapipe/modules/face_landmark/face_landmark_front_cpu.binarypb +0 -0
  197. mediapipe/modules/face_landmark/face_landmark_with_attention.tflite +0 -0
  198. mediapipe/modules/hand_landmark/__init__.py +0 -0
  199. mediapipe/modules/hand_landmark/calculators/__init__.py +0 -0
  200. mediapipe/modules/hand_landmark/hand_landmark_full.tflite +0 -0
  201. mediapipe/modules/hand_landmark/hand_landmark_lite.tflite +0 -0
  202. mediapipe/modules/hand_landmark/hand_landmark_tracking_cpu.binarypb +0 -0
  203. mediapipe/modules/hand_landmark/handedness.txt +2 -0
  204. mediapipe/modules/holistic_landmark/__init__.py +0 -0
  205. mediapipe/modules/holistic_landmark/calculators/__init__.py +0 -0
  206. mediapipe/modules/holistic_landmark/calculators/roi_tracking_calculator_pb2.py +37 -0
  207. mediapipe/modules/holistic_landmark/hand_recrop.tflite +0 -0
  208. mediapipe/modules/holistic_landmark/holistic_landmark_cpu.binarypb +0 -0
  209. mediapipe/modules/iris_landmark/__init__.py +0 -0
  210. mediapipe/modules/iris_landmark/iris_landmark.tflite +0 -0
  211. mediapipe/modules/objectron/__init__.py +0 -0
  212. mediapipe/modules/objectron/calculators/__init__.py +0 -0
  213. mediapipe/modules/objectron/calculators/a_r_capture_metadata_pb2.py +102 -0
  214. mediapipe/modules/objectron/calculators/annotation_data_pb2.py +38 -0
  215. mediapipe/modules/objectron/calculators/belief_decoder_config_pb2.py +28 -0
  216. mediapipe/modules/objectron/calculators/camera_parameters_pb2.py +30 -0
  217. mediapipe/modules/objectron/calculators/filter_detection_calculator_pb2.py +35 -0
  218. mediapipe/modules/objectron/calculators/frame_annotation_to_rect_calculator_pb2.py +31 -0
  219. mediapipe/modules/objectron/calculators/frame_annotation_tracker_calculator_pb2.py +31 -0
  220. mediapipe/modules/objectron/calculators/lift_2d_frame_annotation_to_3d_calculator_pb2.py +32 -0
  221. mediapipe/modules/objectron/calculators/object_pb2.py +38 -0
  222. mediapipe/modules/objectron/calculators/tensors_to_objects_calculator_pb2.py +32 -0
  223. mediapipe/modules/objectron/calculators/tflite_tensors_to_objects_calculator_pb2.py +32 -0
  224. mediapipe/modules/objectron/object_detection_oidv4_labelmap.txt +24 -0
  225. mediapipe/modules/objectron/objectron_cpu.binarypb +0 -0
  226. mediapipe/modules/palm_detection/__init__.py +0 -0
  227. mediapipe/modules/palm_detection/palm_detection_full.tflite +0 -0
  228. mediapipe/modules/palm_detection/palm_detection_lite.tflite +0 -0
  229. mediapipe/modules/pose_detection/__init__.py +0 -0
  230. mediapipe/modules/pose_detection/pose_detection.tflite +0 -0
  231. mediapipe/modules/pose_landmark/__init__.py +0 -0
  232. mediapipe/modules/pose_landmark/pose_landmark_cpu.binarypb +0 -0
  233. mediapipe/modules/pose_landmark/pose_landmark_full.tflite +0 -0
  234. mediapipe/modules/selfie_segmentation/__init__.py +0 -0
  235. mediapipe/modules/selfie_segmentation/selfie_segmentation.tflite +0 -0
  236. mediapipe/modules/selfie_segmentation/selfie_segmentation_cpu.binarypb +0 -0
  237. mediapipe/modules/selfie_segmentation/selfie_segmentation_landscape.tflite +0 -0
  238. mediapipe/python/__init__.py +29 -0
  239. mediapipe/python/_framework_bindings.cpython-312-x86_64-linux-gnu.so +0 -0
  240. mediapipe/python/calculator_graph_test.py +251 -0
  241. mediapipe/python/image_frame_test.py +194 -0
  242. mediapipe/python/image_test.py +218 -0
  243. mediapipe/python/packet_creator.py +275 -0
  244. mediapipe/python/packet_getter.py +120 -0
  245. mediapipe/python/packet_test.py +533 -0
  246. mediapipe/python/solution_base.py +604 -0
  247. mediapipe/python/solution_base_test.py +396 -0
  248. mediapipe/python/solutions/__init__.py +27 -0
  249. mediapipe/python/solutions/download_utils.py +37 -0
  250. mediapipe/python/solutions/drawing_styles.py +249 -0
  251. mediapipe/python/solutions/drawing_utils.py +320 -0
  252. mediapipe/python/solutions/drawing_utils_test.py +258 -0
  253. mediapipe/python/solutions/face_detection.py +105 -0
  254. mediapipe/python/solutions/face_detection_test.py +92 -0
  255. mediapipe/python/solutions/face_mesh.py +125 -0
  256. mediapipe/python/solutions/face_mesh_connections.py +500 -0
  257. mediapipe/python/solutions/face_mesh_test.py +170 -0
  258. mediapipe/python/solutions/hands.py +153 -0
  259. mediapipe/python/solutions/hands_connections.py +32 -0
  260. mediapipe/python/solutions/hands_test.py +219 -0
  261. mediapipe/python/solutions/holistic.py +167 -0
  262. mediapipe/python/solutions/holistic_test.py +142 -0
  263. mediapipe/python/solutions/objectron.py +288 -0
  264. mediapipe/python/solutions/objectron_test.py +81 -0
  265. mediapipe/python/solutions/pose.py +192 -0
  266. mediapipe/python/solutions/pose_connections.py +22 -0
  267. mediapipe/python/solutions/pose_test.py +262 -0
  268. mediapipe/python/solutions/selfie_segmentation.py +76 -0
  269. mediapipe/python/solutions/selfie_segmentation_test.py +68 -0
  270. mediapipe/python/timestamp_test.py +78 -0
  271. mediapipe/tasks/__init__.py +14 -0
  272. mediapipe/tasks/cc/__init__.py +0 -0
  273. mediapipe/tasks/cc/audio/__init__.py +0 -0
  274. mediapipe/tasks/cc/audio/audio_classifier/__init__.py +0 -0
  275. mediapipe/tasks/cc/audio/audio_classifier/proto/__init__.py +0 -0
  276. mediapipe/tasks/cc/audio/audio_classifier/proto/audio_classifier_graph_options_pb2.py +35 -0
  277. mediapipe/tasks/cc/audio/audio_embedder/__init__.py +0 -0
  278. mediapipe/tasks/cc/audio/audio_embedder/proto/__init__.py +0 -0
  279. mediapipe/tasks/cc/audio/audio_embedder/proto/audio_embedder_graph_options_pb2.py +35 -0
  280. mediapipe/tasks/cc/audio/core/__init__.py +0 -0
  281. mediapipe/tasks/cc/audio/utils/__init__.py +0 -0
  282. mediapipe/tasks/cc/components/__init__.py +0 -0
  283. mediapipe/tasks/cc/components/calculators/__init__.py +0 -0
  284. mediapipe/tasks/cc/components/calculators/classification_aggregation_calculator_pb2.py +31 -0
  285. mediapipe/tasks/cc/components/calculators/score_calibration_calculator_pb2.py +35 -0
  286. mediapipe/tasks/cc/components/calculators/tensors_to_embeddings_calculator_pb2.py +32 -0
  287. mediapipe/tasks/cc/components/containers/__init__.py +0 -0
  288. mediapipe/tasks/cc/components/containers/proto/__init__.py +0 -0
  289. mediapipe/tasks/cc/components/containers/proto/classifications_pb2.py +30 -0
  290. mediapipe/tasks/cc/components/containers/proto/embeddings_pb2.py +35 -0
  291. mediapipe/tasks/cc/components/containers/proto/landmarks_detection_result_pb2.py +32 -0
  292. mediapipe/tasks/cc/components/processors/__init__.py +0 -0
  293. mediapipe/tasks/cc/components/processors/proto/__init__.py +0 -0
  294. mediapipe/tasks/cc/components/processors/proto/classification_postprocessing_graph_options_pb2.py +38 -0
  295. mediapipe/tasks/cc/components/processors/proto/classifier_options_pb2.py +27 -0
  296. mediapipe/tasks/cc/components/processors/proto/detection_postprocessing_graph_options_pb2.py +36 -0
  297. mediapipe/tasks/cc/components/processors/proto/detector_options_pb2.py +27 -0
  298. mediapipe/tasks/cc/components/processors/proto/embedder_options_pb2.py +27 -0
  299. mediapipe/tasks/cc/components/processors/proto/embedding_postprocessing_graph_options_pb2.py +32 -0
  300. mediapipe/tasks/cc/components/processors/proto/image_preprocessing_graph_options_pb2.py +34 -0
  301. mediapipe/tasks/cc/components/processors/proto/text_model_type_pb2.py +28 -0
  302. mediapipe/tasks/cc/components/processors/proto/text_preprocessing_graph_options_pb2.py +32 -0
  303. mediapipe/tasks/cc/components/utils/__init__.py +0 -0
  304. mediapipe/tasks/cc/core/__init__.py +0 -0
  305. mediapipe/tasks/cc/core/proto/__init__.py +0 -0
  306. mediapipe/tasks/cc/core/proto/acceleration_pb2.py +28 -0
  307. mediapipe/tasks/cc/core/proto/base_options_pb2.py +30 -0
  308. mediapipe/tasks/cc/core/proto/external_file_pb2.py +31 -0
  309. mediapipe/tasks/cc/core/proto/inference_subgraph_pb2.py +32 -0
  310. mediapipe/tasks/cc/core/proto/model_resources_calculator_pb2.py +32 -0
  311. mediapipe/tasks/cc/genai/__init__.py +0 -0
  312. mediapipe/tasks/cc/genai/inference/__init__.py +0 -0
  313. mediapipe/tasks/cc/genai/inference/c/__init__.py +0 -0
  314. mediapipe/tasks/cc/genai/inference/calculators/__init__.py +0 -0
  315. mediapipe/tasks/cc/genai/inference/calculators/detokenizer_calculator_pb2.py +27 -0
  316. mediapipe/tasks/cc/genai/inference/calculators/llm_gpu_calculator_pb2.py +32 -0
  317. mediapipe/tasks/cc/genai/inference/calculators/model_data_calculator_pb2.py +27 -0
  318. mediapipe/tasks/cc/genai/inference/calculators/tokenizer_calculator_pb2.py +29 -0
  319. mediapipe/tasks/cc/genai/inference/common/__init__.py +0 -0
  320. mediapipe/tasks/cc/genai/inference/proto/__init__.py +0 -0
  321. mediapipe/tasks/cc/genai/inference/proto/llm_file_metadata_pb2.py +32 -0
  322. mediapipe/tasks/cc/genai/inference/proto/llm_params_pb2.py +33 -0
  323. mediapipe/tasks/cc/genai/inference/proto/prompt_template_pb2.py +27 -0
  324. mediapipe/tasks/cc/genai/inference/proto/sampler_params_pb2.py +29 -0
  325. mediapipe/tasks/cc/genai/inference/proto/transformer_params_pb2.py +45 -0
  326. mediapipe/tasks/cc/genai/inference/utils/__init__.py +0 -0
  327. mediapipe/tasks/cc/genai/inference/utils/llm_utils/__init__.py +0 -0
  328. mediapipe/tasks/cc/genai/inference/utils/xnn_utils/__init__.py +0 -0
  329. mediapipe/tasks/cc/metadata/__init__.py +0 -0
  330. mediapipe/tasks/cc/metadata/python/__init__.py +0 -0
  331. mediapipe/tasks/cc/metadata/python/_pywrap_metadata_version.cpython-312-x86_64-linux-gnu.so +0 -0
  332. mediapipe/tasks/cc/metadata/tests/__init__.py +0 -0
  333. mediapipe/tasks/cc/metadata/utils/__init__.py +0 -0
  334. mediapipe/tasks/cc/text/__init__.py +0 -0
  335. mediapipe/tasks/cc/text/custom_ops/__init__.py +0 -0
  336. mediapipe/tasks/cc/text/custom_ops/ragged/__init__.py +0 -0
  337. mediapipe/tasks/cc/text/custom_ops/sentencepiece/__init__.py +0 -0
  338. mediapipe/tasks/cc/text/custom_ops/sentencepiece/testdata/__init__.py +0 -0
  339. mediapipe/tasks/cc/text/language_detector/__init__.py +0 -0
  340. mediapipe/tasks/cc/text/language_detector/custom_ops/__init__.py +0 -0
  341. mediapipe/tasks/cc/text/language_detector/custom_ops/utils/__init__.py +0 -0
  342. mediapipe/tasks/cc/text/language_detector/custom_ops/utils/hash/__init__.py +0 -0
  343. mediapipe/tasks/cc/text/language_detector/custom_ops/utils/utf/__init__.py +0 -0
  344. mediapipe/tasks/cc/text/text_classifier/__init__.py +0 -0
  345. mediapipe/tasks/cc/text/text_classifier/proto/__init__.py +0 -0
  346. mediapipe/tasks/cc/text/text_classifier/proto/text_classifier_graph_options_pb2.py +35 -0
  347. mediapipe/tasks/cc/text/text_embedder/__init__.py +0 -0
  348. mediapipe/tasks/cc/text/text_embedder/proto/__init__.py +0 -0
  349. mediapipe/tasks/cc/text/text_embedder/proto/text_embedder_graph_options_pb2.py +35 -0
  350. mediapipe/tasks/cc/text/tokenizers/__init__.py +0 -0
  351. mediapipe/tasks/cc/text/utils/__init__.py +0 -0
  352. mediapipe/tasks/cc/vision/__init__.py +0 -0
  353. mediapipe/tasks/cc/vision/core/__init__.py +0 -0
  354. mediapipe/tasks/cc/vision/custom_ops/__init__.py +0 -0
  355. mediapipe/tasks/cc/vision/face_detector/__init__.py +0 -0
  356. mediapipe/tasks/cc/vision/face_detector/proto/__init__.py +0 -0
  357. mediapipe/tasks/cc/vision/face_detector/proto/face_detector_graph_options_pb2.py +34 -0
  358. mediapipe/tasks/cc/vision/face_geometry/__init__.py +0 -0
  359. mediapipe/tasks/cc/vision/face_geometry/calculators/__init__.py +0 -0
  360. mediapipe/tasks/cc/vision/face_geometry/calculators/env_generator_calculator_pb2.py +28 -0
  361. mediapipe/tasks/cc/vision/face_geometry/calculators/geometry_pipeline_calculator_pb2.py +29 -0
  362. mediapipe/tasks/cc/vision/face_geometry/data/__init__.py +0 -0
  363. mediapipe/tasks/cc/vision/face_geometry/libs/__init__.py +0 -0
  364. mediapipe/tasks/cc/vision/face_geometry/proto/__init__.py +0 -0
  365. mediapipe/tasks/cc/vision/face_geometry/proto/environment_pb2.py +31 -0
  366. mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry_graph_options_pb2.py +29 -0
  367. mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry_pb2.py +29 -0
  368. mediapipe/tasks/cc/vision/face_geometry/proto/geometry_pipeline_metadata_pb2.py +32 -0
  369. mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d_pb2.py +31 -0
  370. mediapipe/tasks/cc/vision/face_landmarker/__init__.py +0 -0
  371. mediapipe/tasks/cc/vision/face_landmarker/proto/__init__.py +0 -0
  372. mediapipe/tasks/cc/vision/face_landmarker/proto/face_blendshapes_graph_options_pb2.py +34 -0
  373. mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarker_graph_options_pb2.py +37 -0
  374. mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarks_detector_graph_options_pb2.py +35 -0
  375. mediapipe/tasks/cc/vision/face_landmarker/proto/tensors_to_face_landmarks_graph_options_pb2.py +32 -0
  376. mediapipe/tasks/cc/vision/face_stylizer/__init__.py +0 -0
  377. mediapipe/tasks/cc/vision/face_stylizer/calculators/__init__.py +0 -0
  378. mediapipe/tasks/cc/vision/face_stylizer/calculators/tensors_to_image_calculator_pb2.py +36 -0
  379. mediapipe/tasks/cc/vision/face_stylizer/proto/__init__.py +0 -0
  380. mediapipe/tasks/cc/vision/face_stylizer/proto/face_stylizer_graph_options_pb2.py +35 -0
  381. mediapipe/tasks/cc/vision/gesture_recognizer/__init__.py +0 -0
  382. mediapipe/tasks/cc/vision/gesture_recognizer/calculators/__init__.py +0 -0
  383. mediapipe/tasks/cc/vision/gesture_recognizer/calculators/combined_prediction_calculator_pb2.py +33 -0
  384. mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator_pb2.py +31 -0
  385. mediapipe/tasks/cc/vision/gesture_recognizer/proto/__init__.py +0 -0
  386. mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_classifier_graph_options_pb2.py +35 -0
  387. mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_embedder_graph_options_pb2.py +34 -0
  388. mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_recognizer_graph_options_pb2.py +36 -0
  389. mediapipe/tasks/cc/vision/gesture_recognizer/proto/hand_gesture_recognizer_graph_options_pb2.py +36 -0
  390. mediapipe/tasks/cc/vision/hand_detector/__init__.py +0 -0
  391. mediapipe/tasks/cc/vision/hand_detector/proto/__init__.py +0 -0
  392. mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options_pb2.py +34 -0
  393. mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_result_pb2.py +30 -0
  394. mediapipe/tasks/cc/vision/hand_landmarker/__init__.py +0 -0
  395. mediapipe/tasks/cc/vision/hand_landmarker/calculators/__init__.py +0 -0
  396. mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_association_calculator_pb2.py +31 -0
  397. mediapipe/tasks/cc/vision/hand_landmarker/proto/__init__.py +0 -0
  398. mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options_pb2.py +36 -0
  399. mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options_pb2.py +34 -0
  400. mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_roi_refinement_graph_options_pb2.py +28 -0
  401. mediapipe/tasks/cc/vision/holistic_landmarker/__init__.py +0 -0
  402. mediapipe/tasks/cc/vision/holistic_landmarker/proto/__init__.py +0 -0
  403. mediapipe/tasks/cc/vision/holistic_landmarker/proto/holistic_landmarker_graph_options_pb2.py +34 -0
  404. mediapipe/tasks/cc/vision/holistic_landmarker/proto/holistic_result_pb2.py +29 -0
  405. mediapipe/tasks/cc/vision/image_classifier/__init__.py +0 -0
  406. mediapipe/tasks/cc/vision/image_classifier/proto/__init__.py +0 -0
  407. mediapipe/tasks/cc/vision/image_classifier/proto/image_classifier_graph_options_pb2.py +35 -0
  408. mediapipe/tasks/cc/vision/image_embedder/__init__.py +0 -0
  409. mediapipe/tasks/cc/vision/image_embedder/proto/__init__.py +0 -0
  410. mediapipe/tasks/cc/vision/image_embedder/proto/image_embedder_graph_options_pb2.py +35 -0
  411. mediapipe/tasks/cc/vision/image_generator/__init__.py +0 -0
  412. mediapipe/tasks/cc/vision/image_generator/diffuser/__init__.py +0 -0
  413. mediapipe/tasks/cc/vision/image_generator/diffuser/stable_diffusion_iterate_calculator_pb2.py +40 -0
  414. mediapipe/tasks/cc/vision/image_generator/proto/__init__.py +0 -0
  415. mediapipe/tasks/cc/vision/image_generator/proto/conditioned_image_graph_options_pb2.py +40 -0
  416. mediapipe/tasks/cc/vision/image_generator/proto/control_plugin_graph_options_pb2.py +34 -0
  417. mediapipe/tasks/cc/vision/image_generator/proto/image_generator_graph_options_pb2.py +30 -0
  418. mediapipe/tasks/cc/vision/image_segmenter/__init__.py +0 -0
  419. mediapipe/tasks/cc/vision/image_segmenter/calculators/__init__.py +0 -0
  420. mediapipe/tasks/cc/vision/image_segmenter/calculators/tensors_to_segmentation_calculator_pb2.py +34 -0
  421. mediapipe/tasks/cc/vision/image_segmenter/proto/__init__.py +0 -0
  422. mediapipe/tasks/cc/vision/image_segmenter/proto/image_segmenter_graph_options_pb2.py +35 -0
  423. mediapipe/tasks/cc/vision/image_segmenter/proto/segmenter_options_pb2.py +33 -0
  424. mediapipe/tasks/cc/vision/interactive_segmenter/__init__.py +0 -0
  425. mediapipe/tasks/cc/vision/object_detector/__init__.py +0 -0
  426. mediapipe/tasks/cc/vision/object_detector/proto/__init__.py +0 -0
  427. mediapipe/tasks/cc/vision/object_detector/proto/object_detector_options_pb2.py +34 -0
  428. mediapipe/tasks/cc/vision/pose_detector/__init__.py +0 -0
  429. mediapipe/tasks/cc/vision/pose_detector/proto/__init__.py +0 -0
  430. mediapipe/tasks/cc/vision/pose_detector/proto/pose_detector_graph_options_pb2.py +34 -0
  431. mediapipe/tasks/cc/vision/pose_landmarker/__init__.py +0 -0
  432. mediapipe/tasks/cc/vision/pose_landmarker/proto/__init__.py +0 -0
  433. mediapipe/tasks/cc/vision/pose_landmarker/proto/pose_landmarker_graph_options_pb2.py +36 -0
  434. mediapipe/tasks/cc/vision/pose_landmarker/proto/pose_landmarks_detector_graph_options_pb2.py +34 -0
  435. mediapipe/tasks/cc/vision/utils/__init__.py +0 -0
  436. mediapipe/tasks/cc/vision/utils/ghum/__init__.py +0 -0
  437. mediapipe/tasks/metadata/image_segmenter_metadata_schema.fbs +59 -0
  438. mediapipe/tasks/metadata/image_segmenter_metadata_schema_py_generated.py +108 -0
  439. mediapipe/tasks/metadata/metadata_schema.fbs +732 -0
  440. mediapipe/tasks/metadata/metadata_schema_py_generated.py +3251 -0
  441. mediapipe/tasks/metadata/object_detector_metadata_schema.fbs +98 -0
  442. mediapipe/tasks/metadata/object_detector_metadata_schema_py_generated.py +674 -0
  443. mediapipe/tasks/metadata/schema_py_generated.py +18438 -0
  444. mediapipe/tasks/python/__init__.py +27 -0
  445. mediapipe/tasks/python/audio/__init__.py +33 -0
  446. mediapipe/tasks/python/audio/audio_classifier.py +324 -0
  447. mediapipe/tasks/python/audio/audio_embedder.py +285 -0
  448. mediapipe/tasks/python/audio/core/__init__.py +16 -0
  449. mediapipe/tasks/python/audio/core/audio_record.py +125 -0
  450. mediapipe/tasks/python/audio/core/audio_task_running_mode.py +29 -0
  451. mediapipe/tasks/python/audio/core/base_audio_task_api.py +181 -0
  452. mediapipe/tasks/python/benchmark/__init__.py +13 -0
  453. mediapipe/tasks/python/benchmark/benchmark_utils.py +70 -0
  454. mediapipe/tasks/python/benchmark/vision/__init__.py +13 -0
  455. mediapipe/tasks/python/benchmark/vision/benchmark.py +99 -0
  456. mediapipe/tasks/python/benchmark/vision/core/__init__.py +14 -0
  457. mediapipe/tasks/python/benchmark/vision/core/base_vision_benchmark_api.py +40 -0
  458. mediapipe/tasks/python/components/__init__.py +13 -0
  459. mediapipe/tasks/python/components/containers/__init__.py +53 -0
  460. mediapipe/tasks/python/components/containers/audio_data.py +137 -0
  461. mediapipe/tasks/python/components/containers/bounding_box.py +73 -0
  462. mediapipe/tasks/python/components/containers/category.py +78 -0
  463. mediapipe/tasks/python/components/containers/classification_result.py +111 -0
  464. mediapipe/tasks/python/components/containers/detections.py +181 -0
  465. mediapipe/tasks/python/components/containers/embedding_result.py +89 -0
  466. mediapipe/tasks/python/components/containers/keypoint.py +77 -0
  467. mediapipe/tasks/python/components/containers/landmark.py +122 -0
  468. mediapipe/tasks/python/components/containers/landmark_detection_result.py +106 -0
  469. mediapipe/tasks/python/components/containers/rect.py +109 -0
  470. mediapipe/tasks/python/components/processors/__init__.py +23 -0
  471. mediapipe/tasks/python/components/processors/classifier_options.py +86 -0
  472. mediapipe/tasks/python/components/utils/__init__.py +13 -0
  473. mediapipe/tasks/python/components/utils/cosine_similarity.py +68 -0
  474. mediapipe/tasks/python/core/__init__.py +13 -0
  475. mediapipe/tasks/python/core/base_options.py +121 -0
  476. mediapipe/tasks/python/core/optional_dependencies.py +25 -0
  477. mediapipe/tasks/python/core/task_info.py +139 -0
  478. mediapipe/tasks/python/genai/__init__.py +14 -0
  479. mediapipe/tasks/python/genai/bundler/__init__.py +23 -0
  480. mediapipe/tasks/python/genai/bundler/llm_bundler.py +130 -0
  481. mediapipe/tasks/python/genai/bundler/llm_bundler_test.py +168 -0
  482. mediapipe/tasks/python/genai/converter/__init__.py +24 -0
  483. mediapipe/tasks/python/genai/converter/converter_base.py +179 -0
  484. mediapipe/tasks/python/genai/converter/converter_factory.py +79 -0
  485. mediapipe/tasks/python/genai/converter/llm_converter.py +374 -0
  486. mediapipe/tasks/python/genai/converter/llm_converter_test.py +63 -0
  487. mediapipe/tasks/python/genai/converter/pytorch_converter.py +318 -0
  488. mediapipe/tasks/python/genai/converter/pytorch_converter_test.py +86 -0
  489. mediapipe/tasks/python/genai/converter/quantization_util.py +516 -0
  490. mediapipe/tasks/python/genai/converter/quantization_util_test.py +259 -0
  491. mediapipe/tasks/python/genai/converter/safetensors_converter.py +580 -0
  492. mediapipe/tasks/python/genai/converter/safetensors_converter_test.py +83 -0
  493. mediapipe/tasks/python/genai/converter/weight_bins_writer.py +120 -0
  494. mediapipe/tasks/python/genai/converter/weight_bins_writer_test.py +95 -0
  495. mediapipe/tasks/python/metadata/__init__.py +13 -0
  496. mediapipe/tasks/python/metadata/flatbuffers_lib/_pywrap_flatbuffers.cpython-312-x86_64-linux-gnu.so +0 -0
  497. mediapipe/tasks/python/metadata/metadata.py +928 -0
  498. mediapipe/tasks/python/metadata/metadata_displayer_cli.py +34 -0
  499. mediapipe/tasks/python/metadata/metadata_writers/__init__.py +13 -0
  500. mediapipe/tasks/python/metadata/metadata_writers/face_stylizer.py +138 -0
  501. mediapipe/tasks/python/metadata/metadata_writers/image_classifier.py +71 -0
  502. mediapipe/tasks/python/metadata/metadata_writers/image_segmenter.py +170 -0
  503. mediapipe/tasks/python/metadata/metadata_writers/metadata_info.py +1166 -0
  504. mediapipe/tasks/python/metadata/metadata_writers/metadata_writer.py +845 -0
  505. mediapipe/tasks/python/metadata/metadata_writers/model_asset_bundle_utils.py +71 -0
  506. mediapipe/tasks/python/metadata/metadata_writers/object_detector.py +331 -0
  507. mediapipe/tasks/python/metadata/metadata_writers/text_classifier.py +119 -0
  508. mediapipe/tasks/python/metadata/metadata_writers/writer_utils.py +91 -0
  509. mediapipe/tasks/python/test/__init__.py +13 -0
  510. mediapipe/tasks/python/test/audio/__init__.py +13 -0
  511. mediapipe/tasks/python/test/audio/audio_classifier_test.py +387 -0
  512. mediapipe/tasks/python/test/audio/audio_embedder_test.py +297 -0
  513. mediapipe/tasks/python/test/test_utils.py +196 -0
  514. mediapipe/tasks/python/test/text/__init__.py +13 -0
  515. mediapipe/tasks/python/test/text/language_detector_test.py +228 -0
  516. mediapipe/tasks/python/test/text/text_classifier_test.py +235 -0
  517. mediapipe/tasks/python/test/text/text_embedder_test.py +326 -0
  518. mediapipe/tasks/python/test/vision/__init__.py +13 -0
  519. mediapipe/tasks/python/test/vision/face_aligner_test.py +190 -0
  520. mediapipe/tasks/python/test/vision/face_detector_test.py +523 -0
  521. mediapipe/tasks/python/test/vision/face_landmarker_test.py +565 -0
  522. mediapipe/tasks/python/test/vision/face_stylizer_test.py +191 -0
  523. mediapipe/tasks/python/test/vision/hand_landmarker_test.py +437 -0
  524. mediapipe/tasks/python/test/vision/holistic_landmarker_test.py +544 -0
  525. mediapipe/tasks/python/test/vision/image_classifier_test.py +657 -0
  526. mediapipe/tasks/python/test/vision/image_embedder_test.py +423 -0
  527. mediapipe/tasks/python/test/vision/image_segmenter_test.py +512 -0
  528. mediapipe/tasks/python/test/vision/interactive_segmenter_test.py +341 -0
  529. mediapipe/tasks/python/test/vision/object_detector_test.py +493 -0
  530. mediapipe/tasks/python/test/vision/pose_landmarker_test.py +518 -0
  531. mediapipe/tasks/python/text/__init__.py +35 -0
  532. mediapipe/tasks/python/text/core/__init__.py +16 -0
  533. mediapipe/tasks/python/text/core/base_text_task_api.py +54 -0
  534. mediapipe/tasks/python/text/language_detector.py +220 -0
  535. mediapipe/tasks/python/text/text_classifier.py +187 -0
  536. mediapipe/tasks/python/text/text_embedder.py +188 -0
  537. mediapipe/tasks/python/vision/__init__.py +90 -0
  538. mediapipe/tasks/python/vision/core/__init__.py +14 -0
  539. mediapipe/tasks/python/vision/core/base_vision_task_api.py +226 -0
  540. mediapipe/tasks/python/vision/core/image_processing_options.py +39 -0
  541. mediapipe/tasks/python/vision/core/vision_task_running_mode.py +31 -0
  542. mediapipe/tasks/python/vision/face_aligner.py +158 -0
  543. mediapipe/tasks/python/vision/face_detector.py +332 -0
  544. mediapipe/tasks/python/vision/face_landmarker.py +3244 -0
  545. mediapipe/tasks/python/vision/face_stylizer.py +158 -0
  546. mediapipe/tasks/python/vision/gesture_recognizer.py +480 -0
  547. mediapipe/tasks/python/vision/hand_landmarker.py +504 -0
  548. mediapipe/tasks/python/vision/holistic_landmarker.py +576 -0
  549. mediapipe/tasks/python/vision/image_classifier.py +358 -0
  550. mediapipe/tasks/python/vision/image_embedder.py +362 -0
  551. mediapipe/tasks/python/vision/image_segmenter.py +433 -0
  552. mediapipe/tasks/python/vision/interactive_segmenter.py +285 -0
  553. mediapipe/tasks/python/vision/object_detector.py +389 -0
  554. mediapipe/tasks/python/vision/pose_landmarker.py +455 -0
  555. mediapipe/util/__init__.py +0 -0
  556. mediapipe/util/analytics/__init__.py +0 -0
  557. mediapipe/util/analytics/mediapipe_log_extension_pb2.py +44 -0
  558. mediapipe/util/analytics/mediapipe_logging_enums_pb2.py +37 -0
  559. mediapipe/util/audio_decoder_pb2.py +33 -0
  560. mediapipe/util/color_pb2.py +33 -0
  561. mediapipe/util/label_map_pb2.py +27 -0
  562. mediapipe/util/render_data_pb2.py +58 -0
  563. mediapipe/util/sequence/__init__.py +14 -0
  564. mediapipe/util/sequence/media_sequence.py +716 -0
  565. mediapipe/util/sequence/media_sequence_test.py +290 -0
  566. mediapipe/util/sequence/media_sequence_util.py +800 -0
  567. mediapipe/util/sequence/media_sequence_util_test.py +389 -0
  568. mediapipe/util/tracking/__init__.py +0 -0
  569. mediapipe/util/tracking/box_detector_pb2.py +39 -0
  570. mediapipe/util/tracking/box_tracker_pb2.py +32 -0
  571. mediapipe/util/tracking/camera_motion_pb2.py +31 -0
  572. mediapipe/util/tracking/flow_packager_pb2.py +60 -0
  573. mediapipe/util/tracking/frame_selection_pb2.py +35 -0
  574. mediapipe/util/tracking/frame_selection_solution_evaluator_pb2.py +28 -0
  575. mediapipe/util/tracking/motion_analysis_pb2.py +35 -0
  576. mediapipe/util/tracking/motion_estimation_pb2.py +66 -0
  577. mediapipe/util/tracking/motion_models_pb2.py +42 -0
  578. mediapipe/util/tracking/motion_saliency_pb2.py +26 -0
  579. mediapipe/util/tracking/push_pull_filtering_pb2.py +26 -0
  580. mediapipe/util/tracking/region_flow_computation_pb2.py +59 -0
  581. mediapipe/util/tracking/region_flow_pb2.py +49 -0
  582. mediapipe/util/tracking/tone_estimation_pb2.py +45 -0
  583. mediapipe/util/tracking/tone_models_pb2.py +32 -0
  584. mediapipe/util/tracking/tracked_detection_manager_config_pb2.py +26 -0
  585. mediapipe/util/tracking/tracking_pb2.py +73 -0
  586. mediapipe_nightly-0.10.21.post20241223.dist-info/LICENSE +218 -0
  587. mediapipe_nightly-0.10.21.post20241223.dist-info/METADATA +199 -0
  588. mediapipe_nightly-0.10.21.post20241223.dist-info/RECORD +593 -0
  589. mediapipe_nightly-0.10.21.post20241223.dist-info/WHEEL +5 -0
  590. mediapipe_nightly-0.10.21.post20241223.dist-info/top_level.txt +4 -0
  591. mediapipe_nightly.libs/libEGL-48f73270.so.1.1.0 +0 -0
  592. mediapipe_nightly.libs/libGLESv2-ed5eda4f.so.2.1.0 +0 -0
  593. mediapipe_nightly.libs/libGLdispatch-64b28464.so.0.0.0 +0 -0
@@ -0,0 +1,387 @@
1
+ # Copyright 2022 The MediaPipe Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Tests for audio classifier."""
15
+
16
+ import os
17
+ from typing import List, Tuple
18
+ from unittest import mock
19
+
20
+ from absl.testing import absltest
21
+ from absl.testing import parameterized
22
+ import numpy as np
23
+ from scipy.io import wavfile
24
+
25
+ from mediapipe.tasks.python.audio import audio_classifier
26
+ from mediapipe.tasks.python.audio.core import audio_record
27
+ from mediapipe.tasks.python.audio.core import audio_task_running_mode
28
+ from mediapipe.tasks.python.components.containers import audio_data as audio_data_module
29
+ from mediapipe.tasks.python.components.containers import classification_result as classification_result_module
30
+ from mediapipe.tasks.python.core import base_options as base_options_module
31
+ from mediapipe.tasks.python.test import test_utils
32
+
33
# Short aliases for the task API types exercised by these tests.
_AudioClassifier = audio_classifier.AudioClassifier
_AudioClassifierOptions = audio_classifier.AudioClassifierOptions
_AudioClassifierResult = classification_result_module.ClassificationResult
_AudioData = audio_data_module.AudioData
_AudioRecord = audio_record.AudioRecord
_BaseOptions = base_options_module.BaseOptions
_RUNNING_MODE = audio_task_running_mode.AudioTaskRunningMode

# Test fixtures: model files and WAV clips resolved under _TEST_DATA_DIR.
_YAMNET_MODEL_FILE = 'yamnet_audio_classifier_with_metadata.tflite'
_YAMNET_MODEL_SAMPLE_RATE = 16000
_TWO_HEADS_MODEL_FILE = 'two_heads.tflite'
_SPEECH_WAV_16K_MONO = 'speech_16000_hz_mono.wav'
_SPEECH_WAV_48K_MONO = 'speech_48000_hz_mono.wav'
_TEST_DATA_DIR = 'mediapipe/tasks/testdata/audio'
_TWO_HEADS_WAV_16K_MONO = 'two_heads_16000_hz_mono.wav'
_TWO_HEADS_WAV_44K_MONO = 'two_heads_44100_hz_mono.wav'
# Number of input samples in one YAMNet model window (0.975 s at 16 kHz).
_YAMNET_NUM_OF_SAMPLES = 15600
_MILLISECONDS_PER_SECOND = 1000
51
+
52
+
53
class AudioClassifierTest(parameterized.TestCase):
  """Unit tests for the MediaPipe audio classifier task.

  Covers creation from file path / buffer / options, classification in both
  the audio-clips and audio-stream running modes, classifier options
  (max_results, score_threshold, allow/deny lists), and error handling for
  invalid option combinations.
  """

  def setUp(self):
    super().setUp()
    self.yamnet_model_path = test_utils.get_test_data_path(
        os.path.join(_TEST_DATA_DIR, _YAMNET_MODEL_FILE))
    self.two_heads_model_path = test_utils.get_test_data_path(
        os.path.join(_TEST_DATA_DIR, _TWO_HEADS_MODEL_FILE))

  def _read_wav_file(self, file_name) -> _AudioData:
    """Reads a test WAV file into a single float `_AudioData` clip."""
    sample_rate, buffer = wavfile.read(
        test_utils.get_test_data_path(os.path.join(_TEST_DATA_DIR, file_name)))
    # Normalize 16-bit PCM samples to the [-1, 1] float range.
    return _AudioData.create_from_array(
        buffer.astype(float) / np.iinfo(np.int16).max, sample_rate)

  def _read_wav_file_as_stream(self, file_name) -> List[Tuple[_AudioData, int]]:
    """Splits a test WAV file into `(audio chunk, timestamp_ms)` pairs.

    Each chunk carries the number of input samples that maps to one YAMNet
    model window (_YAMNET_NUM_OF_SAMPLES at 16 kHz), scaled by the file's
    own sample rate, to simulate streaming input.
    """
    sample_rate, buffer = wavfile.read(
        test_utils.get_test_data_path(os.path.join(_TEST_DATA_DIR, file_name)))
    audio_data_list = []
    start = 0
    step_size = _YAMNET_NUM_OF_SAMPLES * sample_rate / _YAMNET_MODEL_SAMPLE_RATE
    while start < len(buffer):
      end = min(start + int(step_size), len(buffer))
      audio_data_list.append((_AudioData.create_from_array(
          buffer[start:end].astype(float) / np.iinfo(np.int16).max,
          sample_rate), int(start / sample_rate * _MILLISECONDS_PER_SECOND)))
      start = end
    return audio_data_list

  # TODO: Compares the exact score values to capture unexpected
  # changes in the inference pipeline.
  def _check_yamnet_result(
      self,
      classification_result_list: List[_AudioClassifierResult],
      expected_num_categories=521):
    """Asserts that every YAMNet window classifies its top category 'Speech'."""
    self.assertLen(classification_result_list, 5)
    for idx, timestamp in enumerate([0, 975, 1950, 2925]):
      classification_result = classification_result_list[idx]
      self.assertEqual(classification_result.timestamp_ms, timestamp)
      self.assertLen(classification_result.classifications, 1)
      classification = classification_result.classifications[0]
      self.assertEqual(classification.head_index, 0)
      self.assertEqual(classification.head_name, 'scores')
      self.assertLen(classification.categories, expected_num_categories)
      audio_category = classification.categories[0]
      self.assertEqual(audio_category.index, 0)
      self.assertEqual(audio_category.category_name, 'Speech')
      self.assertGreater(audio_category.score, 0.9)

  # TODO: Compares the exact score values to capture unexpected
  # changes in the inference pipeline.
  def _check_two_heads_result(
      self,
      classification_result_list: List[_AudioClassifierResult],
      first_head_expected_num_categories=521,
      second_head_expected_num_categories=5):
    """Asserts per-head categories of the two-heads model's results."""
    self.assertGreaterEqual(len(classification_result_list), 1)
    self.assertLessEqual(len(classification_result_list), 2)
    # Checks the first result.
    classification_result = classification_result_list[0]
    self.assertEqual(classification_result.timestamp_ms, 0)
    self.assertLen(classification_result.classifications, 2)
    # Checks the first head.
    yamnet_classification = classification_result.classifications[0]
    self.assertEqual(yamnet_classification.head_index, 0)
    self.assertEqual(yamnet_classification.head_name, 'yamnet_classification')
    self.assertLen(yamnet_classification.categories,
                   first_head_expected_num_categories)
    yamnet_category = yamnet_classification.categories[0]
    self.assertEqual(yamnet_category.index, 508)
    self.assertEqual(yamnet_category.category_name, 'Environmental noise')
    self.assertGreater(yamnet_category.score, 0.5)
    # Checks the second head.
    bird_classification = classification_result.classifications[1]
    self.assertEqual(bird_classification.head_index, 1)
    self.assertEqual(bird_classification.head_name, 'bird_classification')
    self.assertLen(bird_classification.categories,
                   second_head_expected_num_categories)
    bird_category = bird_classification.categories[0]
    self.assertEqual(bird_category.index, 4)
    self.assertEqual(bird_category.category_name, 'Chestnut-crowned Antpitta')
    self.assertGreater(bird_category.score, 0.93)
    # Checks the second result, if present.
    if len(classification_result_list) == 2:
      classification_result = classification_result_list[1]
      self.assertEqual(classification_result.timestamp_ms, 975)
      self.assertLen(classification_result.classifications, 2)
      # Checks the first head.
      yamnet_classification = classification_result.classifications[0]
      self.assertEqual(yamnet_classification.head_index, 0)
      self.assertEqual(yamnet_classification.head_name, 'yamnet_classification')
      self.assertLen(yamnet_classification.categories,
                     first_head_expected_num_categories)
      yamnet_category = yamnet_classification.categories[0]
      self.assertEqual(yamnet_category.index, 494)
      self.assertEqual(yamnet_category.category_name, 'Silence')
      self.assertGreater(yamnet_category.score, 0.9)
      # Checks the second head.
      bird_classification = classification_result.classifications[1]
      self.assertEqual(bird_classification.head_index, 1)
      self.assertEqual(bird_classification.head_name, 'bird_classification')
      self.assertLen(bird_classification.categories,
                     second_head_expected_num_categories)
      bird_category = bird_classification.categories[0]
      self.assertEqual(bird_category.index, 1)
      self.assertEqual(bird_category.category_name, 'White-breasted Wood-Wren')
      self.assertGreater(bird_category.score, 0.99)

  def test_create_from_file_succeeds_with_valid_model_path(self):
    # Creates with default option and valid model file successfully.
    with _AudioClassifier.create_from_model_path(
        self.yamnet_model_path) as classifier:
      self.assertIsInstance(classifier, _AudioClassifier)

  def test_create_from_options_succeeds_with_valid_model_path(self):
    # Creates with options containing model file successfully.
    with _AudioClassifier.create_from_options(
        _AudioClassifierOptions(
            base_options=_BaseOptions(
                model_asset_path=self.yamnet_model_path))) as classifier:
      self.assertIsInstance(classifier, _AudioClassifier)

  def test_create_from_options_fails_with_invalid_model_path(self):
    with self.assertRaisesRegex(
        RuntimeError, 'Unable to open file at /path/to/invalid/model.tflite'):
      base_options = _BaseOptions(
          model_asset_path='/path/to/invalid/model.tflite')
      options = _AudioClassifierOptions(base_options=base_options)
      _AudioClassifier.create_from_options(options)

  def test_create_from_options_succeeds_with_valid_model_content(self):
    # Creates with options containing model content successfully.
    with open(self.yamnet_model_path, 'rb') as f:
      base_options = _BaseOptions(model_asset_buffer=f.read())
      options = _AudioClassifierOptions(base_options=base_options)
      classifier = _AudioClassifier.create_from_options(options)
      self.assertIsInstance(classifier, _AudioClassifier)

  @parameterized.parameters((_SPEECH_WAV_16K_MONO), (_SPEECH_WAV_48K_MONO))
  def test_classify_with_yamnet_model(self, audio_file):
    with _AudioClassifier.create_from_model_path(
        self.yamnet_model_path) as classifier:
      classification_result_list = classifier.classify(
          self._read_wav_file(audio_file))
      self._check_yamnet_result(classification_result_list)

  def test_classify_with_yamnet_model_and_inputs_at_different_sample_rates(
      self):
    with _AudioClassifier.create_from_model_path(
        self.yamnet_model_path) as classifier:
      for audio_file in [_SPEECH_WAV_16K_MONO, _SPEECH_WAV_48K_MONO]:
        classification_result_list = classifier.classify(
            self._read_wav_file(audio_file))
        self._check_yamnet_result(classification_result_list)

  @mock.patch('sounddevice.InputStream', return_value=mock.MagicMock())
  def test_create_audio_record_from_classifier_succeeds(self, _):
    # Creates AudioRecord instance using the classifier successfully.
    with _AudioClassifier.create_from_model_path(
        self.yamnet_model_path
    ) as classifier:
      self.assertIsInstance(classifier, _AudioClassifier)
      record = classifier.create_audio_record(1, 16000, 16000)
      self.assertIsInstance(record, _AudioRecord)
      self.assertEqual(record.channels, 1)
      self.assertEqual(record.sampling_rate, 16000)
      self.assertEqual(record.buffer_size, 16000)

  def test_max_result_options(self):
    with _AudioClassifier.create_from_options(
        _AudioClassifierOptions(
            base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
            max_results=1)) as classifier:
      # Exercise both sample rates, matching the sibling classify tests.
      for audio_file in [_SPEECH_WAV_16K_MONO, _SPEECH_WAV_48K_MONO]:
        classification_result_list = classifier.classify(
            self._read_wav_file(audio_file))
        self._check_yamnet_result(
            classification_result_list, expected_num_categories=1)

  def test_score_threshold_options(self):
    with _AudioClassifier.create_from_options(
        _AudioClassifierOptions(
            base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
            score_threshold=0.9)) as classifier:
      # Exercise both sample rates, matching the sibling classify tests.
      for audio_file in [_SPEECH_WAV_16K_MONO, _SPEECH_WAV_48K_MONO]:
        classification_result_list = classifier.classify(
            self._read_wav_file(audio_file))
        self._check_yamnet_result(
            classification_result_list, expected_num_categories=1)

  def test_allow_list_option(self):
    with _AudioClassifier.create_from_options(
        _AudioClassifierOptions(
            base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
            category_allowlist=['Speech'])) as classifier:
      # Exercise both sample rates, matching the sibling classify tests.
      for audio_file in [_SPEECH_WAV_16K_MONO, _SPEECH_WAV_48K_MONO]:
        classification_result_list = classifier.classify(
            self._read_wav_file(audio_file))
        self._check_yamnet_result(
            classification_result_list, expected_num_categories=1)

  def test_combined_allowlist_and_denylist(self):
    # Fails with combined allowlist and denylist
    with self.assertRaisesRegex(
        ValueError,
        r'`category_allowlist` and `category_denylist` are mutually '
        r'exclusive options.'):
      options = _AudioClassifierOptions(
          base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
          category_allowlist=['foo'],
          category_denylist=['bar'])
      with _AudioClassifier.create_from_options(options) as unused_classifier:
        pass

  @parameterized.parameters((_TWO_HEADS_WAV_16K_MONO),
                            (_TWO_HEADS_WAV_44K_MONO))
  def test_classify_with_two_heads_model_and_inputs_at_different_sample_rates(
      self, audio_file):
    with _AudioClassifier.create_from_model_path(
        self.two_heads_model_path) as classifier:
      classification_result_list = classifier.classify(
          self._read_wav_file(audio_file))
      self._check_two_heads_result(classification_result_list)

  def test_classify_with_two_heads_model(self):
    with _AudioClassifier.create_from_model_path(
        self.two_heads_model_path) as classifier:
      for audio_file in [_TWO_HEADS_WAV_16K_MONO, _TWO_HEADS_WAV_44K_MONO]:
        classification_result_list = classifier.classify(
            self._read_wav_file(audio_file))
        self._check_two_heads_result(classification_result_list)

  def test_classify_with_two_heads_model_with_max_results(self):
    with _AudioClassifier.create_from_options(
        _AudioClassifierOptions(
            base_options=_BaseOptions(
                model_asset_path=self.two_heads_model_path),
            max_results=1)) as classifier:
      for audio_file in [_TWO_HEADS_WAV_16K_MONO, _TWO_HEADS_WAV_44K_MONO]:
        classification_result_list = classifier.classify(
            self._read_wav_file(audio_file))
        self._check_two_heads_result(classification_result_list, 1, 1)

  def test_missing_sample_rate_in_audio_clips_mode(self):
    options = _AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_CLIPS)
    with self.assertRaisesRegex(ValueError,
                                r'Must provide the audio sample rate'):
      with _AudioClassifier.create_from_options(options) as classifier:
        classifier.classify(_AudioData(buffer_length=100))

  def test_missing_sample_rate_in_audio_stream_mode(self):
    options = _AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_STREAM,
        result_callback=mock.MagicMock())
    with self.assertRaisesRegex(ValueError,
                                r'provide the audio sample rate in audio data'):
      with _AudioClassifier.create_from_options(options) as classifier:
        classifier.classify(_AudioData(buffer_length=100))

  def test_missing_result_callback(self):
    options = _AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_STREAM)
    with self.assertRaisesRegex(ValueError,
                                r'result callback must be provided'):
      with _AudioClassifier.create_from_options(options) as unused_classifier:
        pass

  def test_illegal_result_callback(self):
    options = _AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_CLIPS,
        result_callback=mock.MagicMock())
    with self.assertRaisesRegex(ValueError,
                                r'result callback should not be provided'):
      with _AudioClassifier.create_from_options(options) as unused_classifier:
        pass

  def test_calling_classify_in_audio_stream_mode(self):
    options = _AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_STREAM,
        result_callback=mock.MagicMock())
    with _AudioClassifier.create_from_options(options) as classifier:
      with self.assertRaisesRegex(ValueError,
                                  r'not initialized with the audio clips mode'):
        classifier.classify(self._read_wav_file(_SPEECH_WAV_16K_MONO))

  def test_calling_classify_async_in_audio_clips_mode(self):
    options = _AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_CLIPS)
    with _AudioClassifier.create_from_options(options) as classifier:
      with self.assertRaisesRegex(
          ValueError, r'not initialized with the audio stream mode'):
        classifier.classify_async(self._read_wav_file(_SPEECH_WAV_16K_MONO), 0)

  def test_classify_async_calls_with_illegal_timestamp(self):
    options = _AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_STREAM,
        result_callback=mock.MagicMock())
    with _AudioClassifier.create_from_options(options) as classifier:
      classifier.classify_async(self._read_wav_file(_SPEECH_WAV_16K_MONO), 100)
      with self.assertRaisesRegex(
          ValueError, r'Input timestamp must be monotonically increasing'):
        classifier.classify_async(self._read_wav_file(_SPEECH_WAV_16K_MONO), 0)

  @parameterized.parameters((_SPEECH_WAV_16K_MONO), (_SPEECH_WAV_48K_MONO))
  def test_classify_async(self, audio_file):
    classification_result_list = []

    def save_result(result: _AudioClassifierResult, timestamp_ms: int):
      result.timestamp_ms = timestamp_ms
      classification_result_list.append(result)

    options = _AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_STREAM,
        max_results=1,
        result_callback=save_result)
    classifier = _AudioClassifier.create_from_options(options)
    audio_data_list = self._read_wav_file_as_stream(audio_file)
    for audio_data, timestamp_ms in audio_data_list:
      classifier.classify_async(audio_data, timestamp_ms)
    # close() flushes the stream, delivering any remaining results to the
    # callback before the checks below.
    classifier.close()
    self._check_yamnet_result(
        classification_result_list, expected_num_categories=1)
384
+
385
+
386
if __name__ == '__main__':
  # Run the tests via absl's test runner (parses absl flags before running).
  absltest.main()
@@ -0,0 +1,297 @@
1
+ # Copyright 2022 The MediaPipe Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Tests for audio embedder."""
15
+ import enum
16
+ import os
17
+ from typing import List, Tuple
18
+ from unittest import mock
19
+
20
+ from absl.testing import absltest
21
+ from absl.testing import parameterized
22
+
23
+ import numpy as np
24
+ from scipy.io import wavfile
25
+
26
+ from mediapipe.tasks.python.audio import audio_embedder
27
+ from mediapipe.tasks.python.audio.core import audio_record
28
+ from mediapipe.tasks.python.audio.core import audio_task_running_mode
29
+ from mediapipe.tasks.python.components.containers import audio_data as audio_data_module
30
+ from mediapipe.tasks.python.core import base_options as base_options_module
31
+ from mediapipe.tasks.python.test import test_utils
32
+
33
+ _AudioEmbedder = audio_embedder.AudioEmbedder
34
+ _AudioEmbedderOptions = audio_embedder.AudioEmbedderOptions
35
+ _AudioEmbedderResult = audio_embedder.AudioEmbedderResult
36
+ _AudioData = audio_data_module.AudioData
37
+ _AudioRecord = audio_record.AudioRecord
38
+ _BaseOptions = base_options_module.BaseOptions
39
+ _RUNNING_MODE = audio_task_running_mode.AudioTaskRunningMode
40
+
41
+ _YAMNET_MODEL_FILE = 'yamnet_embedding_metadata.tflite'
42
+ _YAMNET_MODEL_SAMPLE_RATE = 16000
43
+ _SPEECH_WAV_16K_MONO = 'speech_16000_hz_mono.wav'
44
+ _SPEECH_WAV_48K_MONO = 'speech_48000_hz_mono.wav'
45
+ _TWO_HEADS_WAV_16K_MONO = 'two_heads_16000_hz_mono.wav'
46
+ _TEST_DATA_DIR = 'mediapipe/tasks/testdata/audio'
47
+ _YAMNET_NUM_OF_SAMPLES = 15600
48
+ _MILLISECONDS_PER_SECOND = 1000
49
+ # Tolerance for embedding vector coordinate values.
50
+ _EPSILON = 3e-6
51
+
52
+
53
+ class ModelFileType(enum.Enum):
54
+ FILE_CONTENT = 1
55
+ FILE_NAME = 2
56
+
57
+
58
+ class AudioEmbedderTest(parameterized.TestCase):
59
+
60
+ def setUp(self):
61
+ super().setUp()
62
+ self.yamnet_model_path = test_utils.get_test_data_path(
63
+ os.path.join(_TEST_DATA_DIR, _YAMNET_MODEL_FILE))
64
+
65
+ def _read_wav_file(self, file_name) -> _AudioData:
66
+ sample_rate, buffer = wavfile.read(
67
+ test_utils.get_test_data_path(os.path.join(_TEST_DATA_DIR, file_name)))
68
+ return _AudioData.create_from_array(
69
+ buffer.astype(float) / np.iinfo(np.int16).max, sample_rate)
70
+
71
+ def _read_wav_file_as_stream(self, file_name) -> List[Tuple[_AudioData, int]]:
72
+ sample_rate, buffer = wavfile.read(
73
+ test_utils.get_test_data_path(os.path.join(_TEST_DATA_DIR, file_name)))
74
+ audio_data_list = []
75
+ start = 0
76
+ step_size = _YAMNET_NUM_OF_SAMPLES * sample_rate / _YAMNET_MODEL_SAMPLE_RATE
77
+ while start < len(buffer):
78
+ end = min(start + (int)(step_size), len(buffer))
79
+ audio_data_list.append((_AudioData.create_from_array(
80
+ buffer[start:end].astype(float) / np.iinfo(np.int16).max,
81
+ sample_rate), (int)(start / sample_rate * _MILLISECONDS_PER_SECOND)))
82
+ start = end
83
+ return audio_data_list
84
+
85
+ def _check_embedding_value(self, result, expected_first_value):
86
+ # Check embedding first value.
87
+ self.assertAlmostEqual(
88
+ result.embeddings[0].embedding[0], expected_first_value, delta=_EPSILON)
89
+
90
+ def _check_embedding_size(self, result, quantize, expected_embedding_size):
91
+ # Check embedding size.
92
+ self.assertLen(result.embeddings, 1)
93
+ embedding_result = result.embeddings[0]
94
+ self.assertLen(embedding_result.embedding, expected_embedding_size)
95
+ if quantize:
96
+ self.assertEqual(embedding_result.embedding.dtype, np.uint8)
97
+ else:
98
+ self.assertEqual(embedding_result.embedding.dtype, float)
99
+
100
+ def test_create_from_file_succeeds_with_valid_model_path(self):
101
+ # Creates with default option and valid model file successfully.
102
+ with _AudioEmbedder.create_from_model_path(
103
+ self.yamnet_model_path) as embedder:
104
+ self.assertIsInstance(embedder, _AudioEmbedder)
105
+
106
+ def test_create_from_options_succeeds_with_valid_model_path(self):
107
+ # Creates with options containing model file successfully.
108
+ with _AudioEmbedder.create_from_options(
109
+ _AudioEmbedderOptions(
110
+ base_options=_BaseOptions(
111
+ model_asset_path=self.yamnet_model_path))) as embedder:
112
+ self.assertIsInstance(embedder, _AudioEmbedder)
113
+
114
+ def test_create_from_options_fails_with_invalid_model_path(self):
115
+ with self.assertRaisesRegex(
116
+ RuntimeError, 'Unable to open file at /path/to/invalid/model.tflite'):
117
+ base_options = _BaseOptions(
118
+ model_asset_path='/path/to/invalid/model.tflite')
119
+ options = _AudioEmbedderOptions(base_options=base_options)
120
+ _AudioEmbedder.create_from_options(options)
121
+
122
+ def test_create_from_options_succeeds_with_valid_model_content(self):
123
+ # Creates with options containing model content successfully.
124
+ with open(self.yamnet_model_path, 'rb') as f:
125
+ base_options = _BaseOptions(model_asset_buffer=f.read())
126
+ options = _AudioEmbedderOptions(base_options=base_options)
127
+ embedder = _AudioEmbedder.create_from_options(options)
128
+ self.assertIsInstance(embedder, _AudioEmbedder)
129
+
130
+ @parameterized.parameters(
131
+ # Same audio inputs but different sample rates.
132
+ (False, False, ModelFileType.FILE_NAME, _SPEECH_WAV_16K_MONO,
133
+ _SPEECH_WAV_48K_MONO, 1024, (0, 0)),
134
+ (False, False, ModelFileType.FILE_CONTENT, _SPEECH_WAV_16K_MONO,
135
+ _SPEECH_WAV_48K_MONO, 1024, (0, 0)))
136
+ def test_embed_with_yamnet_model(self, l2_normalize, quantize,
137
+ model_file_type, audio_file0, audio_file1,
138
+ expected_size, expected_first_values):
139
+ # Creates embedder.
140
+ if model_file_type is ModelFileType.FILE_NAME:
141
+ base_options = _BaseOptions(model_asset_path=self.yamnet_model_path)
142
+ elif model_file_type is ModelFileType.FILE_CONTENT:
143
+ with open(self.yamnet_model_path, 'rb') as f:
144
+ model_content = f.read()
145
+ base_options = _BaseOptions(model_asset_buffer=model_content)
146
+ else:
147
+ # Should never happen
148
+ raise ValueError('model_file_type is invalid.')
149
+
150
+ options = _AudioEmbedderOptions(
151
+ base_options=base_options, l2_normalize=l2_normalize, quantize=quantize)
152
+
153
+ with _AudioEmbedder.create_from_options(options) as embedder:
154
+ embedding_result0_list = embedder.embed(self._read_wav_file(audio_file0))
155
+ embedding_result1_list = embedder.embed(self._read_wav_file(audio_file1))
156
+
157
+ # Checks embeddings.
158
+ expected_result0_value, expected_result1_value = expected_first_values
159
+ self._check_embedding_size(embedding_result0_list[0], quantize,
160
+ expected_size)
161
+ self._check_embedding_size(embedding_result1_list[0], quantize,
162
+ expected_size)
163
+ self._check_embedding_value(embedding_result0_list[0],
164
+ expected_result0_value)
165
+ self._check_embedding_value(embedding_result1_list[0],
166
+ expected_result1_value)
167
+ self.assertLen(embedding_result0_list, 5)
168
+ self.assertLen(embedding_result1_list, 5)
169
+
170
+ @mock.patch('sounddevice.InputStream', return_value=mock.MagicMock())
171
+ def test_create_audio_record_from_embedder_succeeds(self, _):
172
+ # Creates AudioRecord instance using the embedder successfully.
173
+ with _AudioEmbedder.create_from_model_path(
174
+ self.yamnet_model_path
175
+ ) as embedder:
176
+ self.assertIsInstance(embedder, _AudioEmbedder)
177
+ record = embedder.create_audio_record(1, 16000, 16000)
178
+ self.assertIsInstance(record, _AudioRecord)
179
+ self.assertEqual(record.channels, 1)
180
+ self.assertEqual(record.sampling_rate, 16000)
181
+ self.assertEqual(record.buffer_size, 16000)
182
+
183
+ def test_embed_with_yamnet_model_and_different_inputs(self):
184
+ with _AudioEmbedder.create_from_model_path(
185
+ self.yamnet_model_path) as embedder:
186
+ embedding_result0_list = embedder.embed(
187
+ self._read_wav_file(_SPEECH_WAV_16K_MONO))
188
+ embedding_result1_list = embedder.embed(
189
+ self._read_wav_file(_TWO_HEADS_WAV_16K_MONO))
190
+ self.assertLen(embedding_result0_list, 5)
191
+ self.assertLen(embedding_result1_list, 1)
192
+
193
+ def test_missing_sample_rate_in_audio_clips_mode(self):
194
+ options = _AudioEmbedderOptions(
195
+ base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
196
+ running_mode=_RUNNING_MODE.AUDIO_CLIPS)
197
+ with self.assertRaisesRegex(ValueError,
198
+ r'Must provide the audio sample rate'):
199
+ with _AudioEmbedder.create_from_options(options) as embedder:
200
+ embedder.embed(_AudioData(buffer_length=100))
201
+
202
+ def test_missing_sample_rate_in_audio_stream_mode(self):
203
+ options = _AudioEmbedderOptions(
204
+ base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
205
+ running_mode=_RUNNING_MODE.AUDIO_STREAM,
206
+ result_callback=mock.MagicMock())
207
+ with self.assertRaisesRegex(ValueError,
208
+ r'provide the audio sample rate in audio data'):
209
+ with _AudioEmbedder.create_from_options(options) as embedder:
210
+ embedder.embed(_AudioData(buffer_length=100))
211
+
212
+ def test_missing_result_callback(self):
213
+ options = _AudioEmbedderOptions(
214
+ base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
215
+ running_mode=_RUNNING_MODE.AUDIO_STREAM)
216
+ with self.assertRaisesRegex(ValueError,
217
+ r'result callback must be provided'):
218
+ with _AudioEmbedder.create_from_options(options) as unused_embedder:
219
+ pass
220
+
221
+ def test_illegal_result_callback(self):
222
+ options = _AudioEmbedderOptions(
223
+ base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
224
+ running_mode=_RUNNING_MODE.AUDIO_CLIPS,
225
+ result_callback=mock.MagicMock())
226
+ with self.assertRaisesRegex(ValueError,
227
+ r'result callback should not be provided'):
228
+ with _AudioEmbedder.create_from_options(options) as unused_embedder:
229
+ pass
230
+
231
+ def test_calling_embed_in_audio_stream_mode(self):
232
+ options = _AudioEmbedderOptions(
233
+ base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
234
+ running_mode=_RUNNING_MODE.AUDIO_STREAM,
235
+ result_callback=mock.MagicMock())
236
+ with _AudioEmbedder.create_from_options(options) as embedder:
237
+ with self.assertRaisesRegex(ValueError,
238
+ r'not initialized with the audio clips mode'):
239
+ embedder.embed(self._read_wav_file(_SPEECH_WAV_16K_MONO))
240
+
241
+ def test_calling_embed_async_in_audio_clips_mode(self):
242
+ options = _AudioEmbedderOptions(
243
+ base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
244
+ running_mode=_RUNNING_MODE.AUDIO_CLIPS)
245
+ with _AudioEmbedder.create_from_options(options) as embedder:
246
+ with self.assertRaisesRegex(
247
+ ValueError, r'not initialized with the audio stream mode'):
248
+ embedder.embed_async(self._read_wav_file(_SPEECH_WAV_16K_MONO), 0)
249
+
250
+ def test_embed_async_calls_with_illegal_timestamp(self):
251
+ options = _AudioEmbedderOptions(
252
+ base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
253
+ running_mode=_RUNNING_MODE.AUDIO_STREAM,
254
+ result_callback=mock.MagicMock())
255
+ with _AudioEmbedder.create_from_options(options) as embedder:
256
+ embedder.embed_async(self._read_wav_file(_SPEECH_WAV_16K_MONO), 100)
257
+ with self.assertRaisesRegex(
258
+ ValueError, r'Input timestamp must be monotonically increasing'):
259
+ embedder.embed_async(self._read_wav_file(_SPEECH_WAV_16K_MONO), 0)
260
+
261
+ @parameterized.parameters(
262
+ # Same audio inputs but different sample rates.
263
+ (False, False, _SPEECH_WAV_16K_MONO, _SPEECH_WAV_48K_MONO))
264
+ def test_embed_async(self, l2_normalize, quantize, audio_file0, audio_file1):
265
+ embedding_result_list = []
266
+ embedding_result_list_copy = embedding_result_list.copy()
267
+
268
+ def save_result(result: _AudioEmbedderResult, timestamp_ms: int):
269
+ result.timestamp_ms = timestamp_ms
270
+ embedding_result_list.append(result)
271
+
272
+ options = _AudioEmbedderOptions(
273
+ base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
274
+ running_mode=_RUNNING_MODE.AUDIO_STREAM,
275
+ l2_normalize=l2_normalize,
276
+ quantize=quantize,
277
+ result_callback=save_result)
278
+
279
+ with _AudioEmbedder.create_from_options(options) as embedder:
280
+ audio_data0_list = self._read_wav_file_as_stream(audio_file0)
281
+ for audio_data, timestamp_ms in audio_data0_list:
282
+ embedder.embed_async(audio_data, timestamp_ms)
283
+ embedding_result0_list = embedding_result_list
284
+
285
+ with _AudioEmbedder.create_from_options(options) as embedder:
286
+ audio_data1_list = self._read_wav_file_as_stream(audio_file1)
287
+ embedding_result_list = embedding_result_list_copy
288
+ for audio_data, timestamp_ms in audio_data1_list:
289
+ embedder.embed_async(audio_data, timestamp_ms)
290
+ embedding_result1_list = embedding_result_list
291
+
292
+ self.assertLen(embedding_result0_list, 5)
293
+ self.assertLen(embedding_result1_list, 5)
294
+
295
+
296
+ if __name__ == '__main__':
297
+ absltest.main()