mediapipe-nightly 0.10.21.post20250114__cp312-cp312-manylinux_2_28_x86_64.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (593) hide show
  1. mediapipe/__init__.py +26 -0
  2. mediapipe/calculators/__init__.py +0 -0
  3. mediapipe/calculators/audio/__init__.py +0 -0
  4. mediapipe/calculators/audio/mfcc_mel_calculators_pb2.py +33 -0
  5. mediapipe/calculators/audio/rational_factor_resample_calculator_pb2.py +33 -0
  6. mediapipe/calculators/audio/spectrogram_calculator_pb2.py +37 -0
  7. mediapipe/calculators/audio/stabilized_log_calculator_pb2.py +31 -0
  8. mediapipe/calculators/audio/time_series_framer_calculator_pb2.py +33 -0
  9. mediapipe/calculators/core/__init__.py +0 -0
  10. mediapipe/calculators/core/bypass_calculator_pb2.py +31 -0
  11. mediapipe/calculators/core/clip_vector_size_calculator_pb2.py +31 -0
  12. mediapipe/calculators/core/concatenate_vector_calculator_pb2.py +31 -0
  13. mediapipe/calculators/core/constant_side_packet_calculator_pb2.py +39 -0
  14. mediapipe/calculators/core/dequantize_byte_array_calculator_pb2.py +31 -0
  15. mediapipe/calculators/core/flow_limiter_calculator_pb2.py +32 -0
  16. mediapipe/calculators/core/gate_calculator_pb2.py +33 -0
  17. mediapipe/calculators/core/get_vector_item_calculator_pb2.py +31 -0
  18. mediapipe/calculators/core/graph_profile_calculator_pb2.py +31 -0
  19. mediapipe/calculators/core/packet_cloner_calculator_pb2.py +31 -0
  20. mediapipe/calculators/core/packet_resampler_calculator_pb2.py +33 -0
  21. mediapipe/calculators/core/packet_thinner_calculator_pb2.py +33 -0
  22. mediapipe/calculators/core/quantize_float_vector_calculator_pb2.py +31 -0
  23. mediapipe/calculators/core/sequence_shift_calculator_pb2.py +31 -0
  24. mediapipe/calculators/core/split_vector_calculator_pb2.py +33 -0
  25. mediapipe/calculators/image/__init__.py +0 -0
  26. mediapipe/calculators/image/bilateral_filter_calculator_pb2.py +31 -0
  27. mediapipe/calculators/image/feature_detector_calculator_pb2.py +31 -0
  28. mediapipe/calculators/image/image_clone_calculator_pb2.py +31 -0
  29. mediapipe/calculators/image/image_cropping_calculator_pb2.py +33 -0
  30. mediapipe/calculators/image/image_transformation_calculator_pb2.py +38 -0
  31. mediapipe/calculators/image/mask_overlay_calculator_pb2.py +33 -0
  32. mediapipe/calculators/image/opencv_encoded_image_to_image_frame_calculator_pb2.py +31 -0
  33. mediapipe/calculators/image/opencv_image_encoder_calculator_pb2.py +35 -0
  34. mediapipe/calculators/image/recolor_calculator_pb2.py +34 -0
  35. mediapipe/calculators/image/rotation_mode_pb2.py +29 -0
  36. mediapipe/calculators/image/scale_image_calculator_pb2.py +34 -0
  37. mediapipe/calculators/image/segmentation_smoothing_calculator_pb2.py +31 -0
  38. mediapipe/calculators/image/set_alpha_calculator_pb2.py +31 -0
  39. mediapipe/calculators/image/warp_affine_calculator_pb2.py +36 -0
  40. mediapipe/calculators/internal/__init__.py +0 -0
  41. mediapipe/calculators/internal/callback_packet_calculator_pb2.py +33 -0
  42. mediapipe/calculators/tensor/__init__.py +0 -0
  43. mediapipe/calculators/tensor/audio_to_tensor_calculator_pb2.py +35 -0
  44. mediapipe/calculators/tensor/bert_preprocessor_calculator_pb2.py +31 -0
  45. mediapipe/calculators/tensor/feedback_tensors_calculator_pb2.py +37 -0
  46. mediapipe/calculators/tensor/image_to_tensor_calculator_pb2.py +40 -0
  47. mediapipe/calculators/tensor/inference_calculator_pb2.py +63 -0
  48. mediapipe/calculators/tensor/landmarks_to_tensor_calculator_pb2.py +33 -0
  49. mediapipe/calculators/tensor/regex_preprocessor_calculator_pb2.py +31 -0
  50. mediapipe/calculators/tensor/tensor_converter_calculator_pb2.py +34 -0
  51. mediapipe/calculators/tensor/tensor_to_joints_calculator_pb2.py +31 -0
  52. mediapipe/calculators/tensor/tensors_readback_calculator_pb2.py +35 -0
  53. mediapipe/calculators/tensor/tensors_to_audio_calculator_pb2.py +33 -0
  54. mediapipe/calculators/tensor/tensors_to_classification_calculator_pb2.py +44 -0
  55. mediapipe/calculators/tensor/tensors_to_detections_calculator_pb2.py +39 -0
  56. mediapipe/calculators/tensor/tensors_to_floats_calculator_pb2.py +33 -0
  57. mediapipe/calculators/tensor/tensors_to_landmarks_calculator_pb2.py +33 -0
  58. mediapipe/calculators/tensor/tensors_to_segmentation_calculator_pb2.py +34 -0
  59. mediapipe/calculators/tensor/vector_to_tensor_calculator_pb2.py +27 -0
  60. mediapipe/calculators/tflite/__init__.py +0 -0
  61. mediapipe/calculators/tflite/ssd_anchors_calculator_pb2.py +32 -0
  62. mediapipe/calculators/tflite/tflite_converter_calculator_pb2.py +33 -0
  63. mediapipe/calculators/tflite/tflite_custom_op_resolver_calculator_pb2.py +31 -0
  64. mediapipe/calculators/tflite/tflite_inference_calculator_pb2.py +49 -0
  65. mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator_pb2.py +31 -0
  66. mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator_pb2.py +31 -0
  67. mediapipe/calculators/tflite/tflite_tensors_to_landmarks_calculator_pb2.py +33 -0
  68. mediapipe/calculators/tflite/tflite_tensors_to_segmentation_calculator_pb2.py +31 -0
  69. mediapipe/calculators/util/__init__.py +0 -0
  70. mediapipe/calculators/util/align_hand_to_pose_in_world_calculator_pb2.py +31 -0
  71. mediapipe/calculators/util/annotation_overlay_calculator_pb2.py +32 -0
  72. mediapipe/calculators/util/association_calculator_pb2.py +31 -0
  73. mediapipe/calculators/util/collection_has_min_size_calculator_pb2.py +31 -0
  74. mediapipe/calculators/util/combine_joints_calculator_pb2.py +36 -0
  75. mediapipe/calculators/util/detection_label_id_to_text_calculator_pb2.py +36 -0
  76. mediapipe/calculators/util/detections_to_rects_calculator_pb2.py +33 -0
  77. mediapipe/calculators/util/detections_to_render_data_calculator_pb2.py +33 -0
  78. mediapipe/calculators/util/face_to_rect_calculator_pb2.py +26 -0
  79. mediapipe/calculators/util/filter_detections_calculator_pb2.py +31 -0
  80. mediapipe/calculators/util/flat_color_image_calculator_pb2.py +32 -0
  81. mediapipe/calculators/util/labels_to_render_data_calculator_pb2.py +34 -0
  82. mediapipe/calculators/util/landmark_projection_calculator_pb2.py +31 -0
  83. mediapipe/calculators/util/landmarks_refinement_calculator_pb2.py +41 -0
  84. mediapipe/calculators/util/landmarks_smoothing_calculator_pb2.py +33 -0
  85. mediapipe/calculators/util/landmarks_to_detection_calculator_pb2.py +31 -0
  86. mediapipe/calculators/util/landmarks_to_floats_calculator_pb2.py +31 -0
  87. mediapipe/calculators/util/landmarks_to_render_data_calculator_pb2.py +32 -0
  88. mediapipe/calculators/util/landmarks_transformation_calculator_pb2.py +37 -0
  89. mediapipe/calculators/util/latency_pb2.py +26 -0
  90. mediapipe/calculators/util/local_file_contents_calculator_pb2.py +31 -0
  91. mediapipe/calculators/util/logic_calculator_pb2.py +34 -0
  92. mediapipe/calculators/util/non_max_suppression_calculator_pb2.py +35 -0
  93. mediapipe/calculators/util/packet_frequency_calculator_pb2.py +31 -0
  94. mediapipe/calculators/util/packet_frequency_pb2.py +26 -0
  95. mediapipe/calculators/util/packet_latency_calculator_pb2.py +31 -0
  96. mediapipe/calculators/util/rect_to_render_data_calculator_pb2.py +32 -0
  97. mediapipe/calculators/util/rect_to_render_scale_calculator_pb2.py +31 -0
  98. mediapipe/calculators/util/rect_transformation_calculator_pb2.py +31 -0
  99. mediapipe/calculators/util/refine_landmarks_from_heatmap_calculator_pb2.py +31 -0
  100. mediapipe/calculators/util/resource_provider_calculator_pb2.py +28 -0
  101. mediapipe/calculators/util/set_joints_visibility_calculator_pb2.py +41 -0
  102. mediapipe/calculators/util/thresholding_calculator_pb2.py +31 -0
  103. mediapipe/calculators/util/timed_box_list_id_to_label_calculator_pb2.py +31 -0
  104. mediapipe/calculators/util/timed_box_list_to_render_data_calculator_pb2.py +32 -0
  105. mediapipe/calculators/util/top_k_scores_calculator_pb2.py +31 -0
  106. mediapipe/calculators/util/visibility_copy_calculator_pb2.py +27 -0
  107. mediapipe/calculators/util/visibility_smoothing_calculator_pb2.py +31 -0
  108. mediapipe/calculators/video/__init__.py +0 -0
  109. mediapipe/calculators/video/box_detector_calculator_pb2.py +32 -0
  110. mediapipe/calculators/video/box_tracker_calculator_pb2.py +32 -0
  111. mediapipe/calculators/video/flow_packager_calculator_pb2.py +32 -0
  112. mediapipe/calculators/video/flow_to_image_calculator_pb2.py +31 -0
  113. mediapipe/calculators/video/motion_analysis_calculator_pb2.py +42 -0
  114. mediapipe/calculators/video/opencv_video_encoder_calculator_pb2.py +31 -0
  115. mediapipe/calculators/video/tool/__init__.py +0 -0
  116. mediapipe/calculators/video/tool/flow_quantizer_model_pb2.py +26 -0
  117. mediapipe/calculators/video/tracked_detection_manager_calculator_pb2.py +32 -0
  118. mediapipe/calculators/video/video_pre_stream_calculator_pb2.py +35 -0
  119. mediapipe/examples/__init__.py +14 -0
  120. mediapipe/examples/desktop/__init__.py +14 -0
  121. mediapipe/framework/__init__.py +0 -0
  122. mediapipe/framework/calculator_options_pb2.py +29 -0
  123. mediapipe/framework/calculator_pb2.py +59 -0
  124. mediapipe/framework/calculator_profile_pb2.py +48 -0
  125. mediapipe/framework/deps/__init__.py +0 -0
  126. mediapipe/framework/deps/proto_descriptor_pb2.py +29 -0
  127. mediapipe/framework/formats/__init__.py +0 -0
  128. mediapipe/framework/formats/affine_transform_data_pb2.py +28 -0
  129. mediapipe/framework/formats/annotation/__init__.py +0 -0
  130. mediapipe/framework/formats/annotation/locus_pb2.py +32 -0
  131. mediapipe/framework/formats/annotation/rasterization_pb2.py +29 -0
  132. mediapipe/framework/formats/body_rig_pb2.py +28 -0
  133. mediapipe/framework/formats/classification_pb2.py +31 -0
  134. mediapipe/framework/formats/detection_pb2.py +36 -0
  135. mediapipe/framework/formats/image_file_properties_pb2.py +26 -0
  136. mediapipe/framework/formats/image_format_pb2.py +29 -0
  137. mediapipe/framework/formats/landmark_pb2.py +37 -0
  138. mediapipe/framework/formats/location_data_pb2.py +38 -0
  139. mediapipe/framework/formats/matrix_data_pb2.py +31 -0
  140. mediapipe/framework/formats/motion/__init__.py +0 -0
  141. mediapipe/framework/formats/motion/optical_flow_field_data_pb2.py +30 -0
  142. mediapipe/framework/formats/object_detection/__init__.py +0 -0
  143. mediapipe/framework/formats/object_detection/anchor_pb2.py +26 -0
  144. mediapipe/framework/formats/rect_pb2.py +29 -0
  145. mediapipe/framework/formats/time_series_header_pb2.py +28 -0
  146. mediapipe/framework/graph_runtime_info_pb2.py +31 -0
  147. mediapipe/framework/mediapipe_options_pb2.py +27 -0
  148. mediapipe/framework/packet_factory_pb2.py +31 -0
  149. mediapipe/framework/packet_generator_pb2.py +33 -0
  150. mediapipe/framework/status_handler_pb2.py +28 -0
  151. mediapipe/framework/stream_handler/__init__.py +0 -0
  152. mediapipe/framework/stream_handler/default_input_stream_handler_pb2.py +27 -0
  153. mediapipe/framework/stream_handler/fixed_size_input_stream_handler_pb2.py +27 -0
  154. mediapipe/framework/stream_handler/sync_set_input_stream_handler_pb2.py +29 -0
  155. mediapipe/framework/stream_handler/timestamp_align_input_stream_handler_pb2.py +27 -0
  156. mediapipe/framework/stream_handler_pb2.py +30 -0
  157. mediapipe/framework/test_calculators_pb2.py +31 -0
  158. mediapipe/framework/thread_pool_executor_pb2.py +29 -0
  159. mediapipe/framework/tool/__init__.py +0 -0
  160. mediapipe/framework/tool/calculator_graph_template_pb2.py +44 -0
  161. mediapipe/framework/tool/field_data_pb2.py +28 -0
  162. mediapipe/framework/tool/node_chain_subgraph_pb2.py +31 -0
  163. mediapipe/framework/tool/packet_generator_wrapper_calculator_pb2.py +28 -0
  164. mediapipe/framework/tool/source_pb2.py +33 -0
  165. mediapipe/framework/tool/switch_container_pb2.py +32 -0
  166. mediapipe/gpu/__init__.py +0 -0
  167. mediapipe/gpu/copy_calculator_pb2.py +33 -0
  168. mediapipe/gpu/gl_animation_overlay_calculator_pb2.py +31 -0
  169. mediapipe/gpu/gl_context_options_pb2.py +31 -0
  170. mediapipe/gpu/gl_scaler_calculator_pb2.py +32 -0
  171. mediapipe/gpu/gl_surface_sink_calculator_pb2.py +32 -0
  172. mediapipe/gpu/gpu_origin_pb2.py +29 -0
  173. mediapipe/gpu/scale_mode_pb2.py +28 -0
  174. mediapipe/model_maker/__init__.py +27 -0
  175. mediapipe/model_maker/setup.py +107 -0
  176. mediapipe/modules/__init__.py +0 -0
  177. mediapipe/modules/face_detection/__init__.py +0 -0
  178. mediapipe/modules/face_detection/face_detection_full_range_cpu.binarypb +0 -0
  179. mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite +0 -0
  180. mediapipe/modules/face_detection/face_detection_pb2.py +30 -0
  181. mediapipe/modules/face_detection/face_detection_short_range.tflite +0 -0
  182. mediapipe/modules/face_detection/face_detection_short_range_cpu.binarypb +0 -0
  183. mediapipe/modules/face_geometry/__init__.py +0 -0
  184. mediapipe/modules/face_geometry/data/__init__.py +0 -0
  185. mediapipe/modules/face_geometry/effect_renderer_calculator_pb2.py +27 -0
  186. mediapipe/modules/face_geometry/env_generator_calculator_pb2.py +28 -0
  187. mediapipe/modules/face_geometry/geometry_pipeline_calculator_pb2.py +27 -0
  188. mediapipe/modules/face_geometry/libs/__init__.py +0 -0
  189. mediapipe/modules/face_geometry/protos/__init__.py +0 -0
  190. mediapipe/modules/face_geometry/protos/environment_pb2.py +31 -0
  191. mediapipe/modules/face_geometry/protos/face_geometry_pb2.py +29 -0
  192. mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata_pb2.py +32 -0
  193. mediapipe/modules/face_geometry/protos/mesh_3d_pb2.py +31 -0
  194. mediapipe/modules/face_landmark/__init__.py +0 -0
  195. mediapipe/modules/face_landmark/face_landmark.tflite +0 -0
  196. mediapipe/modules/face_landmark/face_landmark_front_cpu.binarypb +0 -0
  197. mediapipe/modules/face_landmark/face_landmark_with_attention.tflite +0 -0
  198. mediapipe/modules/hand_landmark/__init__.py +0 -0
  199. mediapipe/modules/hand_landmark/calculators/__init__.py +0 -0
  200. mediapipe/modules/hand_landmark/hand_landmark_full.tflite +0 -0
  201. mediapipe/modules/hand_landmark/hand_landmark_lite.tflite +0 -0
  202. mediapipe/modules/hand_landmark/hand_landmark_tracking_cpu.binarypb +0 -0
  203. mediapipe/modules/hand_landmark/handedness.txt +2 -0
  204. mediapipe/modules/holistic_landmark/__init__.py +0 -0
  205. mediapipe/modules/holistic_landmark/calculators/__init__.py +0 -0
  206. mediapipe/modules/holistic_landmark/calculators/roi_tracking_calculator_pb2.py +37 -0
  207. mediapipe/modules/holistic_landmark/hand_recrop.tflite +0 -0
  208. mediapipe/modules/holistic_landmark/holistic_landmark_cpu.binarypb +0 -0
  209. mediapipe/modules/iris_landmark/__init__.py +0 -0
  210. mediapipe/modules/iris_landmark/iris_landmark.tflite +0 -0
  211. mediapipe/modules/objectron/__init__.py +0 -0
  212. mediapipe/modules/objectron/calculators/__init__.py +0 -0
  213. mediapipe/modules/objectron/calculators/a_r_capture_metadata_pb2.py +102 -0
  214. mediapipe/modules/objectron/calculators/annotation_data_pb2.py +38 -0
  215. mediapipe/modules/objectron/calculators/belief_decoder_config_pb2.py +28 -0
  216. mediapipe/modules/objectron/calculators/camera_parameters_pb2.py +30 -0
  217. mediapipe/modules/objectron/calculators/filter_detection_calculator_pb2.py +35 -0
  218. mediapipe/modules/objectron/calculators/frame_annotation_to_rect_calculator_pb2.py +31 -0
  219. mediapipe/modules/objectron/calculators/frame_annotation_tracker_calculator_pb2.py +31 -0
  220. mediapipe/modules/objectron/calculators/lift_2d_frame_annotation_to_3d_calculator_pb2.py +32 -0
  221. mediapipe/modules/objectron/calculators/object_pb2.py +38 -0
  222. mediapipe/modules/objectron/calculators/tensors_to_objects_calculator_pb2.py +32 -0
  223. mediapipe/modules/objectron/calculators/tflite_tensors_to_objects_calculator_pb2.py +32 -0
  224. mediapipe/modules/objectron/object_detection_oidv4_labelmap.txt +24 -0
  225. mediapipe/modules/objectron/objectron_cpu.binarypb +0 -0
  226. mediapipe/modules/palm_detection/__init__.py +0 -0
  227. mediapipe/modules/palm_detection/palm_detection_full.tflite +0 -0
  228. mediapipe/modules/palm_detection/palm_detection_lite.tflite +0 -0
  229. mediapipe/modules/pose_detection/__init__.py +0 -0
  230. mediapipe/modules/pose_detection/pose_detection.tflite +0 -0
  231. mediapipe/modules/pose_landmark/__init__.py +0 -0
  232. mediapipe/modules/pose_landmark/pose_landmark_cpu.binarypb +0 -0
  233. mediapipe/modules/pose_landmark/pose_landmark_full.tflite +0 -0
  234. mediapipe/modules/selfie_segmentation/__init__.py +0 -0
  235. mediapipe/modules/selfie_segmentation/selfie_segmentation.tflite +0 -0
  236. mediapipe/modules/selfie_segmentation/selfie_segmentation_cpu.binarypb +0 -0
  237. mediapipe/modules/selfie_segmentation/selfie_segmentation_landscape.tflite +0 -0
  238. mediapipe/python/__init__.py +29 -0
  239. mediapipe/python/_framework_bindings.cpython-312-x86_64-linux-gnu.so +0 -0
  240. mediapipe/python/calculator_graph_test.py +251 -0
  241. mediapipe/python/image_frame_test.py +194 -0
  242. mediapipe/python/image_test.py +218 -0
  243. mediapipe/python/packet_creator.py +275 -0
  244. mediapipe/python/packet_getter.py +120 -0
  245. mediapipe/python/packet_test.py +533 -0
  246. mediapipe/python/solution_base.py +604 -0
  247. mediapipe/python/solution_base_test.py +396 -0
  248. mediapipe/python/solutions/__init__.py +27 -0
  249. mediapipe/python/solutions/download_utils.py +37 -0
  250. mediapipe/python/solutions/drawing_styles.py +249 -0
  251. mediapipe/python/solutions/drawing_utils.py +320 -0
  252. mediapipe/python/solutions/drawing_utils_test.py +258 -0
  253. mediapipe/python/solutions/face_detection.py +105 -0
  254. mediapipe/python/solutions/face_detection_test.py +92 -0
  255. mediapipe/python/solutions/face_mesh.py +125 -0
  256. mediapipe/python/solutions/face_mesh_connections.py +500 -0
  257. mediapipe/python/solutions/face_mesh_test.py +170 -0
  258. mediapipe/python/solutions/hands.py +153 -0
  259. mediapipe/python/solutions/hands_connections.py +32 -0
  260. mediapipe/python/solutions/hands_test.py +219 -0
  261. mediapipe/python/solutions/holistic.py +167 -0
  262. mediapipe/python/solutions/holistic_test.py +142 -0
  263. mediapipe/python/solutions/objectron.py +288 -0
  264. mediapipe/python/solutions/objectron_test.py +81 -0
  265. mediapipe/python/solutions/pose.py +192 -0
  266. mediapipe/python/solutions/pose_connections.py +22 -0
  267. mediapipe/python/solutions/pose_test.py +262 -0
  268. mediapipe/python/solutions/selfie_segmentation.py +76 -0
  269. mediapipe/python/solutions/selfie_segmentation_test.py +68 -0
  270. mediapipe/python/timestamp_test.py +78 -0
  271. mediapipe/tasks/__init__.py +14 -0
  272. mediapipe/tasks/cc/__init__.py +0 -0
  273. mediapipe/tasks/cc/audio/__init__.py +0 -0
  274. mediapipe/tasks/cc/audio/audio_classifier/__init__.py +0 -0
  275. mediapipe/tasks/cc/audio/audio_classifier/proto/__init__.py +0 -0
  276. mediapipe/tasks/cc/audio/audio_classifier/proto/audio_classifier_graph_options_pb2.py +35 -0
  277. mediapipe/tasks/cc/audio/audio_embedder/__init__.py +0 -0
  278. mediapipe/tasks/cc/audio/audio_embedder/proto/__init__.py +0 -0
  279. mediapipe/tasks/cc/audio/audio_embedder/proto/audio_embedder_graph_options_pb2.py +35 -0
  280. mediapipe/tasks/cc/audio/core/__init__.py +0 -0
  281. mediapipe/tasks/cc/audio/utils/__init__.py +0 -0
  282. mediapipe/tasks/cc/components/__init__.py +0 -0
  283. mediapipe/tasks/cc/components/calculators/__init__.py +0 -0
  284. mediapipe/tasks/cc/components/calculators/classification_aggregation_calculator_pb2.py +31 -0
  285. mediapipe/tasks/cc/components/calculators/score_calibration_calculator_pb2.py +35 -0
  286. mediapipe/tasks/cc/components/calculators/tensors_to_embeddings_calculator_pb2.py +32 -0
  287. mediapipe/tasks/cc/components/containers/__init__.py +0 -0
  288. mediapipe/tasks/cc/components/containers/proto/__init__.py +0 -0
  289. mediapipe/tasks/cc/components/containers/proto/classifications_pb2.py +30 -0
  290. mediapipe/tasks/cc/components/containers/proto/embeddings_pb2.py +35 -0
  291. mediapipe/tasks/cc/components/containers/proto/landmarks_detection_result_pb2.py +32 -0
  292. mediapipe/tasks/cc/components/processors/__init__.py +0 -0
  293. mediapipe/tasks/cc/components/processors/proto/__init__.py +0 -0
  294. mediapipe/tasks/cc/components/processors/proto/classification_postprocessing_graph_options_pb2.py +38 -0
  295. mediapipe/tasks/cc/components/processors/proto/classifier_options_pb2.py +27 -0
  296. mediapipe/tasks/cc/components/processors/proto/detection_postprocessing_graph_options_pb2.py +36 -0
  297. mediapipe/tasks/cc/components/processors/proto/detector_options_pb2.py +27 -0
  298. mediapipe/tasks/cc/components/processors/proto/embedder_options_pb2.py +27 -0
  299. mediapipe/tasks/cc/components/processors/proto/embedding_postprocessing_graph_options_pb2.py +32 -0
  300. mediapipe/tasks/cc/components/processors/proto/image_preprocessing_graph_options_pb2.py +34 -0
  301. mediapipe/tasks/cc/components/processors/proto/text_model_type_pb2.py +28 -0
  302. mediapipe/tasks/cc/components/processors/proto/text_preprocessing_graph_options_pb2.py +32 -0
  303. mediapipe/tasks/cc/components/utils/__init__.py +0 -0
  304. mediapipe/tasks/cc/core/__init__.py +0 -0
  305. mediapipe/tasks/cc/core/proto/__init__.py +0 -0
  306. mediapipe/tasks/cc/core/proto/acceleration_pb2.py +28 -0
  307. mediapipe/tasks/cc/core/proto/base_options_pb2.py +30 -0
  308. mediapipe/tasks/cc/core/proto/external_file_pb2.py +31 -0
  309. mediapipe/tasks/cc/core/proto/inference_subgraph_pb2.py +32 -0
  310. mediapipe/tasks/cc/core/proto/model_resources_calculator_pb2.py +32 -0
  311. mediapipe/tasks/cc/genai/__init__.py +0 -0
  312. mediapipe/tasks/cc/genai/inference/__init__.py +0 -0
  313. mediapipe/tasks/cc/genai/inference/c/__init__.py +0 -0
  314. mediapipe/tasks/cc/genai/inference/calculators/__init__.py +0 -0
  315. mediapipe/tasks/cc/genai/inference/calculators/detokenizer_calculator_pb2.py +27 -0
  316. mediapipe/tasks/cc/genai/inference/calculators/llm_gpu_calculator_pb2.py +32 -0
  317. mediapipe/tasks/cc/genai/inference/calculators/model_data_calculator_pb2.py +27 -0
  318. mediapipe/tasks/cc/genai/inference/calculators/tokenizer_calculator_pb2.py +29 -0
  319. mediapipe/tasks/cc/genai/inference/common/__init__.py +0 -0
  320. mediapipe/tasks/cc/genai/inference/proto/__init__.py +0 -0
  321. mediapipe/tasks/cc/genai/inference/proto/llm_file_metadata_pb2.py +32 -0
  322. mediapipe/tasks/cc/genai/inference/proto/llm_params_pb2.py +33 -0
  323. mediapipe/tasks/cc/genai/inference/proto/prompt_template_pb2.py +27 -0
  324. mediapipe/tasks/cc/genai/inference/proto/sampler_params_pb2.py +29 -0
  325. mediapipe/tasks/cc/genai/inference/proto/transformer_params_pb2.py +45 -0
  326. mediapipe/tasks/cc/genai/inference/utils/__init__.py +0 -0
  327. mediapipe/tasks/cc/genai/inference/utils/llm_utils/__init__.py +0 -0
  328. mediapipe/tasks/cc/genai/inference/utils/xnn_utils/__init__.py +0 -0
  329. mediapipe/tasks/cc/metadata/__init__.py +0 -0
  330. mediapipe/tasks/cc/metadata/python/__init__.py +0 -0
  331. mediapipe/tasks/cc/metadata/python/_pywrap_metadata_version.cpython-312-x86_64-linux-gnu.so +0 -0
  332. mediapipe/tasks/cc/metadata/tests/__init__.py +0 -0
  333. mediapipe/tasks/cc/metadata/utils/__init__.py +0 -0
  334. mediapipe/tasks/cc/text/__init__.py +0 -0
  335. mediapipe/tasks/cc/text/custom_ops/__init__.py +0 -0
  336. mediapipe/tasks/cc/text/custom_ops/ragged/__init__.py +0 -0
  337. mediapipe/tasks/cc/text/custom_ops/sentencepiece/__init__.py +0 -0
  338. mediapipe/tasks/cc/text/custom_ops/sentencepiece/testdata/__init__.py +0 -0
  339. mediapipe/tasks/cc/text/language_detector/__init__.py +0 -0
  340. mediapipe/tasks/cc/text/language_detector/custom_ops/__init__.py +0 -0
  341. mediapipe/tasks/cc/text/language_detector/custom_ops/utils/__init__.py +0 -0
  342. mediapipe/tasks/cc/text/language_detector/custom_ops/utils/hash/__init__.py +0 -0
  343. mediapipe/tasks/cc/text/language_detector/custom_ops/utils/utf/__init__.py +0 -0
  344. mediapipe/tasks/cc/text/text_classifier/__init__.py +0 -0
  345. mediapipe/tasks/cc/text/text_classifier/proto/__init__.py +0 -0
  346. mediapipe/tasks/cc/text/text_classifier/proto/text_classifier_graph_options_pb2.py +35 -0
  347. mediapipe/tasks/cc/text/text_embedder/__init__.py +0 -0
  348. mediapipe/tasks/cc/text/text_embedder/proto/__init__.py +0 -0
  349. mediapipe/tasks/cc/text/text_embedder/proto/text_embedder_graph_options_pb2.py +35 -0
  350. mediapipe/tasks/cc/text/tokenizers/__init__.py +0 -0
  351. mediapipe/tasks/cc/text/utils/__init__.py +0 -0
  352. mediapipe/tasks/cc/vision/__init__.py +0 -0
  353. mediapipe/tasks/cc/vision/core/__init__.py +0 -0
  354. mediapipe/tasks/cc/vision/custom_ops/__init__.py +0 -0
  355. mediapipe/tasks/cc/vision/face_detector/__init__.py +0 -0
  356. mediapipe/tasks/cc/vision/face_detector/proto/__init__.py +0 -0
  357. mediapipe/tasks/cc/vision/face_detector/proto/face_detector_graph_options_pb2.py +34 -0
  358. mediapipe/tasks/cc/vision/face_geometry/__init__.py +0 -0
  359. mediapipe/tasks/cc/vision/face_geometry/calculators/__init__.py +0 -0
  360. mediapipe/tasks/cc/vision/face_geometry/calculators/env_generator_calculator_pb2.py +28 -0
  361. mediapipe/tasks/cc/vision/face_geometry/calculators/geometry_pipeline_calculator_pb2.py +29 -0
  362. mediapipe/tasks/cc/vision/face_geometry/data/__init__.py +0 -0
  363. mediapipe/tasks/cc/vision/face_geometry/libs/__init__.py +0 -0
  364. mediapipe/tasks/cc/vision/face_geometry/proto/__init__.py +0 -0
  365. mediapipe/tasks/cc/vision/face_geometry/proto/environment_pb2.py +31 -0
  366. mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry_graph_options_pb2.py +29 -0
  367. mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry_pb2.py +29 -0
  368. mediapipe/tasks/cc/vision/face_geometry/proto/geometry_pipeline_metadata_pb2.py +32 -0
  369. mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d_pb2.py +31 -0
  370. mediapipe/tasks/cc/vision/face_landmarker/__init__.py +0 -0
  371. mediapipe/tasks/cc/vision/face_landmarker/proto/__init__.py +0 -0
  372. mediapipe/tasks/cc/vision/face_landmarker/proto/face_blendshapes_graph_options_pb2.py +34 -0
  373. mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarker_graph_options_pb2.py +37 -0
  374. mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarks_detector_graph_options_pb2.py +35 -0
  375. mediapipe/tasks/cc/vision/face_landmarker/proto/tensors_to_face_landmarks_graph_options_pb2.py +32 -0
  376. mediapipe/tasks/cc/vision/face_stylizer/__init__.py +0 -0
  377. mediapipe/tasks/cc/vision/face_stylizer/calculators/__init__.py +0 -0
  378. mediapipe/tasks/cc/vision/face_stylizer/calculators/tensors_to_image_calculator_pb2.py +36 -0
  379. mediapipe/tasks/cc/vision/face_stylizer/proto/__init__.py +0 -0
  380. mediapipe/tasks/cc/vision/face_stylizer/proto/face_stylizer_graph_options_pb2.py +35 -0
  381. mediapipe/tasks/cc/vision/gesture_recognizer/__init__.py +0 -0
  382. mediapipe/tasks/cc/vision/gesture_recognizer/calculators/__init__.py +0 -0
  383. mediapipe/tasks/cc/vision/gesture_recognizer/calculators/combined_prediction_calculator_pb2.py +33 -0
  384. mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator_pb2.py +31 -0
  385. mediapipe/tasks/cc/vision/gesture_recognizer/proto/__init__.py +0 -0
  386. mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_classifier_graph_options_pb2.py +35 -0
  387. mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_embedder_graph_options_pb2.py +34 -0
  388. mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_recognizer_graph_options_pb2.py +36 -0
  389. mediapipe/tasks/cc/vision/gesture_recognizer/proto/hand_gesture_recognizer_graph_options_pb2.py +36 -0
  390. mediapipe/tasks/cc/vision/hand_detector/__init__.py +0 -0
  391. mediapipe/tasks/cc/vision/hand_detector/proto/__init__.py +0 -0
  392. mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options_pb2.py +34 -0
  393. mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_result_pb2.py +30 -0
  394. mediapipe/tasks/cc/vision/hand_landmarker/__init__.py +0 -0
  395. mediapipe/tasks/cc/vision/hand_landmarker/calculators/__init__.py +0 -0
  396. mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_association_calculator_pb2.py +31 -0
  397. mediapipe/tasks/cc/vision/hand_landmarker/proto/__init__.py +0 -0
  398. mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options_pb2.py +36 -0
  399. mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options_pb2.py +34 -0
  400. mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_roi_refinement_graph_options_pb2.py +28 -0
  401. mediapipe/tasks/cc/vision/holistic_landmarker/__init__.py +0 -0
  402. mediapipe/tasks/cc/vision/holistic_landmarker/proto/__init__.py +0 -0
  403. mediapipe/tasks/cc/vision/holistic_landmarker/proto/holistic_landmarker_graph_options_pb2.py +34 -0
  404. mediapipe/tasks/cc/vision/holistic_landmarker/proto/holistic_result_pb2.py +29 -0
  405. mediapipe/tasks/cc/vision/image_classifier/__init__.py +0 -0
  406. mediapipe/tasks/cc/vision/image_classifier/proto/__init__.py +0 -0
  407. mediapipe/tasks/cc/vision/image_classifier/proto/image_classifier_graph_options_pb2.py +35 -0
  408. mediapipe/tasks/cc/vision/image_embedder/__init__.py +0 -0
  409. mediapipe/tasks/cc/vision/image_embedder/proto/__init__.py +0 -0
  410. mediapipe/tasks/cc/vision/image_embedder/proto/image_embedder_graph_options_pb2.py +35 -0
  411. mediapipe/tasks/cc/vision/image_generator/__init__.py +0 -0
  412. mediapipe/tasks/cc/vision/image_generator/diffuser/__init__.py +0 -0
  413. mediapipe/tasks/cc/vision/image_generator/diffuser/stable_diffusion_iterate_calculator_pb2.py +40 -0
  414. mediapipe/tasks/cc/vision/image_generator/proto/__init__.py +0 -0
  415. mediapipe/tasks/cc/vision/image_generator/proto/conditioned_image_graph_options_pb2.py +40 -0
  416. mediapipe/tasks/cc/vision/image_generator/proto/control_plugin_graph_options_pb2.py +34 -0
  417. mediapipe/tasks/cc/vision/image_generator/proto/image_generator_graph_options_pb2.py +30 -0
  418. mediapipe/tasks/cc/vision/image_segmenter/__init__.py +0 -0
  419. mediapipe/tasks/cc/vision/image_segmenter/calculators/__init__.py +0 -0
  420. mediapipe/tasks/cc/vision/image_segmenter/calculators/tensors_to_segmentation_calculator_pb2.py +34 -0
  421. mediapipe/tasks/cc/vision/image_segmenter/proto/__init__.py +0 -0
  422. mediapipe/tasks/cc/vision/image_segmenter/proto/image_segmenter_graph_options_pb2.py +35 -0
  423. mediapipe/tasks/cc/vision/image_segmenter/proto/segmenter_options_pb2.py +33 -0
  424. mediapipe/tasks/cc/vision/interactive_segmenter/__init__.py +0 -0
  425. mediapipe/tasks/cc/vision/object_detector/__init__.py +0 -0
  426. mediapipe/tasks/cc/vision/object_detector/proto/__init__.py +0 -0
  427. mediapipe/tasks/cc/vision/object_detector/proto/object_detector_options_pb2.py +34 -0
  428. mediapipe/tasks/cc/vision/pose_detector/__init__.py +0 -0
  429. mediapipe/tasks/cc/vision/pose_detector/proto/__init__.py +0 -0
  430. mediapipe/tasks/cc/vision/pose_detector/proto/pose_detector_graph_options_pb2.py +34 -0
  431. mediapipe/tasks/cc/vision/pose_landmarker/__init__.py +0 -0
  432. mediapipe/tasks/cc/vision/pose_landmarker/proto/__init__.py +0 -0
  433. mediapipe/tasks/cc/vision/pose_landmarker/proto/pose_landmarker_graph_options_pb2.py +36 -0
  434. mediapipe/tasks/cc/vision/pose_landmarker/proto/pose_landmarks_detector_graph_options_pb2.py +34 -0
  435. mediapipe/tasks/cc/vision/utils/__init__.py +0 -0
  436. mediapipe/tasks/cc/vision/utils/ghum/__init__.py +0 -0
  437. mediapipe/tasks/metadata/image_segmenter_metadata_schema.fbs +59 -0
  438. mediapipe/tasks/metadata/image_segmenter_metadata_schema_py_generated.py +108 -0
  439. mediapipe/tasks/metadata/metadata_schema.fbs +732 -0
  440. mediapipe/tasks/metadata/metadata_schema_py_generated.py +3251 -0
  441. mediapipe/tasks/metadata/object_detector_metadata_schema.fbs +98 -0
  442. mediapipe/tasks/metadata/object_detector_metadata_schema_py_generated.py +674 -0
  443. mediapipe/tasks/metadata/schema_py_generated.py +18438 -0
  444. mediapipe/tasks/python/__init__.py +27 -0
  445. mediapipe/tasks/python/audio/__init__.py +33 -0
  446. mediapipe/tasks/python/audio/audio_classifier.py +324 -0
  447. mediapipe/tasks/python/audio/audio_embedder.py +285 -0
  448. mediapipe/tasks/python/audio/core/__init__.py +16 -0
  449. mediapipe/tasks/python/audio/core/audio_record.py +125 -0
  450. mediapipe/tasks/python/audio/core/audio_task_running_mode.py +29 -0
  451. mediapipe/tasks/python/audio/core/base_audio_task_api.py +181 -0
  452. mediapipe/tasks/python/benchmark/__init__.py +13 -0
  453. mediapipe/tasks/python/benchmark/benchmark_utils.py +70 -0
  454. mediapipe/tasks/python/benchmark/vision/__init__.py +13 -0
  455. mediapipe/tasks/python/benchmark/vision/benchmark.py +99 -0
  456. mediapipe/tasks/python/benchmark/vision/core/__init__.py +14 -0
  457. mediapipe/tasks/python/benchmark/vision/core/base_vision_benchmark_api.py +40 -0
  458. mediapipe/tasks/python/components/__init__.py +13 -0
  459. mediapipe/tasks/python/components/containers/__init__.py +53 -0
  460. mediapipe/tasks/python/components/containers/audio_data.py +137 -0
  461. mediapipe/tasks/python/components/containers/bounding_box.py +73 -0
  462. mediapipe/tasks/python/components/containers/category.py +78 -0
  463. mediapipe/tasks/python/components/containers/classification_result.py +111 -0
  464. mediapipe/tasks/python/components/containers/detections.py +181 -0
  465. mediapipe/tasks/python/components/containers/embedding_result.py +89 -0
  466. mediapipe/tasks/python/components/containers/keypoint.py +77 -0
  467. mediapipe/tasks/python/components/containers/landmark.py +122 -0
  468. mediapipe/tasks/python/components/containers/landmark_detection_result.py +106 -0
  469. mediapipe/tasks/python/components/containers/rect.py +109 -0
  470. mediapipe/tasks/python/components/processors/__init__.py +23 -0
  471. mediapipe/tasks/python/components/processors/classifier_options.py +86 -0
  472. mediapipe/tasks/python/components/utils/__init__.py +13 -0
  473. mediapipe/tasks/python/components/utils/cosine_similarity.py +68 -0
  474. mediapipe/tasks/python/core/__init__.py +13 -0
  475. mediapipe/tasks/python/core/base_options.py +121 -0
  476. mediapipe/tasks/python/core/optional_dependencies.py +25 -0
  477. mediapipe/tasks/python/core/task_info.py +139 -0
  478. mediapipe/tasks/python/genai/__init__.py +14 -0
  479. mediapipe/tasks/python/genai/bundler/__init__.py +23 -0
  480. mediapipe/tasks/python/genai/bundler/llm_bundler.py +130 -0
  481. mediapipe/tasks/python/genai/bundler/llm_bundler_test.py +168 -0
  482. mediapipe/tasks/python/genai/converter/__init__.py +24 -0
  483. mediapipe/tasks/python/genai/converter/converter_base.py +179 -0
  484. mediapipe/tasks/python/genai/converter/converter_factory.py +79 -0
  485. mediapipe/tasks/python/genai/converter/llm_converter.py +374 -0
  486. mediapipe/tasks/python/genai/converter/llm_converter_test.py +63 -0
  487. mediapipe/tasks/python/genai/converter/pytorch_converter.py +318 -0
  488. mediapipe/tasks/python/genai/converter/pytorch_converter_test.py +86 -0
  489. mediapipe/tasks/python/genai/converter/quantization_util.py +516 -0
  490. mediapipe/tasks/python/genai/converter/quantization_util_test.py +259 -0
  491. mediapipe/tasks/python/genai/converter/safetensors_converter.py +580 -0
  492. mediapipe/tasks/python/genai/converter/safetensors_converter_test.py +83 -0
  493. mediapipe/tasks/python/genai/converter/weight_bins_writer.py +120 -0
  494. mediapipe/tasks/python/genai/converter/weight_bins_writer_test.py +95 -0
  495. mediapipe/tasks/python/metadata/__init__.py +13 -0
  496. mediapipe/tasks/python/metadata/flatbuffers_lib/_pywrap_flatbuffers.cpython-312-x86_64-linux-gnu.so +0 -0
  497. mediapipe/tasks/python/metadata/metadata.py +928 -0
  498. mediapipe/tasks/python/metadata/metadata_displayer_cli.py +34 -0
  499. mediapipe/tasks/python/metadata/metadata_writers/__init__.py +13 -0
  500. mediapipe/tasks/python/metadata/metadata_writers/face_stylizer.py +138 -0
  501. mediapipe/tasks/python/metadata/metadata_writers/image_classifier.py +71 -0
  502. mediapipe/tasks/python/metadata/metadata_writers/image_segmenter.py +170 -0
  503. mediapipe/tasks/python/metadata/metadata_writers/metadata_info.py +1166 -0
  504. mediapipe/tasks/python/metadata/metadata_writers/metadata_writer.py +845 -0
  505. mediapipe/tasks/python/metadata/metadata_writers/model_asset_bundle_utils.py +71 -0
  506. mediapipe/tasks/python/metadata/metadata_writers/object_detector.py +331 -0
  507. mediapipe/tasks/python/metadata/metadata_writers/text_classifier.py +119 -0
  508. mediapipe/tasks/python/metadata/metadata_writers/writer_utils.py +91 -0
  509. mediapipe/tasks/python/test/__init__.py +13 -0
  510. mediapipe/tasks/python/test/audio/__init__.py +13 -0
  511. mediapipe/tasks/python/test/audio/audio_classifier_test.py +387 -0
  512. mediapipe/tasks/python/test/audio/audio_embedder_test.py +297 -0
  513. mediapipe/tasks/python/test/test_utils.py +196 -0
  514. mediapipe/tasks/python/test/text/__init__.py +13 -0
  515. mediapipe/tasks/python/test/text/language_detector_test.py +228 -0
  516. mediapipe/tasks/python/test/text/text_classifier_test.py +235 -0
  517. mediapipe/tasks/python/test/text/text_embedder_test.py +326 -0
  518. mediapipe/tasks/python/test/vision/__init__.py +13 -0
  519. mediapipe/tasks/python/test/vision/face_aligner_test.py +190 -0
  520. mediapipe/tasks/python/test/vision/face_detector_test.py +523 -0
  521. mediapipe/tasks/python/test/vision/face_landmarker_test.py +565 -0
  522. mediapipe/tasks/python/test/vision/face_stylizer_test.py +191 -0
  523. mediapipe/tasks/python/test/vision/hand_landmarker_test.py +437 -0
  524. mediapipe/tasks/python/test/vision/holistic_landmarker_test.py +544 -0
  525. mediapipe/tasks/python/test/vision/image_classifier_test.py +657 -0
  526. mediapipe/tasks/python/test/vision/image_embedder_test.py +423 -0
  527. mediapipe/tasks/python/test/vision/image_segmenter_test.py +512 -0
  528. mediapipe/tasks/python/test/vision/interactive_segmenter_test.py +341 -0
  529. mediapipe/tasks/python/test/vision/object_detector_test.py +493 -0
  530. mediapipe/tasks/python/test/vision/pose_landmarker_test.py +518 -0
  531. mediapipe/tasks/python/text/__init__.py +35 -0
  532. mediapipe/tasks/python/text/core/__init__.py +16 -0
  533. mediapipe/tasks/python/text/core/base_text_task_api.py +54 -0
  534. mediapipe/tasks/python/text/language_detector.py +220 -0
  535. mediapipe/tasks/python/text/text_classifier.py +187 -0
  536. mediapipe/tasks/python/text/text_embedder.py +188 -0
  537. mediapipe/tasks/python/vision/__init__.py +90 -0
  538. mediapipe/tasks/python/vision/core/__init__.py +14 -0
  539. mediapipe/tasks/python/vision/core/base_vision_task_api.py +226 -0
  540. mediapipe/tasks/python/vision/core/image_processing_options.py +39 -0
  541. mediapipe/tasks/python/vision/core/vision_task_running_mode.py +31 -0
  542. mediapipe/tasks/python/vision/face_aligner.py +158 -0
  543. mediapipe/tasks/python/vision/face_detector.py +332 -0
  544. mediapipe/tasks/python/vision/face_landmarker.py +3244 -0
  545. mediapipe/tasks/python/vision/face_stylizer.py +158 -0
  546. mediapipe/tasks/python/vision/gesture_recognizer.py +480 -0
  547. mediapipe/tasks/python/vision/hand_landmarker.py +504 -0
  548. mediapipe/tasks/python/vision/holistic_landmarker.py +576 -0
  549. mediapipe/tasks/python/vision/image_classifier.py +358 -0
  550. mediapipe/tasks/python/vision/image_embedder.py +362 -0
  551. mediapipe/tasks/python/vision/image_segmenter.py +433 -0
  552. mediapipe/tasks/python/vision/interactive_segmenter.py +285 -0
  553. mediapipe/tasks/python/vision/object_detector.py +389 -0
  554. mediapipe/tasks/python/vision/pose_landmarker.py +455 -0
  555. mediapipe/util/__init__.py +0 -0
  556. mediapipe/util/analytics/__init__.py +0 -0
  557. mediapipe/util/analytics/mediapipe_log_extension_pb2.py +44 -0
  558. mediapipe/util/analytics/mediapipe_logging_enums_pb2.py +37 -0
  559. mediapipe/util/audio_decoder_pb2.py +33 -0
  560. mediapipe/util/color_pb2.py +33 -0
  561. mediapipe/util/label_map_pb2.py +27 -0
  562. mediapipe/util/render_data_pb2.py +58 -0
  563. mediapipe/util/sequence/__init__.py +14 -0
  564. mediapipe/util/sequence/media_sequence.py +716 -0
  565. mediapipe/util/sequence/media_sequence_test.py +290 -0
  566. mediapipe/util/sequence/media_sequence_util.py +800 -0
  567. mediapipe/util/sequence/media_sequence_util_test.py +389 -0
  568. mediapipe/util/tracking/__init__.py +0 -0
  569. mediapipe/util/tracking/box_detector_pb2.py +39 -0
  570. mediapipe/util/tracking/box_tracker_pb2.py +32 -0
  571. mediapipe/util/tracking/camera_motion_pb2.py +31 -0
  572. mediapipe/util/tracking/flow_packager_pb2.py +60 -0
  573. mediapipe/util/tracking/frame_selection_pb2.py +35 -0
  574. mediapipe/util/tracking/frame_selection_solution_evaluator_pb2.py +28 -0
  575. mediapipe/util/tracking/motion_analysis_pb2.py +35 -0
  576. mediapipe/util/tracking/motion_estimation_pb2.py +66 -0
  577. mediapipe/util/tracking/motion_models_pb2.py +42 -0
  578. mediapipe/util/tracking/motion_saliency_pb2.py +26 -0
  579. mediapipe/util/tracking/push_pull_filtering_pb2.py +26 -0
  580. mediapipe/util/tracking/region_flow_computation_pb2.py +59 -0
  581. mediapipe/util/tracking/region_flow_pb2.py +49 -0
  582. mediapipe/util/tracking/tone_estimation_pb2.py +45 -0
  583. mediapipe/util/tracking/tone_models_pb2.py +32 -0
  584. mediapipe/util/tracking/tracked_detection_manager_config_pb2.py +26 -0
  585. mediapipe/util/tracking/tracking_pb2.py +73 -0
  586. mediapipe_nightly-0.10.21.post20250114.dist-info/LICENSE +218 -0
  587. mediapipe_nightly-0.10.21.post20250114.dist-info/METADATA +199 -0
  588. mediapipe_nightly-0.10.21.post20250114.dist-info/RECORD +593 -0
  589. mediapipe_nightly-0.10.21.post20250114.dist-info/WHEEL +5 -0
  590. mediapipe_nightly-0.10.21.post20250114.dist-info/top_level.txt +4 -0
  591. mediapipe_nightly.libs/libEGL-48f73270.so.1.1.0 +0 -0
  592. mediapipe_nightly.libs/libGLESv2-ed5eda4f.so.2.1.0 +0 -0
  593. mediapipe_nightly.libs/libGLdispatch-64b28464.so.0.0.0 +0 -0
@@ -0,0 +1,387 @@
1
+ # Copyright 2022 The MediaPipe Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Tests for audio classifier."""
15
+
16
+ import os
17
+ from typing import List, Tuple
18
+ from unittest import mock
19
+
20
+ from absl.testing import absltest
21
+ from absl.testing import parameterized
22
+ import numpy as np
23
+ from scipy.io import wavfile
24
+
25
+ from mediapipe.tasks.python.audio import audio_classifier
26
+ from mediapipe.tasks.python.audio.core import audio_record
27
+ from mediapipe.tasks.python.audio.core import audio_task_running_mode
28
+ from mediapipe.tasks.python.components.containers import audio_data as audio_data_module
29
+ from mediapipe.tasks.python.components.containers import classification_result as classification_result_module
30
+ from mediapipe.tasks.python.core import base_options as base_options_module
31
+ from mediapipe.tasks.python.test import test_utils
32
+
33
# Short aliases for the task classes, containers and running-mode enum that
# the tests below exercise.
+ _AudioClassifier = audio_classifier.AudioClassifier
34
+ _AudioClassifierOptions = audio_classifier.AudioClassifierOptions
35
+ _AudioClassifierResult = classification_result_module.ClassificationResult
36
+ _AudioData = audio_data_module.AudioData
37
+ _AudioRecord = audio_record.AudioRecord
38
+ _BaseOptions = base_options_module.BaseOptions
39
+ _RUNNING_MODE = audio_task_running_mode.AudioTaskRunningMode
40
+
41
# Model and audio fixture file names; the tests join them onto _TEST_DATA_DIR
# before resolving them through test_utils.get_test_data_path().
+ _YAMNET_MODEL_FILE = 'yamnet_audio_classifier_with_metadata.tflite'
42
+ _YAMNET_MODEL_SAMPLE_RATE = 16000
43
+ _TWO_HEADS_MODEL_FILE = 'two_heads.tflite'
44
+ _SPEECH_WAV_16K_MONO = 'speech_16000_hz_mono.wav'
45
+ _SPEECH_WAV_48K_MONO = 'speech_48000_hz_mono.wav'
46
+ _TEST_DATA_DIR = 'mediapipe/tasks/testdata/audio'
47
+ _TWO_HEADS_WAV_16K_MONO = 'two_heads_16000_hz_mono.wav'
48
+ _TWO_HEADS_WAV_44K_MONO = 'two_heads_44100_hz_mono.wav'
49
# Chunk length, in samples at _YAMNET_MODEL_SAMPLE_RATE, used when a WAV file
# is split for stream mode: 15600 / 16000 Hz = 975 ms, which matches the
# timestamps the result checks expect.
+ _YAMNET_NUM_OF_SAMPLES = 15600
50
+ _MILLISECONDS_PER_SECOND = 1000
51
+
52
+
53
class AudioClassifierTest(parameterized.TestCase):
  """Tests for `AudioClassifier` in audio-clips and audio-stream modes.

  The tests run the YAMNet classifier and a two-head classifier against WAV
  fixtures at several sample rates, and check option validation and
  running-mode misuse errors.
  """

  def setUp(self):
    """Resolves the paths of the two test models."""
    super().setUp()
    self.yamnet_model_path = test_utils.get_test_data_path(
        os.path.join(_TEST_DATA_DIR, _YAMNET_MODEL_FILE))
    self.two_heads_model_path = test_utils.get_test_data_path(
        os.path.join(_TEST_DATA_DIR, _TWO_HEADS_MODEL_FILE))

  def _read_wav_file(self, file_name: str) -> _AudioData:
    """Loads a test WAV file as a single `AudioData` clip.

    Args:
      file_name: Name of a WAV file under `_TEST_DATA_DIR`.

    Returns:
      The whole file as one `AudioData`, with samples divided by the int16
      maximum (assumes 16-bit PCM fixtures -- TODO confirm).
    """
    wav_path = test_utils.get_test_data_path(
        os.path.join(_TEST_DATA_DIR, file_name))
    sample_rate, buffer = wavfile.read(wav_path)
    return _AudioData.create_from_array(
        buffer.astype(float) / np.iinfo(np.int16).max, sample_rate)

  def _read_wav_file_as_stream(
      self, file_name: str) -> List[Tuple[_AudioData, int]]:
    """Splits a test WAV file into consecutive chunks for stream mode.

    Args:
      file_name: Name of a WAV file under `_TEST_DATA_DIR`.

    Returns:
      A list of `(audio_data, timestamp_ms)` pairs in playback order. Every
      chunk except possibly the last spans `_YAMNET_NUM_OF_SAMPLES` samples
      rescaled from `_YAMNET_MODEL_SAMPLE_RATE` to the file's sample rate.
    """
    wav_path = test_utils.get_test_data_path(
        os.path.join(_TEST_DATA_DIR, file_name))
    sample_rate, buffer = wavfile.read(wav_path)
    # Truncate once, up front. A zero step now makes range() raise instead of
    # looping forever.
    step_size = int(
        _YAMNET_NUM_OF_SAMPLES * sample_rate / _YAMNET_MODEL_SAMPLE_RATE)
    int16_max = np.iinfo(np.int16).max
    audio_data_list = []
    for start in range(0, len(buffer), step_size):
      # Slicing clips the final chunk to the end of the buffer.
      chunk = buffer[start:start + step_size]
      timestamp_ms = int(start / sample_rate * _MILLISECONDS_PER_SECOND)
      audio_data_list.append(
          (_AudioData.create_from_array(chunk.astype(float) / int16_max,
                                        sample_rate), timestamp_ms))
    return audio_data_list

  def _check_head(self, classifications, head_index, head_name,
                  expected_num_categories, top_index, top_name, min_score):
    """Asserts one head's metadata, category count and top category.

    Args:
      classifications: One entry of `ClassificationResult.classifications`.
      head_index: Expected `head_index`.
      head_name: Expected `head_name`.
      expected_num_categories: Expected number of returned categories.
      top_index: Expected `index` of the first category.
      top_name: Expected `category_name` of the first category.
      min_score: Exclusive lower bound on the first category's score.
    """
    self.assertEqual(classifications.head_index, head_index)
    self.assertEqual(classifications.head_name, head_name)
    self.assertLen(classifications.categories, expected_num_categories)
    top_category = classifications.categories[0]
    self.assertEqual(top_category.index, top_index)
    self.assertEqual(top_category.category_name, top_name)
    self.assertGreater(top_category.score, min_score)

  # TODO: Compare the exact score values to capture unexpected
  # changes in the inference pipeline.
  def _check_yamnet_result(
      self,
      classification_result_list: List[_AudioClassifierResult],
      expected_num_categories=521):
    """Asserts the YAMNet results for the speech fixtures.

    Args:
      classification_result_list: Results returned for one speech WAV file.
      expected_num_categories: Number of categories expected per result.
    """
    self.assertLen(classification_result_list, 5)
    # Only the first four results are inspected. The fifth is counted above
    # but its contents are not asserted.
    # NOTE(review): presumably it covers a trailing partial window -- confirm.
    for idx, timestamp in enumerate([0, 975, 1950, 2925]):
      classification_result = classification_result_list[idx]
      self.assertEqual(classification_result.timestamp_ms, timestamp)
      self.assertLen(classification_result.classifications, 1)
      self._check_head(
          classification_result.classifications[0],
          head_index=0,
          head_name='scores',
          expected_num_categories=expected_num_categories,
          top_index=0,
          top_name='Speech',
          min_score=0.9)

  # TODO: Compare the exact score values to capture unexpected
  # changes in the inference pipeline.
  def _check_two_heads_result(
      self,
      classification_result_list: List[_AudioClassifierResult],
      first_head_expected_num_categories=521,
      second_head_expected_num_categories=5):
    """Asserts the two-head model results for the two-heads fixtures.

    Args:
      classification_result_list: One or two results for a two-heads WAV file.
      first_head_expected_num_categories: Expected category count of the
        'yamnet_classification' head.
      second_head_expected_num_categories: Expected category count of the
        'bird_classification' head.
    """
    self.assertGreaterEqual(len(classification_result_list), 1)
    self.assertLessEqual(len(classification_result_list), 2)

    # Checks the first result.
    first_result = classification_result_list[0]
    self.assertEqual(first_result.timestamp_ms, 0)
    self.assertLen(first_result.classifications, 2)
    # Checks the first head.
    self._check_head(
        first_result.classifications[0],
        head_index=0,
        head_name='yamnet_classification',
        expected_num_categories=first_head_expected_num_categories,
        top_index=508,
        top_name='Environmental noise',
        min_score=0.5)
    # Checks the second head.
    self._check_head(
        first_result.classifications[1],
        head_index=1,
        head_name='bird_classification',
        expected_num_categories=second_head_expected_num_categories,
        top_index=4,
        top_name='Chestnut-crowned Antpitta',
        min_score=0.93)

    # Checks the second result, if present.
    if len(classification_result_list) == 2:
      second_result = classification_result_list[1]
      self.assertEqual(second_result.timestamp_ms, 975)
      self.assertLen(second_result.classifications, 2)
      # Checks the first head.
      self._check_head(
          second_result.classifications[0],
          head_index=0,
          head_name='yamnet_classification',
          expected_num_categories=first_head_expected_num_categories,
          top_index=494,
          top_name='Silence',
          min_score=0.9)
      # Checks the second head.
      self._check_head(
          second_result.classifications[1],
          head_index=1,
          head_name='bird_classification',
          expected_num_categories=second_head_expected_num_categories,
          top_index=1,
          top_name='White-breasted Wood-Wren',
          min_score=0.99)

  def test_create_from_file_succeeds_with_valid_model_path(self):
    # Creates with default option and valid model file successfully.
    with _AudioClassifier.create_from_model_path(
        self.yamnet_model_path) as classifier:
      self.assertIsInstance(classifier, _AudioClassifier)

  def test_create_from_options_succeeds_with_valid_model_path(self):
    # Creates with options containing model file successfully.
    options = _AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path))
    with _AudioClassifier.create_from_options(options) as classifier:
      self.assertIsInstance(classifier, _AudioClassifier)

  def test_create_from_options_fails_with_invalid_model_path(self):
    with self.assertRaisesRegex(
        RuntimeError, 'Unable to open file at /path/to/invalid/model.tflite'):
      base_options = _BaseOptions(
          model_asset_path='/path/to/invalid/model.tflite')
      options = _AudioClassifierOptions(base_options=base_options)
      _AudioClassifier.create_from_options(options)

  def test_create_from_options_succeeds_with_valid_model_content(self):
    # Creates with options containing model content successfully.
    with open(self.yamnet_model_path, 'rb') as f:
      base_options = _BaseOptions(model_asset_buffer=f.read())
    options = _AudioClassifierOptions(base_options=base_options)
    # Use the context manager so the classifier is closed after the check.
    with _AudioClassifier.create_from_options(options) as classifier:
      self.assertIsInstance(classifier, _AudioClassifier)

  @parameterized.parameters(_SPEECH_WAV_16K_MONO, _SPEECH_WAV_48K_MONO)
  def test_classify_with_yamnet_model(self, audio_file):
    with _AudioClassifier.create_from_model_path(
        self.yamnet_model_path) as classifier:
      classification_result_list = classifier.classify(
          self._read_wav_file(audio_file))
      self._check_yamnet_result(classification_result_list)

  def test_classify_with_yamnet_model_and_inputs_at_different_sample_rates(
      self):
    # Reuses one classifier across inputs with different sample rates.
    with _AudioClassifier.create_from_model_path(
        self.yamnet_model_path) as classifier:
      for audio_file in [_SPEECH_WAV_16K_MONO, _SPEECH_WAV_48K_MONO]:
        classification_result_list = classifier.classify(
            self._read_wav_file(audio_file))
        self._check_yamnet_result(classification_result_list)

  @mock.patch('sounddevice.InputStream', return_value=mock.MagicMock())
  def test_create_audio_record_from_classifier_succeeds(self, _):
    # Creates AudioRecord instance using the classifier successfully.
    with _AudioClassifier.create_from_model_path(
        self.yamnet_model_path) as classifier:
      self.assertIsInstance(classifier, _AudioClassifier)
      record = classifier.create_audio_record(1, 16000, 16000)
      self.assertIsInstance(record, _AudioRecord)
      self.assertEqual(record.channels, 1)
      self.assertEqual(record.sampling_rate, 16000)
      self.assertEqual(record.buffer_size, 16000)

  def test_max_result_options(self):
    options = _AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        max_results=1)
    with _AudioClassifier.create_from_options(options) as classifier:
      # NOTE(review): the same 16 kHz file is listed twice; possibly the
      # second entry was meant to be _SPEECH_WAV_48K_MONO -- confirm.
      for audio_file in [_SPEECH_WAV_16K_MONO, _SPEECH_WAV_16K_MONO]:
        classification_result_list = classifier.classify(
            self._read_wav_file(audio_file))
        self._check_yamnet_result(
            classification_result_list, expected_num_categories=1)

  def test_score_threshold_options(self):
    options = _AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        score_threshold=0.9)
    with _AudioClassifier.create_from_options(options) as classifier:
      # NOTE(review): the same 16 kHz file is listed twice; possibly the
      # second entry was meant to be _SPEECH_WAV_48K_MONO -- confirm.
      for audio_file in [_SPEECH_WAV_16K_MONO, _SPEECH_WAV_16K_MONO]:
        classification_result_list = classifier.classify(
            self._read_wav_file(audio_file))
        self._check_yamnet_result(
            classification_result_list, expected_num_categories=1)

  def test_allow_list_option(self):
    options = _AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        category_allowlist=['Speech'])
    with _AudioClassifier.create_from_options(options) as classifier:
      # NOTE(review): the same 16 kHz file is listed twice; possibly the
      # second entry was meant to be _SPEECH_WAV_48K_MONO -- confirm.
      for audio_file in [_SPEECH_WAV_16K_MONO, _SPEECH_WAV_16K_MONO]:
        classification_result_list = classifier.classify(
            self._read_wav_file(audio_file))
        self._check_yamnet_result(
            classification_result_list, expected_num_categories=1)

  def test_combined_allowlist_and_denylist(self):
    # Fails with combined allowlist and denylist
    with self.assertRaisesRegex(
        ValueError,
        r'`category_allowlist` and `category_denylist` are mutually '
        r'exclusive options.'):
      options = _AudioClassifierOptions(
          base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
          category_allowlist=['foo'],
          category_denylist=['bar'])
      with _AudioClassifier.create_from_options(options) as unused_classifier:
        pass

  @parameterized.parameters(_TWO_HEADS_WAV_16K_MONO, _TWO_HEADS_WAV_44K_MONO)
  def test_classify_with_two_heads_model_and_inputs_at_different_sample_rates(
      self, audio_file):
    with _AudioClassifier.create_from_model_path(
        self.two_heads_model_path) as classifier:
      classification_result_list = classifier.classify(
          self._read_wav_file(audio_file))
      self._check_two_heads_result(classification_result_list)

  def test_classify_with_two_heads_model(self):
    # Reuses one classifier across inputs with different sample rates.
    with _AudioClassifier.create_from_model_path(
        self.two_heads_model_path) as classifier:
      for audio_file in [_TWO_HEADS_WAV_16K_MONO, _TWO_HEADS_WAV_44K_MONO]:
        classification_result_list = classifier.classify(
            self._read_wav_file(audio_file))
        self._check_two_heads_result(classification_result_list)

  def test_classify_with_two_heads_model_with_max_results(self):
    options = _AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_path=self.two_heads_model_path),
        max_results=1)
    with _AudioClassifier.create_from_options(options) as classifier:
      for audio_file in [_TWO_HEADS_WAV_16K_MONO, _TWO_HEADS_WAV_44K_MONO]:
        classification_result_list = classifier.classify(
            self._read_wav_file(audio_file))
        self._check_two_heads_result(classification_result_list, 1, 1)

  def test_missing_sample_rate_in_audio_clips_mode(self):
    options = _AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_CLIPS)
    with self.assertRaisesRegex(ValueError,
                                r'Must provide the audio sample rate'):
      with _AudioClassifier.create_from_options(options) as classifier:
        classifier.classify(_AudioData(buffer_length=100))

  def test_missing_sample_rate_in_audio_stream_mode(self):
    options = _AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_STREAM,
        result_callback=mock.MagicMock())
    with self.assertRaisesRegex(ValueError,
                                r'provide the audio sample rate in audio data'):
      with _AudioClassifier.create_from_options(options) as classifier:
        # NOTE(review): this calls classify() on a stream-mode classifier;
        # classify_async() may have been intended -- confirm.
        classifier.classify(_AudioData(buffer_length=100))

  def test_missing_result_callback(self):
    options = _AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_STREAM)
    with self.assertRaisesRegex(ValueError,
                                r'result callback must be provided'):
      with _AudioClassifier.create_from_options(options) as unused_classifier:
        pass

  def test_illegal_result_callback(self):
    options = _AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_CLIPS,
        result_callback=mock.MagicMock())
    with self.assertRaisesRegex(ValueError,
                                r'result callback should not be provided'):
      with _AudioClassifier.create_from_options(options) as unused_classifier:
        pass

  def test_calling_classify_in_audio_stream_mode(self):
    options = _AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_STREAM,
        result_callback=mock.MagicMock())
    with _AudioClassifier.create_from_options(options) as classifier:
      with self.assertRaisesRegex(ValueError,
                                  r'not initialized with the audio clips mode'):
        classifier.classify(self._read_wav_file(_SPEECH_WAV_16K_MONO))

  def test_calling_classify_async_in_audio_clips_mode(self):
    options = _AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_CLIPS)
    with _AudioClassifier.create_from_options(options) as classifier:
      with self.assertRaisesRegex(
          ValueError, r'not initialized with the audio stream mode'):
        classifier.classify_async(self._read_wav_file(_SPEECH_WAV_16K_MONO), 0)

  def test_classify_async_calls_with_illegal_timestamp(self):
    options = _AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_STREAM,
        result_callback=mock.MagicMock())
    with _AudioClassifier.create_from_options(options) as classifier:
      classifier.classify_async(self._read_wav_file(_SPEECH_WAV_16K_MONO), 100)
      # A second call with an earlier timestamp must be rejected.
      with self.assertRaisesRegex(
          ValueError, r'Input timestamp must be monotonically increasing'):
        classifier.classify_async(self._read_wav_file(_SPEECH_WAV_16K_MONO), 0)

  @parameterized.parameters(_SPEECH_WAV_16K_MONO, _SPEECH_WAV_48K_MONO)
  def test_classify_async(self, audio_file):
    classification_result_list = []

    def save_result(result: _AudioClassifierResult, timestamp_ms: int):
      # Store the callback's timestamp on the result so that
      # _check_yamnet_result can assert on it.
      result.timestamp_ms = timestamp_ms
      classification_result_list.append(result)

    options = _AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_STREAM,
        max_results=1,
        result_callback=save_result)
    # The context manager closes the classifier even if classify_async raises.
    # Results are checked only after it has been closed, as before.
    with _AudioClassifier.create_from_options(options) as classifier:
      for audio_data, timestamp_ms in self._read_wav_file_as_stream(audio_file):
        classifier.classify_async(audio_data, timestamp_ms)
    self._check_yamnet_result(
        classification_result_list, expected_num_categories=1)
384
+
385
+
386
# Run the test cases through absltest when this module is executed directly.
+ if __name__ == '__main__':
387
+ absltest.main()
@@ -0,0 +1,297 @@
1
+ # Copyright 2022 The MediaPipe Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Tests for audio embedder."""
15
+ import enum
16
+ import os
17
+ from typing import List, Tuple
18
+ from unittest import mock
19
+
20
+ from absl.testing import absltest
21
+ from absl.testing import parameterized
22
+
23
+ import numpy as np
24
+ from scipy.io import wavfile
25
+
26
+ from mediapipe.tasks.python.audio import audio_embedder
27
+ from mediapipe.tasks.python.audio.core import audio_record
28
+ from mediapipe.tasks.python.audio.core import audio_task_running_mode
29
+ from mediapipe.tasks.python.components.containers import audio_data as audio_data_module
30
+ from mediapipe.tasks.python.core import base_options as base_options_module
31
+ from mediapipe.tasks.python.test import test_utils
32
+
33
# Short aliases for the MediaPipe types exercised by these tests.
_BaseOptions = base_options_module.BaseOptions
_RUNNING_MODE = audio_task_running_mode.AudioTaskRunningMode
_AudioData = audio_data_module.AudioData
_AudioRecord = audio_record.AudioRecord
_AudioEmbedder = audio_embedder.AudioEmbedder
_AudioEmbedderOptions = audio_embedder.AudioEmbedderOptions
_AudioEmbedderResult = audio_embedder.AudioEmbedderResult

# Test data: directory, model file and input WAV files.
_TEST_DATA_DIR = 'mediapipe/tasks/testdata/audio'
_YAMNET_MODEL_FILE = 'yamnet_embedding_metadata.tflite'
_SPEECH_WAV_16K_MONO = 'speech_16000_hz_mono.wav'
_SPEECH_WAV_48K_MONO = 'speech_48000_hz_mono.wav'
_TWO_HEADS_WAV_16K_MONO = 'two_heads_16000_hz_mono.wav'

# Chunking parameters used when a WAV file is split into a stream: the chunk
# length is `_YAMNET_NUM_OF_SAMPLES` samples at `_YAMNET_MODEL_SAMPLE_RATE`,
# rescaled to the sample rate of the file being read.
_YAMNET_MODEL_SAMPLE_RATE = 16000
_YAMNET_NUM_OF_SAMPLES = 15600
_MILLISECONDS_PER_SECOND = 1000

# Tolerance for embedding vector coordinate values.
_EPSILON = 3e-6
51
+
52
+
53
class ModelFileType(enum.Enum):
  """How a test hands the model to the task: as raw bytes or as a file path."""

  # `enum.auto()` numbers members from 1 in declaration order, so the values
  # remain FILE_CONTENT == 1 and FILE_NAME == 2.
  FILE_CONTENT = enum.auto()
  FILE_NAME = enum.auto()
56
+
57
+
58
class AudioEmbedderTest(parameterized.TestCase):
  """End-to-end tests for `AudioEmbedder` using the YAMNet embedding model.

  Covers task creation from a model path or model bytes, embedding whole clips
  with `embed`, streaming chunks through `embed_async`, and the `ValueError`s
  expected when options or calls do not match the running mode.
  """

  def setUp(self):
    """Resolves the path of the YAMNet embedding model used by every test."""
    super().setUp()
    self.yamnet_model_path = test_utils.get_test_data_path(
        os.path.join(_TEST_DATA_DIR, _YAMNET_MODEL_FILE))

  def _read_wav_file(self, file_name) -> _AudioData:
    """Loads a test WAV file as a single `_AudioData` clip.

    Args:
      file_name: WAV file name relative to `_TEST_DATA_DIR`.

    Returns:
      The whole file as one `_AudioData`, with samples converted to float and
      divided by the int16 maximum.
    """
    sample_rate, buffer = wavfile.read(
        test_utils.get_test_data_path(os.path.join(_TEST_DATA_DIR, file_name)))
    # NOTE(review): the int16 scale factor presumes 16-bit PCM test files —
    # confirm against the test data.
    return _AudioData.create_from_array(
        buffer.astype(float) / np.iinfo(np.int16).max, sample_rate)

  def _read_wav_file_as_stream(self, file_name) -> List[Tuple[_AudioData, int]]:
    """Splits a test WAV file into consecutive chunks with start timestamps.

    Args:
      file_name: WAV file name relative to `_TEST_DATA_DIR`.

    Returns:
      A list of `(audio_data, timestamp_ms)` pairs in file order, where
      `timestamp_ms` is the chunk's start offset in milliseconds. The last
      chunk may be shorter than the others.

    Raises:
      ValueError: If the computed chunk length is zero samples (raised by
        `range`; the previous `while` loop would never terminate in that
        case).
    """
    sample_rate, buffer = wavfile.read(
        test_utils.get_test_data_path(os.path.join(_TEST_DATA_DIR, file_name)))
    # Chunk length in samples of *this* file: `_YAMNET_NUM_OF_SAMPLES` samples
    # at the model rate, rescaled to the file's own sample rate.
    step_size = int(
        _YAMNET_NUM_OF_SAMPLES * sample_rate / _YAMNET_MODEL_SAMPLE_RATE)
    int16_max = np.iinfo(np.int16).max
    total_samples = len(buffer)

    audio_data_list = []
    for start in range(0, total_samples, step_size):
      end = min(start + step_size, total_samples)
      timestamp_ms = int(start / sample_rate * _MILLISECONDS_PER_SECOND)
      chunk = _AudioData.create_from_array(
          buffer[start:end].astype(float) / int16_max, sample_rate)
      audio_data_list.append((chunk, timestamp_ms))
    return audio_data_list

  def _embed_stream(self, audio_file, l2_normalize, quantize):
    """Streams one WAV file through a fresh embedder and collects the results.

    Args:
      audio_file: WAV file name relative to `_TEST_DATA_DIR`.
      l2_normalize: Forwarded to `_AudioEmbedderOptions.l2_normalize`.
      quantize: Forwarded to `_AudioEmbedderOptions.quantize`.

    Returns:
      The list of results delivered to the result callback, each with its
      callback timestamp stored in `timestamp_ms`.
    """
    results = []

    def save_result(result: _AudioEmbedderResult, timestamp_ms: int):
      result.timestamp_ms = timestamp_ms
      results.append(result)

    options = _AudioEmbedderOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_STREAM,
        l2_normalize=l2_normalize,
        quantize=quantize,
        result_callback=save_result)

    with _AudioEmbedder.create_from_options(options) as embedder:
      for audio_data, timestamp_ms in self._read_wav_file_as_stream(audio_file):
        embedder.embed_async(audio_data, timestamp_ms)
    # The list is returned only after the `with` block has exited.
    # NOTE(review): presumably closing the embedder waits for pending
    # callbacks — confirm against the task implementation.
    return results

  def _check_embedding_value(self, result, expected_first_value):
    """Asserts the first coordinate of the first embedding, within `_EPSILON`."""
    self.assertAlmostEqual(
        result.embeddings[0].embedding[0], expected_first_value, delta=_EPSILON)

  def _check_embedding_size(self, result, quantize, expected_embedding_size):
    """Asserts there is one embedding with the expected length and dtype.

    Args:
      result: An embedder result exposing an `embeddings` list.
      quantize: Whether quantization was requested; selects the expected dtype
        (`np.uint8` when true, `float` otherwise).
      expected_embedding_size: Expected length of the embedding vector.
    """
    self.assertLen(result.embeddings, 1)
    embedding_result = result.embeddings[0]
    self.assertLen(embedding_result.embedding, expected_embedding_size)
    expected_dtype = np.uint8 if quantize else float
    self.assertEqual(embedding_result.embedding.dtype, expected_dtype)

  def test_create_from_file_succeeds_with_valid_model_path(self):
    """Creates an embedder with default options from a valid model path."""
    with _AudioEmbedder.create_from_model_path(
        self.yamnet_model_path) as embedder:
      self.assertIsInstance(embedder, _AudioEmbedder)

  def test_create_from_options_succeeds_with_valid_model_path(self):
    """Creates an embedder from options that carry a valid model path."""
    with _AudioEmbedder.create_from_options(
        _AudioEmbedderOptions(
            base_options=_BaseOptions(
                model_asset_path=self.yamnet_model_path))) as embedder:
      self.assertIsInstance(embedder, _AudioEmbedder)

  def test_create_from_options_fails_with_invalid_model_path(self):
    """Expects a `RuntimeError` when the model path does not exist."""
    with self.assertRaisesRegex(
        RuntimeError, 'Unable to open file at /path/to/invalid/model.tflite'):
      base_options = _BaseOptions(
          model_asset_path='/path/to/invalid/model.tflite')
      options = _AudioEmbedderOptions(base_options=base_options)
      _AudioEmbedder.create_from_options(options)

  def test_create_from_options_succeeds_with_valid_model_content(self):
    """Creates an embedder from options that carry the model bytes."""
    with open(self.yamnet_model_path, 'rb') as f:
      base_options = _BaseOptions(model_asset_buffer=f.read())
    options = _AudioEmbedderOptions(base_options=base_options)
    # Use the embedder as a context manager so it is released; it was
    # previously created and never closed.
    with _AudioEmbedder.create_from_options(options) as embedder:
      self.assertIsInstance(embedder, _AudioEmbedder)

  @parameterized.parameters(
      # Same audio inputs but different sample rates.
      (False, False, ModelFileType.FILE_NAME, _SPEECH_WAV_16K_MONO,
       _SPEECH_WAV_48K_MONO, 1024, (0, 0)),
      (False, False, ModelFileType.FILE_CONTENT, _SPEECH_WAV_16K_MONO,
       _SPEECH_WAV_48K_MONO, 1024, (0, 0)))
  def test_embed_with_yamnet_model(self, l2_normalize, quantize,
                                   model_file_type, audio_file0, audio_file1,
                                   expected_size, expected_first_values):
    """Embeds two clips and checks result count, size, dtype and first value.

    Args:
      l2_normalize: Forwarded to `_AudioEmbedderOptions.l2_normalize`.
      quantize: Forwarded to `_AudioEmbedderOptions.quantize`.
      model_file_type: Whether the model is passed by path or by content.
      audio_file0: First WAV file name.
      audio_file1: Second WAV file name.
      expected_size: Expected embedding vector length.
      expected_first_values: Pair of expected first coordinates, one per file.
    """
    # Creates embedder.
    if model_file_type is ModelFileType.FILE_NAME:
      base_options = _BaseOptions(model_asset_path=self.yamnet_model_path)
    elif model_file_type is ModelFileType.FILE_CONTENT:
      with open(self.yamnet_model_path, 'rb') as f:
        model_content = f.read()
      base_options = _BaseOptions(model_asset_buffer=model_content)
    else:
      # Should never happen
      raise ValueError('model_file_type is invalid.')

    options = _AudioEmbedderOptions(
        base_options=base_options, l2_normalize=l2_normalize, quantize=quantize)

    with _AudioEmbedder.create_from_options(options) as embedder:
      embedding_result0_list = embedder.embed(self._read_wav_file(audio_file0))
      embedding_result1_list = embedder.embed(self._read_wav_file(audio_file1))

      # Checks embeddings.
      expected_result0_value, expected_result1_value = expected_first_values
      self._check_embedding_size(embedding_result0_list[0], quantize,
                                 expected_size)
      self._check_embedding_size(embedding_result1_list[0], quantize,
                                 expected_size)
      self._check_embedding_value(embedding_result0_list[0],
                                  expected_result0_value)
      self._check_embedding_value(embedding_result1_list[0],
                                  expected_result1_value)
      self.assertLen(embedding_result0_list, 5)
      self.assertLen(embedding_result1_list, 5)

  @mock.patch('sounddevice.InputStream', return_value=mock.MagicMock())
  def test_create_audio_record_from_embedder_succeeds(self, _):
    """Creates an `AudioRecord` from the embedder with a mocked input stream."""
    with _AudioEmbedder.create_from_model_path(
        self.yamnet_model_path
    ) as embedder:
      self.assertIsInstance(embedder, _AudioEmbedder)
      record = embedder.create_audio_record(1, 16000, 16000)
      self.assertIsInstance(record, _AudioRecord)
      self.assertEqual(record.channels, 1)
      self.assertEqual(record.sampling_rate, 16000)
      self.assertEqual(record.buffer_size, 16000)

  def test_embed_with_yamnet_model_and_different_inputs(self):
    """Embeds two different clips and checks the number of results for each."""
    with _AudioEmbedder.create_from_model_path(
        self.yamnet_model_path) as embedder:
      embedding_result0_list = embedder.embed(
          self._read_wav_file(_SPEECH_WAV_16K_MONO))
      embedding_result1_list = embedder.embed(
          self._read_wav_file(_TWO_HEADS_WAV_16K_MONO))
      self.assertLen(embedding_result0_list, 5)
      self.assertLen(embedding_result1_list, 1)

  def test_missing_sample_rate_in_audio_clips_mode(self):
    """Expects a `ValueError` when the clip carries no sample rate."""
    options = _AudioEmbedderOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_CLIPS)
    with self.assertRaisesRegex(ValueError,
                                r'Must provide the audio sample rate'):
      with _AudioEmbedder.create_from_options(options) as embedder:
        embedder.embed(_AudioData(buffer_length=100))

  def test_missing_sample_rate_in_audio_stream_mode(self):
    """Expects a `ValueError` about the sample rate in audio stream mode."""
    options = _AudioEmbedderOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_STREAM,
        result_callback=mock.MagicMock())
    with self.assertRaisesRegex(ValueError,
                                r'provide the audio sample rate in audio data'):
      with _AudioEmbedder.create_from_options(options) as embedder:
        # NOTE(review): this calls `embed`, not `embed_async`, although the
        # embedder is in stream mode — confirm this is the intended call.
        embedder.embed(_AudioData(buffer_length=100))

  def test_missing_result_callback(self):
    """Expects a `ValueError` when stream mode has no result callback."""
    options = _AudioEmbedderOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_STREAM)
    with self.assertRaisesRegex(ValueError,
                                r'result callback must be provided'):
      with _AudioEmbedder.create_from_options(options) as unused_embedder:
        pass

  def test_illegal_result_callback(self):
    """Expects a `ValueError` when clips mode is given a result callback."""
    options = _AudioEmbedderOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_CLIPS,
        result_callback=mock.MagicMock())
    with self.assertRaisesRegex(ValueError,
                                r'result callback should not be provided'):
      with _AudioEmbedder.create_from_options(options) as unused_embedder:
        pass

  def test_calling_embed_in_audio_stream_mode(self):
    """Expects a `ValueError` when `embed` is called in stream mode."""
    options = _AudioEmbedderOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_STREAM,
        result_callback=mock.MagicMock())
    with _AudioEmbedder.create_from_options(options) as embedder:
      with self.assertRaisesRegex(ValueError,
                                  r'not initialized with the audio clips mode'):
        embedder.embed(self._read_wav_file(_SPEECH_WAV_16K_MONO))

  def test_calling_embed_async_in_audio_clips_mode(self):
    """Expects a `ValueError` when `embed_async` is called in clips mode."""
    options = _AudioEmbedderOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_CLIPS)
    with _AudioEmbedder.create_from_options(options) as embedder:
      with self.assertRaisesRegex(
          ValueError, r'not initialized with the audio stream mode'):
        embedder.embed_async(self._read_wav_file(_SPEECH_WAV_16K_MONO), 0)

  def test_embed_async_calls_with_illegal_timestamp(self):
    """Expects a `ValueError` when a later call uses a lower timestamp."""
    options = _AudioEmbedderOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_STREAM,
        result_callback=mock.MagicMock())
    with _AudioEmbedder.create_from_options(options) as embedder:
      embedder.embed_async(self._read_wav_file(_SPEECH_WAV_16K_MONO), 100)
      with self.assertRaisesRegex(
          ValueError, r'Input timestamp must be monotonically increasing'):
        embedder.embed_async(self._read_wav_file(_SPEECH_WAV_16K_MONO), 0)

  @parameterized.parameters(
      # Same audio inputs but different sample rates.
      (False, False, _SPEECH_WAV_16K_MONO, _SPEECH_WAV_48K_MONO))
  def test_embed_async(self, l2_normalize, quantize, audio_file0, audio_file1):
    """Streams two files through separate embedders and counts the results.

    Each file gets its own embedder and its own result list, so the two runs
    cannot write into each other's results.

    Args:
      l2_normalize: Forwarded to `_AudioEmbedderOptions.l2_normalize`.
      quantize: Forwarded to `_AudioEmbedderOptions.quantize`.
      audio_file0: First WAV file name.
      audio_file1: Second WAV file name.
    """
    embedding_result0_list = self._embed_stream(
        audio_file0, l2_normalize, quantize)
    embedding_result1_list = self._embed_stream(
        audio_file1, l2_normalize, quantize)

    self.assertLen(embedding_result0_list, 5)
    self.assertLen(embedding_result1_list, 5)
294
+
295
+
296
# Script entry point: when this module is executed directly, run its tests
# through absltest's runner.
+ if __name__ == '__main__':
297
+ absltest.main()