mediapipe-nightly 0.10.21.post20241223-cp312-cp312-macosx_11_0_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (594)
  1. mediapipe/__init__.py +26 -0
  2. mediapipe/calculators/__init__.py +0 -0
  3. mediapipe/calculators/audio/__init__.py +0 -0
  4. mediapipe/calculators/audio/mfcc_mel_calculators_pb2.py +33 -0
  5. mediapipe/calculators/audio/rational_factor_resample_calculator_pb2.py +33 -0
  6. mediapipe/calculators/audio/spectrogram_calculator_pb2.py +37 -0
  7. mediapipe/calculators/audio/stabilized_log_calculator_pb2.py +31 -0
  8. mediapipe/calculators/audio/time_series_framer_calculator_pb2.py +33 -0
  9. mediapipe/calculators/core/__init__.py +0 -0
  10. mediapipe/calculators/core/bypass_calculator_pb2.py +31 -0
  11. mediapipe/calculators/core/clip_vector_size_calculator_pb2.py +31 -0
  12. mediapipe/calculators/core/concatenate_vector_calculator_pb2.py +31 -0
  13. mediapipe/calculators/core/constant_side_packet_calculator_pb2.py +39 -0
  14. mediapipe/calculators/core/dequantize_byte_array_calculator_pb2.py +31 -0
  15. mediapipe/calculators/core/flow_limiter_calculator_pb2.py +32 -0
  16. mediapipe/calculators/core/gate_calculator_pb2.py +33 -0
  17. mediapipe/calculators/core/get_vector_item_calculator_pb2.py +31 -0
  18. mediapipe/calculators/core/graph_profile_calculator_pb2.py +31 -0
  19. mediapipe/calculators/core/packet_cloner_calculator_pb2.py +31 -0
  20. mediapipe/calculators/core/packet_resampler_calculator_pb2.py +33 -0
  21. mediapipe/calculators/core/packet_thinner_calculator_pb2.py +33 -0
  22. mediapipe/calculators/core/quantize_float_vector_calculator_pb2.py +31 -0
  23. mediapipe/calculators/core/sequence_shift_calculator_pb2.py +31 -0
  24. mediapipe/calculators/core/split_vector_calculator_pb2.py +33 -0
  25. mediapipe/calculators/image/__init__.py +0 -0
  26. mediapipe/calculators/image/bilateral_filter_calculator_pb2.py +31 -0
  27. mediapipe/calculators/image/feature_detector_calculator_pb2.py +31 -0
  28. mediapipe/calculators/image/image_clone_calculator_pb2.py +31 -0
  29. mediapipe/calculators/image/image_cropping_calculator_pb2.py +33 -0
  30. mediapipe/calculators/image/image_transformation_calculator_pb2.py +38 -0
  31. mediapipe/calculators/image/mask_overlay_calculator_pb2.py +33 -0
  32. mediapipe/calculators/image/opencv_encoded_image_to_image_frame_calculator_pb2.py +31 -0
  33. mediapipe/calculators/image/opencv_image_encoder_calculator_pb2.py +35 -0
  34. mediapipe/calculators/image/recolor_calculator_pb2.py +34 -0
  35. mediapipe/calculators/image/rotation_mode_pb2.py +29 -0
  36. mediapipe/calculators/image/scale_image_calculator_pb2.py +34 -0
  37. mediapipe/calculators/image/segmentation_smoothing_calculator_pb2.py +31 -0
  38. mediapipe/calculators/image/set_alpha_calculator_pb2.py +31 -0
  39. mediapipe/calculators/image/warp_affine_calculator_pb2.py +36 -0
  40. mediapipe/calculators/internal/__init__.py +0 -0
  41. mediapipe/calculators/internal/callback_packet_calculator_pb2.py +33 -0
  42. mediapipe/calculators/tensor/__init__.py +0 -0
  43. mediapipe/calculators/tensor/audio_to_tensor_calculator_pb2.py +35 -0
  44. mediapipe/calculators/tensor/bert_preprocessor_calculator_pb2.py +31 -0
  45. mediapipe/calculators/tensor/feedback_tensors_calculator_pb2.py +37 -0
  46. mediapipe/calculators/tensor/image_to_tensor_calculator_pb2.py +40 -0
  47. mediapipe/calculators/tensor/inference_calculator_pb2.py +63 -0
  48. mediapipe/calculators/tensor/landmarks_to_tensor_calculator_pb2.py +33 -0
  49. mediapipe/calculators/tensor/regex_preprocessor_calculator_pb2.py +31 -0
  50. mediapipe/calculators/tensor/tensor_converter_calculator_pb2.py +34 -0
  51. mediapipe/calculators/tensor/tensor_to_joints_calculator_pb2.py +31 -0
  52. mediapipe/calculators/tensor/tensors_readback_calculator_pb2.py +35 -0
  53. mediapipe/calculators/tensor/tensors_to_audio_calculator_pb2.py +33 -0
  54. mediapipe/calculators/tensor/tensors_to_classification_calculator_pb2.py +44 -0
  55. mediapipe/calculators/tensor/tensors_to_detections_calculator_pb2.py +39 -0
  56. mediapipe/calculators/tensor/tensors_to_floats_calculator_pb2.py +33 -0
  57. mediapipe/calculators/tensor/tensors_to_landmarks_calculator_pb2.py +33 -0
  58. mediapipe/calculators/tensor/tensors_to_segmentation_calculator_pb2.py +34 -0
  59. mediapipe/calculators/tensor/vector_to_tensor_calculator_pb2.py +27 -0
  60. mediapipe/calculators/tflite/__init__.py +0 -0
  61. mediapipe/calculators/tflite/ssd_anchors_calculator_pb2.py +32 -0
  62. mediapipe/calculators/tflite/tflite_converter_calculator_pb2.py +33 -0
  63. mediapipe/calculators/tflite/tflite_custom_op_resolver_calculator_pb2.py +31 -0
  64. mediapipe/calculators/tflite/tflite_inference_calculator_pb2.py +49 -0
  65. mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator_pb2.py +31 -0
  66. mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator_pb2.py +31 -0
  67. mediapipe/calculators/tflite/tflite_tensors_to_landmarks_calculator_pb2.py +33 -0
  68. mediapipe/calculators/tflite/tflite_tensors_to_segmentation_calculator_pb2.py +31 -0
  69. mediapipe/calculators/util/__init__.py +0 -0
  70. mediapipe/calculators/util/align_hand_to_pose_in_world_calculator_pb2.py +31 -0
  71. mediapipe/calculators/util/annotation_overlay_calculator_pb2.py +32 -0
  72. mediapipe/calculators/util/association_calculator_pb2.py +31 -0
  73. mediapipe/calculators/util/collection_has_min_size_calculator_pb2.py +31 -0
  74. mediapipe/calculators/util/combine_joints_calculator_pb2.py +36 -0
  75. mediapipe/calculators/util/detection_label_id_to_text_calculator_pb2.py +36 -0
  76. mediapipe/calculators/util/detections_to_rects_calculator_pb2.py +33 -0
  77. mediapipe/calculators/util/detections_to_render_data_calculator_pb2.py +33 -0
  78. mediapipe/calculators/util/face_to_rect_calculator_pb2.py +26 -0
  79. mediapipe/calculators/util/filter_detections_calculator_pb2.py +31 -0
  80. mediapipe/calculators/util/flat_color_image_calculator_pb2.py +32 -0
  81. mediapipe/calculators/util/labels_to_render_data_calculator_pb2.py +34 -0
  82. mediapipe/calculators/util/landmark_projection_calculator_pb2.py +31 -0
  83. mediapipe/calculators/util/landmarks_refinement_calculator_pb2.py +41 -0
  84. mediapipe/calculators/util/landmarks_smoothing_calculator_pb2.py +33 -0
  85. mediapipe/calculators/util/landmarks_to_detection_calculator_pb2.py +31 -0
  86. mediapipe/calculators/util/landmarks_to_floats_calculator_pb2.py +31 -0
  87. mediapipe/calculators/util/landmarks_to_render_data_calculator_pb2.py +32 -0
  88. mediapipe/calculators/util/landmarks_transformation_calculator_pb2.py +37 -0
  89. mediapipe/calculators/util/latency_pb2.py +26 -0
  90. mediapipe/calculators/util/local_file_contents_calculator_pb2.py +31 -0
  91. mediapipe/calculators/util/logic_calculator_pb2.py +34 -0
  92. mediapipe/calculators/util/non_max_suppression_calculator_pb2.py +35 -0
  93. mediapipe/calculators/util/packet_frequency_calculator_pb2.py +31 -0
  94. mediapipe/calculators/util/packet_frequency_pb2.py +26 -0
  95. mediapipe/calculators/util/packet_latency_calculator_pb2.py +31 -0
  96. mediapipe/calculators/util/rect_to_render_data_calculator_pb2.py +32 -0
  97. mediapipe/calculators/util/rect_to_render_scale_calculator_pb2.py +31 -0
  98. mediapipe/calculators/util/rect_transformation_calculator_pb2.py +31 -0
  99. mediapipe/calculators/util/refine_landmarks_from_heatmap_calculator_pb2.py +31 -0
  100. mediapipe/calculators/util/resource_provider_calculator_pb2.py +28 -0
  101. mediapipe/calculators/util/set_joints_visibility_calculator_pb2.py +41 -0
  102. mediapipe/calculators/util/thresholding_calculator_pb2.py +31 -0
  103. mediapipe/calculators/util/timed_box_list_id_to_label_calculator_pb2.py +31 -0
  104. mediapipe/calculators/util/timed_box_list_to_render_data_calculator_pb2.py +32 -0
  105. mediapipe/calculators/util/top_k_scores_calculator_pb2.py +31 -0
  106. mediapipe/calculators/util/visibility_copy_calculator_pb2.py +27 -0
  107. mediapipe/calculators/util/visibility_smoothing_calculator_pb2.py +31 -0
  108. mediapipe/calculators/video/__init__.py +0 -0
  109. mediapipe/calculators/video/box_detector_calculator_pb2.py +32 -0
  110. mediapipe/calculators/video/box_tracker_calculator_pb2.py +32 -0
  111. mediapipe/calculators/video/flow_packager_calculator_pb2.py +32 -0
  112. mediapipe/calculators/video/flow_to_image_calculator_pb2.py +31 -0
  113. mediapipe/calculators/video/motion_analysis_calculator_pb2.py +42 -0
  114. mediapipe/calculators/video/opencv_video_encoder_calculator_pb2.py +31 -0
  115. mediapipe/calculators/video/tool/__init__.py +0 -0
  116. mediapipe/calculators/video/tool/flow_quantizer_model_pb2.py +26 -0
  117. mediapipe/calculators/video/tracked_detection_manager_calculator_pb2.py +32 -0
  118. mediapipe/calculators/video/video_pre_stream_calculator_pb2.py +35 -0
  119. mediapipe/examples/__init__.py +14 -0
  120. mediapipe/examples/desktop/__init__.py +14 -0
  121. mediapipe/framework/__init__.py +0 -0
  122. mediapipe/framework/calculator_options_pb2.py +29 -0
  123. mediapipe/framework/calculator_pb2.py +59 -0
  124. mediapipe/framework/calculator_profile_pb2.py +48 -0
  125. mediapipe/framework/deps/__init__.py +0 -0
  126. mediapipe/framework/deps/proto_descriptor_pb2.py +29 -0
  127. mediapipe/framework/formats/__init__.py +0 -0
  128. mediapipe/framework/formats/affine_transform_data_pb2.py +28 -0
  129. mediapipe/framework/formats/annotation/__init__.py +0 -0
  130. mediapipe/framework/formats/annotation/locus_pb2.py +32 -0
  131. mediapipe/framework/formats/annotation/rasterization_pb2.py +29 -0
  132. mediapipe/framework/formats/body_rig_pb2.py +28 -0
  133. mediapipe/framework/formats/classification_pb2.py +31 -0
  134. mediapipe/framework/formats/detection_pb2.py +36 -0
  135. mediapipe/framework/formats/image_file_properties_pb2.py +26 -0
  136. mediapipe/framework/formats/image_format_pb2.py +29 -0
  137. mediapipe/framework/formats/landmark_pb2.py +37 -0
  138. mediapipe/framework/formats/location_data_pb2.py +38 -0
  139. mediapipe/framework/formats/matrix_data_pb2.py +31 -0
  140. mediapipe/framework/formats/motion/__init__.py +0 -0
  141. mediapipe/framework/formats/motion/optical_flow_field_data_pb2.py +30 -0
  142. mediapipe/framework/formats/object_detection/__init__.py +0 -0
  143. mediapipe/framework/formats/object_detection/anchor_pb2.py +26 -0
  144. mediapipe/framework/formats/rect_pb2.py +29 -0
  145. mediapipe/framework/formats/time_series_header_pb2.py +28 -0
  146. mediapipe/framework/graph_runtime_info_pb2.py +31 -0
  147. mediapipe/framework/mediapipe_options_pb2.py +27 -0
  148. mediapipe/framework/packet_factory_pb2.py +31 -0
  149. mediapipe/framework/packet_generator_pb2.py +33 -0
  150. mediapipe/framework/status_handler_pb2.py +28 -0
  151. mediapipe/framework/stream_handler/__init__.py +0 -0
  152. mediapipe/framework/stream_handler/default_input_stream_handler_pb2.py +27 -0
  153. mediapipe/framework/stream_handler/fixed_size_input_stream_handler_pb2.py +27 -0
  154. mediapipe/framework/stream_handler/sync_set_input_stream_handler_pb2.py +29 -0
  155. mediapipe/framework/stream_handler/timestamp_align_input_stream_handler_pb2.py +27 -0
  156. mediapipe/framework/stream_handler_pb2.py +30 -0
  157. mediapipe/framework/test_calculators_pb2.py +31 -0
  158. mediapipe/framework/thread_pool_executor_pb2.py +29 -0
  159. mediapipe/framework/tool/__init__.py +0 -0
  160. mediapipe/framework/tool/calculator_graph_template_pb2.py +44 -0
  161. mediapipe/framework/tool/field_data_pb2.py +28 -0
  162. mediapipe/framework/tool/node_chain_subgraph_pb2.py +31 -0
  163. mediapipe/framework/tool/packet_generator_wrapper_calculator_pb2.py +28 -0
  164. mediapipe/framework/tool/source_pb2.py +33 -0
  165. mediapipe/framework/tool/switch_container_pb2.py +32 -0
  166. mediapipe/gpu/__init__.py +0 -0
  167. mediapipe/gpu/copy_calculator_pb2.py +33 -0
  168. mediapipe/gpu/gl_animation_overlay_calculator_pb2.py +31 -0
  169. mediapipe/gpu/gl_context_options_pb2.py +31 -0
  170. mediapipe/gpu/gl_scaler_calculator_pb2.py +32 -0
  171. mediapipe/gpu/gl_surface_sink_calculator_pb2.py +32 -0
  172. mediapipe/gpu/gpu_origin_pb2.py +29 -0
  173. mediapipe/gpu/scale_mode_pb2.py +28 -0
  174. mediapipe/model_maker/__init__.py +27 -0
  175. mediapipe/model_maker/setup.py +107 -0
  176. mediapipe/modules/__init__.py +0 -0
  177. mediapipe/modules/face_detection/__init__.py +0 -0
  178. mediapipe/modules/face_detection/face_detection_full_range_cpu.binarypb +0 -0
  179. mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite +0 -0
  180. mediapipe/modules/face_detection/face_detection_pb2.py +30 -0
  181. mediapipe/modules/face_detection/face_detection_short_range.tflite +0 -0
  182. mediapipe/modules/face_detection/face_detection_short_range_cpu.binarypb +0 -0
  183. mediapipe/modules/face_geometry/__init__.py +0 -0
  184. mediapipe/modules/face_geometry/data/__init__.py +0 -0
  185. mediapipe/modules/face_geometry/effect_renderer_calculator_pb2.py +27 -0
  186. mediapipe/modules/face_geometry/env_generator_calculator_pb2.py +28 -0
  187. mediapipe/modules/face_geometry/geometry_pipeline_calculator_pb2.py +27 -0
  188. mediapipe/modules/face_geometry/libs/__init__.py +0 -0
  189. mediapipe/modules/face_geometry/protos/__init__.py +0 -0
  190. mediapipe/modules/face_geometry/protos/environment_pb2.py +31 -0
  191. mediapipe/modules/face_geometry/protos/face_geometry_pb2.py +29 -0
  192. mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata_pb2.py +32 -0
  193. mediapipe/modules/face_geometry/protos/mesh_3d_pb2.py +31 -0
  194. mediapipe/modules/face_landmark/__init__.py +0 -0
  195. mediapipe/modules/face_landmark/face_landmark.tflite +0 -0
  196. mediapipe/modules/face_landmark/face_landmark_front_cpu.binarypb +0 -0
  197. mediapipe/modules/face_landmark/face_landmark_with_attention.tflite +0 -0
  198. mediapipe/modules/hand_landmark/__init__.py +0 -0
  199. mediapipe/modules/hand_landmark/calculators/__init__.py +0 -0
  200. mediapipe/modules/hand_landmark/hand_landmark_full.tflite +0 -0
  201. mediapipe/modules/hand_landmark/hand_landmark_lite.tflite +0 -0
  202. mediapipe/modules/hand_landmark/hand_landmark_tracking_cpu.binarypb +0 -0
  203. mediapipe/modules/hand_landmark/handedness.txt +2 -0
  204. mediapipe/modules/holistic_landmark/__init__.py +0 -0
  205. mediapipe/modules/holistic_landmark/calculators/__init__.py +0 -0
  206. mediapipe/modules/holistic_landmark/calculators/roi_tracking_calculator_pb2.py +37 -0
  207. mediapipe/modules/holistic_landmark/hand_recrop.tflite +0 -0
  208. mediapipe/modules/holistic_landmark/holistic_landmark_cpu.binarypb +0 -0
  209. mediapipe/modules/iris_landmark/__init__.py +0 -0
  210. mediapipe/modules/iris_landmark/iris_landmark.tflite +0 -0
  211. mediapipe/modules/objectron/__init__.py +0 -0
  212. mediapipe/modules/objectron/calculators/__init__.py +0 -0
  213. mediapipe/modules/objectron/calculators/a_r_capture_metadata_pb2.py +102 -0
  214. mediapipe/modules/objectron/calculators/annotation_data_pb2.py +38 -0
  215. mediapipe/modules/objectron/calculators/belief_decoder_config_pb2.py +28 -0
  216. mediapipe/modules/objectron/calculators/camera_parameters_pb2.py +30 -0
  217. mediapipe/modules/objectron/calculators/filter_detection_calculator_pb2.py +35 -0
  218. mediapipe/modules/objectron/calculators/frame_annotation_to_rect_calculator_pb2.py +31 -0
  219. mediapipe/modules/objectron/calculators/frame_annotation_tracker_calculator_pb2.py +31 -0
  220. mediapipe/modules/objectron/calculators/lift_2d_frame_annotation_to_3d_calculator_pb2.py +32 -0
  221. mediapipe/modules/objectron/calculators/object_pb2.py +38 -0
  222. mediapipe/modules/objectron/calculators/tensors_to_objects_calculator_pb2.py +32 -0
  223. mediapipe/modules/objectron/calculators/tflite_tensors_to_objects_calculator_pb2.py +32 -0
  224. mediapipe/modules/objectron/object_detection_oidv4_labelmap.txt +24 -0
  225. mediapipe/modules/objectron/objectron_cpu.binarypb +0 -0
  226. mediapipe/modules/palm_detection/__init__.py +0 -0
  227. mediapipe/modules/palm_detection/palm_detection_full.tflite +0 -0
  228. mediapipe/modules/palm_detection/palm_detection_lite.tflite +0 -0
  229. mediapipe/modules/pose_detection/__init__.py +0 -0
  230. mediapipe/modules/pose_detection/pose_detection.tflite +0 -0
  231. mediapipe/modules/pose_landmark/__init__.py +0 -0
  232. mediapipe/modules/pose_landmark/pose_landmark_cpu.binarypb +0 -0
  233. mediapipe/modules/pose_landmark/pose_landmark_full.tflite +0 -0
  234. mediapipe/modules/selfie_segmentation/__init__.py +0 -0
  235. mediapipe/modules/selfie_segmentation/selfie_segmentation.tflite +0 -0
  236. mediapipe/modules/selfie_segmentation/selfie_segmentation_cpu.binarypb +0 -0
  237. mediapipe/modules/selfie_segmentation/selfie_segmentation_landscape.tflite +0 -0
  238. mediapipe/python/__init__.py +29 -0
  239. mediapipe/python/_framework_bindings/arm64.cpython-312-darwin.so +0 -0
  240. mediapipe/python/_framework_bindings.cpython-312-darwin.so +0 -0
  241. mediapipe/python/calculator_graph_test.py +251 -0
  242. mediapipe/python/image_frame_test.py +194 -0
  243. mediapipe/python/image_test.py +218 -0
  244. mediapipe/python/packet_creator.py +275 -0
  245. mediapipe/python/packet_getter.py +120 -0
  246. mediapipe/python/packet_test.py +533 -0
  247. mediapipe/python/solution_base.py +604 -0
  248. mediapipe/python/solution_base_test.py +396 -0
  249. mediapipe/python/solutions/__init__.py +27 -0
  250. mediapipe/python/solutions/download_utils.py +37 -0
  251. mediapipe/python/solutions/drawing_styles.py +249 -0
  252. mediapipe/python/solutions/drawing_utils.py +320 -0
  253. mediapipe/python/solutions/drawing_utils_test.py +258 -0
  254. mediapipe/python/solutions/face_detection.py +105 -0
  255. mediapipe/python/solutions/face_detection_test.py +92 -0
  256. mediapipe/python/solutions/face_mesh.py +125 -0
  257. mediapipe/python/solutions/face_mesh_connections.py +500 -0
  258. mediapipe/python/solutions/face_mesh_test.py +170 -0
  259. mediapipe/python/solutions/hands.py +153 -0
  260. mediapipe/python/solutions/hands_connections.py +32 -0
  261. mediapipe/python/solutions/hands_test.py +219 -0
  262. mediapipe/python/solutions/holistic.py +167 -0
  263. mediapipe/python/solutions/holistic_test.py +142 -0
  264. mediapipe/python/solutions/objectron.py +288 -0
  265. mediapipe/python/solutions/objectron_test.py +81 -0
  266. mediapipe/python/solutions/pose.py +192 -0
  267. mediapipe/python/solutions/pose_connections.py +22 -0
  268. mediapipe/python/solutions/pose_test.py +262 -0
  269. mediapipe/python/solutions/selfie_segmentation.py +76 -0
  270. mediapipe/python/solutions/selfie_segmentation_test.py +68 -0
  271. mediapipe/python/timestamp_test.py +78 -0
  272. mediapipe/tasks/__init__.py +14 -0
  273. mediapipe/tasks/cc/__init__.py +0 -0
  274. mediapipe/tasks/cc/audio/__init__.py +0 -0
  275. mediapipe/tasks/cc/audio/audio_classifier/__init__.py +0 -0
  276. mediapipe/tasks/cc/audio/audio_classifier/proto/__init__.py +0 -0
  277. mediapipe/tasks/cc/audio/audio_classifier/proto/audio_classifier_graph_options_pb2.py +35 -0
  278. mediapipe/tasks/cc/audio/audio_embedder/__init__.py +0 -0
  279. mediapipe/tasks/cc/audio/audio_embedder/proto/__init__.py +0 -0
  280. mediapipe/tasks/cc/audio/audio_embedder/proto/audio_embedder_graph_options_pb2.py +35 -0
  281. mediapipe/tasks/cc/audio/core/__init__.py +0 -0
  282. mediapipe/tasks/cc/audio/utils/__init__.py +0 -0
  283. mediapipe/tasks/cc/components/__init__.py +0 -0
  284. mediapipe/tasks/cc/components/calculators/__init__.py +0 -0
  285. mediapipe/tasks/cc/components/calculators/classification_aggregation_calculator_pb2.py +31 -0
  286. mediapipe/tasks/cc/components/calculators/score_calibration_calculator_pb2.py +35 -0
  287. mediapipe/tasks/cc/components/calculators/tensors_to_embeddings_calculator_pb2.py +32 -0
  288. mediapipe/tasks/cc/components/containers/__init__.py +0 -0
  289. mediapipe/tasks/cc/components/containers/proto/__init__.py +0 -0
  290. mediapipe/tasks/cc/components/containers/proto/classifications_pb2.py +30 -0
  291. mediapipe/tasks/cc/components/containers/proto/embeddings_pb2.py +35 -0
  292. mediapipe/tasks/cc/components/containers/proto/landmarks_detection_result_pb2.py +32 -0
  293. mediapipe/tasks/cc/components/processors/__init__.py +0 -0
  294. mediapipe/tasks/cc/components/processors/proto/__init__.py +0 -0
  295. mediapipe/tasks/cc/components/processors/proto/classification_postprocessing_graph_options_pb2.py +38 -0
  296. mediapipe/tasks/cc/components/processors/proto/classifier_options_pb2.py +27 -0
  297. mediapipe/tasks/cc/components/processors/proto/detection_postprocessing_graph_options_pb2.py +36 -0
  298. mediapipe/tasks/cc/components/processors/proto/detector_options_pb2.py +27 -0
  299. mediapipe/tasks/cc/components/processors/proto/embedder_options_pb2.py +27 -0
  300. mediapipe/tasks/cc/components/processors/proto/embedding_postprocessing_graph_options_pb2.py +32 -0
  301. mediapipe/tasks/cc/components/processors/proto/image_preprocessing_graph_options_pb2.py +34 -0
  302. mediapipe/tasks/cc/components/processors/proto/text_model_type_pb2.py +28 -0
  303. mediapipe/tasks/cc/components/processors/proto/text_preprocessing_graph_options_pb2.py +32 -0
  304. mediapipe/tasks/cc/components/utils/__init__.py +0 -0
  305. mediapipe/tasks/cc/core/__init__.py +0 -0
  306. mediapipe/tasks/cc/core/proto/__init__.py +0 -0
  307. mediapipe/tasks/cc/core/proto/acceleration_pb2.py +28 -0
  308. mediapipe/tasks/cc/core/proto/base_options_pb2.py +30 -0
  309. mediapipe/tasks/cc/core/proto/external_file_pb2.py +31 -0
  310. mediapipe/tasks/cc/core/proto/inference_subgraph_pb2.py +32 -0
  311. mediapipe/tasks/cc/core/proto/model_resources_calculator_pb2.py +32 -0
  312. mediapipe/tasks/cc/genai/__init__.py +0 -0
  313. mediapipe/tasks/cc/genai/inference/__init__.py +0 -0
  314. mediapipe/tasks/cc/genai/inference/c/__init__.py +0 -0
  315. mediapipe/tasks/cc/genai/inference/calculators/__init__.py +0 -0
  316. mediapipe/tasks/cc/genai/inference/calculators/detokenizer_calculator_pb2.py +27 -0
  317. mediapipe/tasks/cc/genai/inference/calculators/llm_gpu_calculator_pb2.py +32 -0
  318. mediapipe/tasks/cc/genai/inference/calculators/model_data_calculator_pb2.py +27 -0
  319. mediapipe/tasks/cc/genai/inference/calculators/tokenizer_calculator_pb2.py +29 -0
  320. mediapipe/tasks/cc/genai/inference/common/__init__.py +0 -0
  321. mediapipe/tasks/cc/genai/inference/proto/__init__.py +0 -0
  322. mediapipe/tasks/cc/genai/inference/proto/llm_file_metadata_pb2.py +32 -0
  323. mediapipe/tasks/cc/genai/inference/proto/llm_params_pb2.py +33 -0
  324. mediapipe/tasks/cc/genai/inference/proto/prompt_template_pb2.py +27 -0
  325. mediapipe/tasks/cc/genai/inference/proto/sampler_params_pb2.py +29 -0
  326. mediapipe/tasks/cc/genai/inference/proto/transformer_params_pb2.py +45 -0
  327. mediapipe/tasks/cc/genai/inference/utils/__init__.py +0 -0
  328. mediapipe/tasks/cc/genai/inference/utils/llm_utils/__init__.py +0 -0
  329. mediapipe/tasks/cc/genai/inference/utils/xnn_utils/__init__.py +0 -0
  330. mediapipe/tasks/cc/metadata/__init__.py +0 -0
  331. mediapipe/tasks/cc/metadata/python/__init__.py +0 -0
  332. mediapipe/tasks/cc/metadata/python/_pywrap_metadata_version/arm64.cpython-312-darwin.so +0 -0
  333. mediapipe/tasks/cc/metadata/python/_pywrap_metadata_version.cpython-312-darwin.so +0 -0
  334. mediapipe/tasks/cc/metadata/tests/__init__.py +0 -0
  335. mediapipe/tasks/cc/metadata/utils/__init__.py +0 -0
  336. mediapipe/tasks/cc/text/__init__.py +0 -0
  337. mediapipe/tasks/cc/text/custom_ops/__init__.py +0 -0
  338. mediapipe/tasks/cc/text/custom_ops/ragged/__init__.py +0 -0
  339. mediapipe/tasks/cc/text/custom_ops/sentencepiece/__init__.py +0 -0
  340. mediapipe/tasks/cc/text/custom_ops/sentencepiece/testdata/__init__.py +0 -0
  341. mediapipe/tasks/cc/text/language_detector/__init__.py +0 -0
  342. mediapipe/tasks/cc/text/language_detector/custom_ops/__init__.py +0 -0
  343. mediapipe/tasks/cc/text/language_detector/custom_ops/utils/__init__.py +0 -0
  344. mediapipe/tasks/cc/text/language_detector/custom_ops/utils/hash/__init__.py +0 -0
  345. mediapipe/tasks/cc/text/language_detector/custom_ops/utils/utf/__init__.py +0 -0
  346. mediapipe/tasks/cc/text/text_classifier/__init__.py +0 -0
  347. mediapipe/tasks/cc/text/text_classifier/proto/__init__.py +0 -0
  348. mediapipe/tasks/cc/text/text_classifier/proto/text_classifier_graph_options_pb2.py +35 -0
  349. mediapipe/tasks/cc/text/text_embedder/__init__.py +0 -0
  350. mediapipe/tasks/cc/text/text_embedder/proto/__init__.py +0 -0
  351. mediapipe/tasks/cc/text/text_embedder/proto/text_embedder_graph_options_pb2.py +35 -0
  352. mediapipe/tasks/cc/text/tokenizers/__init__.py +0 -0
  353. mediapipe/tasks/cc/text/utils/__init__.py +0 -0
  354. mediapipe/tasks/cc/vision/__init__.py +0 -0
  355. mediapipe/tasks/cc/vision/core/__init__.py +0 -0
  356. mediapipe/tasks/cc/vision/custom_ops/__init__.py +0 -0
  357. mediapipe/tasks/cc/vision/face_detector/__init__.py +0 -0
  358. mediapipe/tasks/cc/vision/face_detector/proto/__init__.py +0 -0
  359. mediapipe/tasks/cc/vision/face_detector/proto/face_detector_graph_options_pb2.py +34 -0
  360. mediapipe/tasks/cc/vision/face_geometry/__init__.py +0 -0
  361. mediapipe/tasks/cc/vision/face_geometry/calculators/__init__.py +0 -0
  362. mediapipe/tasks/cc/vision/face_geometry/calculators/env_generator_calculator_pb2.py +28 -0
  363. mediapipe/tasks/cc/vision/face_geometry/calculators/geometry_pipeline_calculator_pb2.py +29 -0
  364. mediapipe/tasks/cc/vision/face_geometry/data/__init__.py +0 -0
  365. mediapipe/tasks/cc/vision/face_geometry/libs/__init__.py +0 -0
  366. mediapipe/tasks/cc/vision/face_geometry/proto/__init__.py +0 -0
  367. mediapipe/tasks/cc/vision/face_geometry/proto/environment_pb2.py +31 -0
  368. mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry_graph_options_pb2.py +29 -0
  369. mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry_pb2.py +29 -0
  370. mediapipe/tasks/cc/vision/face_geometry/proto/geometry_pipeline_metadata_pb2.py +32 -0
  371. mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d_pb2.py +31 -0
  372. mediapipe/tasks/cc/vision/face_landmarker/__init__.py +0 -0
  373. mediapipe/tasks/cc/vision/face_landmarker/proto/__init__.py +0 -0
  374. mediapipe/tasks/cc/vision/face_landmarker/proto/face_blendshapes_graph_options_pb2.py +34 -0
  375. mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarker_graph_options_pb2.py +37 -0
  376. mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarks_detector_graph_options_pb2.py +35 -0
  377. mediapipe/tasks/cc/vision/face_landmarker/proto/tensors_to_face_landmarks_graph_options_pb2.py +32 -0
  378. mediapipe/tasks/cc/vision/face_stylizer/__init__.py +0 -0
  379. mediapipe/tasks/cc/vision/face_stylizer/calculators/__init__.py +0 -0
  380. mediapipe/tasks/cc/vision/face_stylizer/calculators/tensors_to_image_calculator_pb2.py +36 -0
  381. mediapipe/tasks/cc/vision/face_stylizer/proto/__init__.py +0 -0
  382. mediapipe/tasks/cc/vision/face_stylizer/proto/face_stylizer_graph_options_pb2.py +35 -0
  383. mediapipe/tasks/cc/vision/gesture_recognizer/__init__.py +0 -0
  384. mediapipe/tasks/cc/vision/gesture_recognizer/calculators/__init__.py +0 -0
  385. mediapipe/tasks/cc/vision/gesture_recognizer/calculators/combined_prediction_calculator_pb2.py +33 -0
  386. mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator_pb2.py +31 -0
  387. mediapipe/tasks/cc/vision/gesture_recognizer/proto/__init__.py +0 -0
  388. mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_classifier_graph_options_pb2.py +35 -0
  389. mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_embedder_graph_options_pb2.py +34 -0
  390. mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_recognizer_graph_options_pb2.py +36 -0
  391. mediapipe/tasks/cc/vision/gesture_recognizer/proto/hand_gesture_recognizer_graph_options_pb2.py +36 -0
  392. mediapipe/tasks/cc/vision/hand_detector/__init__.py +0 -0
  393. mediapipe/tasks/cc/vision/hand_detector/proto/__init__.py +0 -0
  394. mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options_pb2.py +34 -0
  395. mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_result_pb2.py +30 -0
  396. mediapipe/tasks/cc/vision/hand_landmarker/__init__.py +0 -0
  397. mediapipe/tasks/cc/vision/hand_landmarker/calculators/__init__.py +0 -0
  398. mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_association_calculator_pb2.py +31 -0
  399. mediapipe/tasks/cc/vision/hand_landmarker/proto/__init__.py +0 -0
  400. mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options_pb2.py +36 -0
  401. mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options_pb2.py +34 -0
  402. mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_roi_refinement_graph_options_pb2.py +28 -0
  403. mediapipe/tasks/cc/vision/holistic_landmarker/__init__.py +0 -0
  404. mediapipe/tasks/cc/vision/holistic_landmarker/proto/__init__.py +0 -0
  405. mediapipe/tasks/cc/vision/holistic_landmarker/proto/holistic_landmarker_graph_options_pb2.py +34 -0
  406. mediapipe/tasks/cc/vision/holistic_landmarker/proto/holistic_result_pb2.py +29 -0
  407. mediapipe/tasks/cc/vision/image_classifier/__init__.py +0 -0
  408. mediapipe/tasks/cc/vision/image_classifier/proto/__init__.py +0 -0
  409. mediapipe/tasks/cc/vision/image_classifier/proto/image_classifier_graph_options_pb2.py +35 -0
  410. mediapipe/tasks/cc/vision/image_embedder/__init__.py +0 -0
  411. mediapipe/tasks/cc/vision/image_embedder/proto/__init__.py +0 -0
  412. mediapipe/tasks/cc/vision/image_embedder/proto/image_embedder_graph_options_pb2.py +35 -0
  413. mediapipe/tasks/cc/vision/image_generator/__init__.py +0 -0
  414. mediapipe/tasks/cc/vision/image_generator/diffuser/__init__.py +0 -0
  415. mediapipe/tasks/cc/vision/image_generator/diffuser/stable_diffusion_iterate_calculator_pb2.py +40 -0
  416. mediapipe/tasks/cc/vision/image_generator/proto/__init__.py +0 -0
  417. mediapipe/tasks/cc/vision/image_generator/proto/conditioned_image_graph_options_pb2.py +40 -0
  418. mediapipe/tasks/cc/vision/image_generator/proto/control_plugin_graph_options_pb2.py +34 -0
  419. mediapipe/tasks/cc/vision/image_generator/proto/image_generator_graph_options_pb2.py +30 -0
  420. mediapipe/tasks/cc/vision/image_segmenter/__init__.py +0 -0
  421. mediapipe/tasks/cc/vision/image_segmenter/calculators/__init__.py +0 -0
  422. mediapipe/tasks/cc/vision/image_segmenter/calculators/tensors_to_segmentation_calculator_pb2.py +34 -0
  423. mediapipe/tasks/cc/vision/image_segmenter/proto/__init__.py +0 -0
  424. mediapipe/tasks/cc/vision/image_segmenter/proto/image_segmenter_graph_options_pb2.py +35 -0
  425. mediapipe/tasks/cc/vision/image_segmenter/proto/segmenter_options_pb2.py +33 -0
  426. mediapipe/tasks/cc/vision/interactive_segmenter/__init__.py +0 -0
  427. mediapipe/tasks/cc/vision/object_detector/__init__.py +0 -0
  428. mediapipe/tasks/cc/vision/object_detector/proto/__init__.py +0 -0
  429. mediapipe/tasks/cc/vision/object_detector/proto/object_detector_options_pb2.py +34 -0
  430. mediapipe/tasks/cc/vision/pose_detector/__init__.py +0 -0
  431. mediapipe/tasks/cc/vision/pose_detector/proto/__init__.py +0 -0
  432. mediapipe/tasks/cc/vision/pose_detector/proto/pose_detector_graph_options_pb2.py +34 -0
  433. mediapipe/tasks/cc/vision/pose_landmarker/__init__.py +0 -0
  434. mediapipe/tasks/cc/vision/pose_landmarker/proto/__init__.py +0 -0
  435. mediapipe/tasks/cc/vision/pose_landmarker/proto/pose_landmarker_graph_options_pb2.py +36 -0
  436. mediapipe/tasks/cc/vision/pose_landmarker/proto/pose_landmarks_detector_graph_options_pb2.py +34 -0
  437. mediapipe/tasks/cc/vision/utils/__init__.py +0 -0
  438. mediapipe/tasks/cc/vision/utils/ghum/__init__.py +0 -0
  439. mediapipe/tasks/metadata/image_segmenter_metadata_schema.fbs +59 -0
  440. mediapipe/tasks/metadata/image_segmenter_metadata_schema_py_generated.py +108 -0
  441. mediapipe/tasks/metadata/metadata_schema.fbs +732 -0
  442. mediapipe/tasks/metadata/metadata_schema_py_generated.py +3251 -0
  443. mediapipe/tasks/metadata/object_detector_metadata_schema.fbs +98 -0
  444. mediapipe/tasks/metadata/object_detector_metadata_schema_py_generated.py +674 -0
  445. mediapipe/tasks/metadata/schema_py_generated.py +18438 -0
  446. mediapipe/tasks/python/__init__.py +27 -0
  447. mediapipe/tasks/python/audio/__init__.py +33 -0
  448. mediapipe/tasks/python/audio/audio_classifier.py +324 -0
  449. mediapipe/tasks/python/audio/audio_embedder.py +285 -0
  450. mediapipe/tasks/python/audio/core/__init__.py +16 -0
  451. mediapipe/tasks/python/audio/core/audio_record.py +125 -0
  452. mediapipe/tasks/python/audio/core/audio_task_running_mode.py +29 -0
  453. mediapipe/tasks/python/audio/core/base_audio_task_api.py +181 -0
  454. mediapipe/tasks/python/benchmark/__init__.py +13 -0
  455. mediapipe/tasks/python/benchmark/benchmark_utils.py +70 -0
  456. mediapipe/tasks/python/benchmark/vision/__init__.py +13 -0
  457. mediapipe/tasks/python/benchmark/vision/benchmark.py +99 -0
  458. mediapipe/tasks/python/benchmark/vision/core/__init__.py +14 -0
  459. mediapipe/tasks/python/benchmark/vision/core/base_vision_benchmark_api.py +40 -0
  460. mediapipe/tasks/python/components/__init__.py +13 -0
  461. mediapipe/tasks/python/components/containers/__init__.py +53 -0
  462. mediapipe/tasks/python/components/containers/audio_data.py +137 -0
  463. mediapipe/tasks/python/components/containers/bounding_box.py +73 -0
  464. mediapipe/tasks/python/components/containers/category.py +78 -0
  465. mediapipe/tasks/python/components/containers/classification_result.py +111 -0
  466. mediapipe/tasks/python/components/containers/detections.py +181 -0
  467. mediapipe/tasks/python/components/containers/embedding_result.py +89 -0
  468. mediapipe/tasks/python/components/containers/keypoint.py +77 -0
  469. mediapipe/tasks/python/components/containers/landmark.py +122 -0
  470. mediapipe/tasks/python/components/containers/landmark_detection_result.py +106 -0
  471. mediapipe/tasks/python/components/containers/rect.py +109 -0
  472. mediapipe/tasks/python/components/processors/__init__.py +23 -0
  473. mediapipe/tasks/python/components/processors/classifier_options.py +86 -0
  474. mediapipe/tasks/python/components/utils/__init__.py +13 -0
  475. mediapipe/tasks/python/components/utils/cosine_similarity.py +68 -0
  476. mediapipe/tasks/python/core/__init__.py +13 -0
  477. mediapipe/tasks/python/core/base_options.py +121 -0
  478. mediapipe/tasks/python/core/optional_dependencies.py +25 -0
  479. mediapipe/tasks/python/core/task_info.py +139 -0
  480. mediapipe/tasks/python/genai/__init__.py +14 -0
  481. mediapipe/tasks/python/genai/bundler/__init__.py +23 -0
  482. mediapipe/tasks/python/genai/bundler/llm_bundler.py +130 -0
  483. mediapipe/tasks/python/genai/bundler/llm_bundler_test.py +168 -0
  484. mediapipe/tasks/python/genai/converter/__init__.py +24 -0
  485. mediapipe/tasks/python/genai/converter/converter_base.py +179 -0
  486. mediapipe/tasks/python/genai/converter/converter_factory.py +79 -0
  487. mediapipe/tasks/python/genai/converter/llm_converter.py +374 -0
  488. mediapipe/tasks/python/genai/converter/llm_converter_test.py +63 -0
  489. mediapipe/tasks/python/genai/converter/pytorch_converter.py +318 -0
  490. mediapipe/tasks/python/genai/converter/pytorch_converter_test.py +86 -0
  491. mediapipe/tasks/python/genai/converter/quantization_util.py +516 -0
  492. mediapipe/tasks/python/genai/converter/quantization_util_test.py +259 -0
  493. mediapipe/tasks/python/genai/converter/safetensors_converter.py +580 -0
  494. mediapipe/tasks/python/genai/converter/safetensors_converter_test.py +83 -0
  495. mediapipe/tasks/python/genai/converter/weight_bins_writer.py +120 -0
  496. mediapipe/tasks/python/genai/converter/weight_bins_writer_test.py +95 -0
  497. mediapipe/tasks/python/metadata/__init__.py +13 -0
  498. mediapipe/tasks/python/metadata/flatbuffers_lib/_pywrap_flatbuffers/arm64.cpython-312-darwin.so +0 -0
  499. mediapipe/tasks/python/metadata/flatbuffers_lib/_pywrap_flatbuffers.cpython-312-darwin.so +0 -0
  500. mediapipe/tasks/python/metadata/metadata.py +928 -0
  501. mediapipe/tasks/python/metadata/metadata_displayer_cli.py +34 -0
  502. mediapipe/tasks/python/metadata/metadata_writers/__init__.py +13 -0
  503. mediapipe/tasks/python/metadata/metadata_writers/face_stylizer.py +138 -0
  504. mediapipe/tasks/python/metadata/metadata_writers/image_classifier.py +71 -0
  505. mediapipe/tasks/python/metadata/metadata_writers/image_segmenter.py +170 -0
  506. mediapipe/tasks/python/metadata/metadata_writers/metadata_info.py +1166 -0
  507. mediapipe/tasks/python/metadata/metadata_writers/metadata_writer.py +845 -0
  508. mediapipe/tasks/python/metadata/metadata_writers/model_asset_bundle_utils.py +71 -0
  509. mediapipe/tasks/python/metadata/metadata_writers/object_detector.py +331 -0
  510. mediapipe/tasks/python/metadata/metadata_writers/text_classifier.py +119 -0
  511. mediapipe/tasks/python/metadata/metadata_writers/writer_utils.py +91 -0
  512. mediapipe/tasks/python/test/__init__.py +13 -0
  513. mediapipe/tasks/python/test/audio/__init__.py +13 -0
  514. mediapipe/tasks/python/test/audio/audio_classifier_test.py +387 -0
  515. mediapipe/tasks/python/test/audio/audio_embedder_test.py +297 -0
  516. mediapipe/tasks/python/test/test_utils.py +196 -0
  517. mediapipe/tasks/python/test/text/__init__.py +13 -0
  518. mediapipe/tasks/python/test/text/language_detector_test.py +228 -0
  519. mediapipe/tasks/python/test/text/text_classifier_test.py +235 -0
  520. mediapipe/tasks/python/test/text/text_embedder_test.py +326 -0
  521. mediapipe/tasks/python/test/vision/__init__.py +13 -0
  522. mediapipe/tasks/python/test/vision/face_aligner_test.py +190 -0
  523. mediapipe/tasks/python/test/vision/face_detector_test.py +523 -0
  524. mediapipe/tasks/python/test/vision/face_landmarker_test.py +565 -0
  525. mediapipe/tasks/python/test/vision/face_stylizer_test.py +191 -0
  526. mediapipe/tasks/python/test/vision/hand_landmarker_test.py +437 -0
  527. mediapipe/tasks/python/test/vision/holistic_landmarker_test.py +544 -0
  528. mediapipe/tasks/python/test/vision/image_classifier_test.py +657 -0
  529. mediapipe/tasks/python/test/vision/image_embedder_test.py +423 -0
  530. mediapipe/tasks/python/test/vision/image_segmenter_test.py +512 -0
  531. mediapipe/tasks/python/test/vision/interactive_segmenter_test.py +341 -0
  532. mediapipe/tasks/python/test/vision/object_detector_test.py +493 -0
  533. mediapipe/tasks/python/test/vision/pose_landmarker_test.py +518 -0
  534. mediapipe/tasks/python/text/__init__.py +35 -0
  535. mediapipe/tasks/python/text/core/__init__.py +16 -0
  536. mediapipe/tasks/python/text/core/base_text_task_api.py +54 -0
  537. mediapipe/tasks/python/text/language_detector.py +220 -0
  538. mediapipe/tasks/python/text/text_classifier.py +187 -0
  539. mediapipe/tasks/python/text/text_embedder.py +188 -0
  540. mediapipe/tasks/python/vision/__init__.py +90 -0
  541. mediapipe/tasks/python/vision/core/__init__.py +14 -0
  542. mediapipe/tasks/python/vision/core/base_vision_task_api.py +226 -0
  543. mediapipe/tasks/python/vision/core/image_processing_options.py +39 -0
  544. mediapipe/tasks/python/vision/core/vision_task_running_mode.py +31 -0
  545. mediapipe/tasks/python/vision/face_aligner.py +158 -0
  546. mediapipe/tasks/python/vision/face_detector.py +332 -0
  547. mediapipe/tasks/python/vision/face_landmarker.py +3244 -0
  548. mediapipe/tasks/python/vision/face_stylizer.py +158 -0
  549. mediapipe/tasks/python/vision/gesture_recognizer.py +480 -0
  550. mediapipe/tasks/python/vision/hand_landmarker.py +504 -0
  551. mediapipe/tasks/python/vision/holistic_landmarker.py +576 -0
  552. mediapipe/tasks/python/vision/image_classifier.py +358 -0
  553. mediapipe/tasks/python/vision/image_embedder.py +362 -0
  554. mediapipe/tasks/python/vision/image_segmenter.py +433 -0
  555. mediapipe/tasks/python/vision/interactive_segmenter.py +285 -0
  556. mediapipe/tasks/python/vision/object_detector.py +389 -0
  557. mediapipe/tasks/python/vision/pose_landmarker.py +455 -0
  558. mediapipe/util/__init__.py +0 -0
  559. mediapipe/util/analytics/__init__.py +0 -0
  560. mediapipe/util/analytics/mediapipe_log_extension_pb2.py +44 -0
  561. mediapipe/util/analytics/mediapipe_logging_enums_pb2.py +37 -0
  562. mediapipe/util/audio_decoder_pb2.py +33 -0
  563. mediapipe/util/color_pb2.py +33 -0
  564. mediapipe/util/label_map_pb2.py +27 -0
  565. mediapipe/util/render_data_pb2.py +58 -0
  566. mediapipe/util/sequence/__init__.py +14 -0
  567. mediapipe/util/sequence/media_sequence.py +716 -0
  568. mediapipe/util/sequence/media_sequence_test.py +290 -0
  569. mediapipe/util/sequence/media_sequence_util.py +800 -0
  570. mediapipe/util/sequence/media_sequence_util_test.py +389 -0
  571. mediapipe/util/tracking/__init__.py +0 -0
  572. mediapipe/util/tracking/box_detector_pb2.py +39 -0
  573. mediapipe/util/tracking/box_tracker_pb2.py +32 -0
  574. mediapipe/util/tracking/camera_motion_pb2.py +31 -0
  575. mediapipe/util/tracking/flow_packager_pb2.py +60 -0
  576. mediapipe/util/tracking/frame_selection_pb2.py +35 -0
  577. mediapipe/util/tracking/frame_selection_solution_evaluator_pb2.py +28 -0
  578. mediapipe/util/tracking/motion_analysis_pb2.py +35 -0
  579. mediapipe/util/tracking/motion_estimation_pb2.py +66 -0
  580. mediapipe/util/tracking/motion_models_pb2.py +42 -0
  581. mediapipe/util/tracking/motion_saliency_pb2.py +26 -0
  582. mediapipe/util/tracking/push_pull_filtering_pb2.py +26 -0
  583. mediapipe/util/tracking/region_flow_computation_pb2.py +59 -0
  584. mediapipe/util/tracking/region_flow_pb2.py +49 -0
  585. mediapipe/util/tracking/tone_estimation_pb2.py +45 -0
  586. mediapipe/util/tracking/tone_models_pb2.py +32 -0
  587. mediapipe/util/tracking/tracked_detection_manager_config_pb2.py +26 -0
  588. mediapipe/util/tracking/tracking_pb2.py +73 -0
  589. mediapipe/version.txt +1 -0
  590. mediapipe_nightly-0.10.21.post20241223.dist-info/LICENSE +218 -0
  591. mediapipe_nightly-0.10.21.post20241223.dist-info/METADATA +199 -0
  592. mediapipe_nightly-0.10.21.post20241223.dist-info/RECORD +593 -0
  593. mediapipe_nightly-0.10.21.post20241223.dist-info/WHEEL +5 -0
  594. mediapipe_nightly-0.10.21.post20241223.dist-info/top_level.txt +4 -0

mediapipe/tasks/python/__init__.py
@@ -0,0 +1,27 @@
+ # Copyright 2022 The MediaPipe Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """MediaPipe Tasks API."""
+
+ from . import audio
+ from . import components
+ from . import core
+ from . import genai
+ from . import text
+ from . import vision
+
+ BaseOptions = core.base_options.BaseOptions
+
+ # Remove unnecessary modules to avoid duplication in API docs.
+ del core

mediapipe/tasks/python/audio/__init__.py
@@ -0,0 +1,33 @@
+ # Copyright 2022 The MediaPipe Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """MediaPipe Tasks Audio API."""
+
+ import mediapipe.tasks.python.audio.core
+ import mediapipe.tasks.python.audio.audio_classifier
+ import mediapipe.tasks.python.audio.audio_embedder
+
+ AudioClassifier = audio_classifier.AudioClassifier
+ AudioClassifierOptions = audio_classifier.AudioClassifierOptions
+ AudioClassifierResult = audio_classifier.AudioClassifierResult
+ AudioEmbedder = audio_embedder.AudioEmbedder
+ AudioEmbedderOptions = audio_embedder.AudioEmbedderOptions
+ AudioEmbedderResult = audio_embedder.AudioEmbedderResult
+ RunningMode = core.audio_task_running_mode.AudioTaskRunningMode
+
+ # Remove unnecessary modules to avoid duplication in API docs.
+ del audio_classifier
+ del audio_embedder
+ del core
+ del mediapipe
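
Taken together, the two __init__.py hunks above are what make the shorthand imports used in the MediaPipe Tasks documentation resolve: BaseOptions is re-exported at the package root, and the audio subpackage exposes the classifier/embedder classes plus the running-mode enum. A minimal sketch, assuming this wheel is installed and using only names the diff itself re-exports:

from mediapipe.tasks import python          # BaseOptions comes from the first hunk above
from mediapipe.tasks.python import audio    # AudioClassifier, AudioEmbedder, RunningMode from the second hunk

print(python.BaseOptions)                   # alias of mediapipe.tasks.python.core.base_options.BaseOptions
print(audio.RunningMode.AUDIO_CLIPS, audio.RunningMode.AUDIO_STREAM)  # the two running modes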

mediapipe/tasks/python/audio/audio_classifier.py
@@ -0,0 +1,324 @@
+ # Copyright 2022 The MediaPipe Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """MediaPipe audio classifier task."""
+
+ import dataclasses
+ from typing import Callable, Mapping, List, Optional
+
+ from mediapipe.python import packet_creator
+ from mediapipe.python import packet_getter
+ from mediapipe.python._framework_bindings import packet
+ from mediapipe.tasks.cc.audio.audio_classifier.proto import audio_classifier_graph_options_pb2
+ from mediapipe.tasks.cc.components.containers.proto import classifications_pb2
+ from mediapipe.tasks.cc.components.processors.proto import classifier_options_pb2
+ from mediapipe.tasks.python.audio.core import audio_task_running_mode as running_mode_module
+ from mediapipe.tasks.python.audio.core import base_audio_task_api
+ from mediapipe.tasks.python.components.containers import audio_data as audio_data_module
+ from mediapipe.tasks.python.components.containers import classification_result as classification_result_module
+ from mediapipe.tasks.python.core import base_options as base_options_module
+ from mediapipe.tasks.python.core import task_info as task_info_module
+ from mediapipe.tasks.python.core.optional_dependencies import doc_controls
+
+ AudioClassifierResult = classification_result_module.ClassificationResult
+ _AudioClassifierGraphOptionsProto = audio_classifier_graph_options_pb2.AudioClassifierGraphOptions
+ _AudioData = audio_data_module.AudioData
+ _BaseOptions = base_options_module.BaseOptions
+ _ClassifierOptionsProto = classifier_options_pb2.ClassifierOptions
+ _RunningMode = running_mode_module.AudioTaskRunningMode
+ _TaskInfo = task_info_module.TaskInfo
+
+ _AUDIO_IN_STREAM_NAME = 'audio_in'
+ _AUDIO_TAG = 'AUDIO'
+ _CLASSIFICATIONS_STREAM_NAME = 'classifications_out'
+ _CLASSIFICATIONS_TAG = 'CLASSIFICATIONS'
+ _SAMPLE_RATE_IN_STREAM_NAME = 'sample_rate_in'
+ _SAMPLE_RATE_TAG = 'SAMPLE_RATE'
+ _TASK_GRAPH_NAME = 'mediapipe.tasks.audio.audio_classifier.AudioClassifierGraph'
+ _TIMESTAMPED_CLASSIFICATIONS_STREAM_NAME = 'timestamped_classifications_out'
+ _TIMESTAMPED_CLASSIFICATIONS_TAG = 'TIMESTAMPED_CLASSIFICATIONS'
+ _MICRO_SECONDS_PER_MILLISECOND = 1000
+
+
+ @dataclasses.dataclass
+ class AudioClassifierOptions:
+   """Options for the audio classifier task.
+
+   Attributes:
+     base_options: Base options for the audio classifier task.
+     running_mode: The running mode of the task. Default to the audio clips mode.
+       Audio classifier task has two running modes: 1) The audio clips mode for
+       running classification on independent audio clips. 2) The audio stream
+       mode for running classification on the audio stream, such as from
+       microphone. In this mode, the "result_callback" below must be specified
+       to receive the classification results asynchronously.
+     display_names_locale: The locale to use for display names specified through
+       the TFLite Model Metadata.
+     max_results: The maximum number of top-scored classification results to
+       return.
+     score_threshold: Overrides the ones provided in the model metadata. Results
+       below this value are rejected.
+     category_allowlist: Allowlist of category names. If non-empty,
+       classification results whose category name is not in this set will be
+       filtered out. Duplicate or unknown category names are ignored. Mutually
+       exclusive with `category_denylist`.
+     category_denylist: Denylist of category names. If non-empty, classification
+       results whose category name is in this set will be filtered out. Duplicate
+       or unknown category names are ignored. Mutually exclusive with
+       `category_allowlist`.
+     result_callback: The user-defined result callback for processing audio
+       stream data. The result callback should only be specified when the running
+       mode is set to the audio stream mode.
+   """
+   base_options: _BaseOptions
+   running_mode: _RunningMode = _RunningMode.AUDIO_CLIPS
+   display_names_locale: Optional[str] = None
+   max_results: Optional[int] = None
+   score_threshold: Optional[float] = None
+   category_allowlist: Optional[List[str]] = None
+   category_denylist: Optional[List[str]] = None
+   result_callback: Optional[Callable[[AudioClassifierResult, int], None]] = None
+
+   @doc_controls.do_not_generate_docs
+   def to_pb2(self) -> _AudioClassifierGraphOptionsProto:
+     """Generates an AudioClassifierOptions protobuf object."""
+     base_options_proto = self.base_options.to_pb2()
+     base_options_proto.use_stream_mode = False if self.running_mode == _RunningMode.AUDIO_CLIPS else True
+     classifier_options_proto = _ClassifierOptionsProto(
+         score_threshold=self.score_threshold,
+         category_allowlist=self.category_allowlist,
+         category_denylist=self.category_denylist,
+         display_names_locale=self.display_names_locale,
+         max_results=self.max_results)
+
+     return _AudioClassifierGraphOptionsProto(
+         base_options=base_options_proto,
+         classifier_options=classifier_options_proto)
+
+
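
As a hedged illustration of the options documented above (the model path is a placeholder, not a file shipped in this wheel): the classifier fields map one-to-one onto the ClassifierOptions proto assembled in to_pb2, and use_stream_mode stays False in the default audio clips mode.

from mediapipe.tasks import python
from mediapipe.tasks.python import audio

# Default running_mode is AUDIO_CLIPS, so no result_callback is required here.
options = audio.AudioClassifierOptions(
    base_options=python.BaseOptions(model_asset_path='classifier.tflite'),  # placeholder path
    max_results=3,
    score_threshold=0.2,
    category_allowlist=['Speech', 'Music'])
# options.to_pb2() would produce an AudioClassifierGraphOptions proto with
# use_stream_mode=False and these values copied into classifier_options.
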
+ class AudioClassifier(base_audio_task_api.BaseAudioTaskApi):
+   """Class that performs audio classification on audio data.
+
+   This API expects a TFLite model with mandatory TFLite Model Metadata that
+   contains the mandatory AudioProperties of the solo input audio tensor and the
+   optional (but recommended) category labels as AssociatedFiles with type
+   TENSOR_AXIS_LABELS per output classification tensor.
+
+   Input tensor:
+     (kTfLiteFloat32)
+     - input audio buffer of size `[batch * samples]`.
+     - batch inference is not supported (`batch` is required to be 1).
+     - for multi-channel models, the channels must be interleaved.
+   At least one output tensor with:
+     (kTfLiteFloat32)
+     - `[1 x N]` array with `N` represents the number of categories.
+     - optional (but recommended) category labels as AssociatedFiles with type
+       TENSOR_AXIS_LABELS, containing one label per line. The first such
+       AssociatedFile (if any) is used to fill the `category_name` field of the
+       results. The `display_name` field is filled from the AssociatedFile (if
+       any) whose locale matches the `display_names_locale` field of the
+       `AudioClassifierOptions` used at creation time ("en" by default, i.e.
+       English). If none of these are available, only the `index` field of the
+       results will be filled.
+   """
+
+   @classmethod
+   def create_from_model_path(cls, model_path: str) -> 'AudioClassifier':
+     """Creates an `AudioClassifier` object from a TensorFlow Lite model and the default `AudioClassifierOptions`.
+
+     Note that the created `AudioClassifier` instance is in audio clips mode, for
+     classifying on independent audio clips.
+
+     Args:
+       model_path: Path to the model.
+
+     Returns:
+       `AudioClassifier` object that's created from the model file and the
+       default `AudioClassifierOptions`.
+
+     Raises:
+       ValueError: If failed to create `AudioClassifier` object from the provided
+         file such as invalid file path.
+       RuntimeError: If other types of error occurred.
+     """
+     base_options = _BaseOptions(model_asset_path=model_path)
+     options = AudioClassifierOptions(
+         base_options=base_options, running_mode=_RunningMode.AUDIO_CLIPS)
+     return cls.create_from_options(options)
+
+   @classmethod
+   def create_from_options(cls,
+                           options: AudioClassifierOptions) -> 'AudioClassifier':
+     """Creates the `AudioClassifier` object from audio classifier options.
+
+     Args:
+       options: Options for the audio classifier task.
+
+     Returns:
+       `AudioClassifier` object that's created from `options`.
+
+     Raises:
+       ValueError: If failed to create `AudioClassifier` object from
+         `AudioClassifierOptions` such as missing the model.
+       RuntimeError: If other types of error occurred.
+     """
+
+     def packets_callback(output_packets: Mapping[str, packet.Packet]):
+       timestamp_ms = output_packets[
+           _CLASSIFICATIONS_STREAM_NAME].timestamp.value // _MICRO_SECONDS_PER_MILLISECOND
+       if output_packets[_CLASSIFICATIONS_STREAM_NAME].is_empty():
+         options.result_callback(
+             AudioClassifierResult(classifications=[]), timestamp_ms)
+         return
+       classification_result_proto = classifications_pb2.ClassificationResult()
+       classification_result_proto.CopyFrom(
+           packet_getter.get_proto(output_packets[_CLASSIFICATIONS_STREAM_NAME]))
+       options.result_callback(
+           AudioClassifierResult.create_from_pb2(classification_result_proto),
+           timestamp_ms)
+
+     task_info = _TaskInfo(
+         task_graph=_TASK_GRAPH_NAME,
+         input_streams=[
+             ':'.join([_AUDIO_TAG, _AUDIO_IN_STREAM_NAME]),
+             ':'.join([_SAMPLE_RATE_TAG, _SAMPLE_RATE_IN_STREAM_NAME])
+         ],
+         output_streams=[
+             ':'.join([_CLASSIFICATIONS_TAG, _CLASSIFICATIONS_STREAM_NAME]),
+             ':'.join([
+                 _TIMESTAMPED_CLASSIFICATIONS_TAG,
+                 _TIMESTAMPED_CLASSIFICATIONS_STREAM_NAME
+             ])
+         ],
+         task_options=options)
+     return cls(
+         # Audio tasks should not drop input audio due to flow limiting, which
+         # may cause data inconsistency.
+         task_info.generate_graph_config(enable_flow_limiting=False),
+         options.running_mode,
+         packets_callback if options.result_callback else None)
+
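
Below is a hedged sketch of the stream-mode path that create_from_options wires up above. The model path and callback body are placeholders, the with-statement relies on the context-manager support of the base audio task API, and audio blocks are fed in through classify_async, which appears later in this diff.

from mediapipe.tasks import python
from mediapipe.tasks.python import audio

def on_result(result: audio.AudioClassifierResult, timestamp_ms: int) -> None:
  # Invoked once per processed chunk; timestamp_ms is the chunk start in milliseconds.
  print(timestamp_ms, result.classifications)

options = audio.AudioClassifierOptions(
    base_options=python.BaseOptions(model_asset_path='classifier.tflite'),  # placeholder path
    running_mode=audio.RunningMode.AUDIO_STREAM,
    result_callback=on_result)

with audio.AudioClassifier.create_from_options(options) as classifier:
  ...  # stream audio blocks via classifier.classify_async(audio_block, timestamp_ms)
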
+   def classify(self, audio_clip: _AudioData) -> List[AudioClassifierResult]:
+     """Performs audio classification on the provided audio clip.
+
+     The audio clip is represented as a MediaPipe AudioData. The method accepts
+     audio clips with various length and audio sample rate. It's required to
+     provide the corresponding audio sample rate within the `AudioData` object.
+
+     The input audio clip may be longer than what the model is able to process
+     in a single inference. When this occurs, the input audio clip is split into
+     multiple chunks starting at different timestamps. For this reason, this
+     function returns a vector of ClassificationResult objects, each associated
+     with a timestamp corresponding to the start (in milliseconds) of the chunk
+     data that was classified, e.g:
+
+     ClassificationResult #0 (first chunk of data):
+       timestamp_ms: 0 (starts at 0ms)
+       classifications #0 (single head model):
+         category #0:
+           category_name: "Speech"
+           score: 0.6
+         category #1:
+           category_name: "Music"
+           score: 0.2
+     ClassificationResult #1 (second chunk of data):
+       timestamp_ms: 800 (starts at 800ms)
+       classifications #0 (single head model):
+         category #0:
+           category_name: "Speech"
+           score: 0.5
+         category #1:
+           category_name: "Silence"
+           score: 0.1
+
+     Args:
+       audio_clip: MediaPipe AudioData.
+
+     Returns:
+       An `AudioClassifierResult` object that contains a list of
+       classification result objects, each associated with a timestamp
+       corresponding to the start (in milliseconds) of the chunk data that was
+       classified.
+
+     Raises:
+       ValueError: If any of the input arguments is invalid, such as the sample
+         rate is not provided in the `AudioData` object.
+       RuntimeError: If audio classification failed to run.
+     """
+     if not audio_clip.audio_format.sample_rate:
+       raise ValueError('Must provide the audio sample rate in audio data.')
+     output_packets = self._process_audio_clip({
+         _AUDIO_IN_STREAM_NAME:
+             packet_creator.create_matrix(audio_clip.buffer, transpose=True),
+         _SAMPLE_RATE_IN_STREAM_NAME:
+             packet_creator.create_double(audio_clip.audio_format.sample_rate)
+     })
+     output_list = []
+     classification_result_proto_list = packet_getter.get_proto_list(
+         output_packets[_TIMESTAMPED_CLASSIFICATIONS_STREAM_NAME])
+     for proto in classification_result_proto_list:
+       classification_result_proto = classifications_pb2.ClassificationResult()
+       classification_result_proto.CopyFrom(proto)
+       output_list.append(
+           AudioClassifierResult.create_from_pb2(classification_result_proto))
+     return output_list
+
276
+ def classify_async(self, audio_block: _AudioData, timestamp_ms: int) -> None:
277
+ """Sends audio data (a block in a continuous audio stream) to perform audio classification.
278
+
279
+ Only use this method when the AudioClassifier is created with the audio
280
+ stream running mode. The input timestamps should be monotonically increasing
281
+ for adjacent calls of this method. This method will return immediately after
282
+ the input audio data is accepted. The results will be available via the
283
+ `result_callback` provided in the `AudioClassifierOptions`. The
284
+ `classify_async` method is designed to process auido stream data such as
285
+ microphone input.
286
+
287
+ The input audio data may be longer than what the model is able to process
288
+ in a single inference. When this occurs, the input audio block is split
289
+ into multiple chunks. For this reason, the callback may be called multiple
290
+ times (once per chunk) for each call to this function.
291
+
292
+ The `result_callback` provides:
293
+ - An `AudioClassifierResult` object that contains a list of
294
+ classifications.
295
+ - The input timestamp in milliseconds.
296
+
297
+ Args:
298
+ audio_block: MediaPipe AudioData.
299
+ timestamp_ms: The timestamp of the input audio data in milliseconds.
300
+
301
+ Raises:
302
+ ValueError: If any of the followings:
303
+ 1) The sample rate is not provided in the `AudioData` object or the
304
+ provided sample rate is inconsistent with the previously received.
305
+ 2) The current input timestamp is smaller than what the audio
306
+ classifier has already processed.
307
+ """
308
+ if not audio_block.audio_format.sample_rate:
309
+ raise ValueError('Must provide the audio sample rate in audio data.')
310
+ if not self._default_sample_rate:
311
+ self._default_sample_rate = audio_block.audio_format.sample_rate
312
+ self._set_sample_rate(_SAMPLE_RATE_IN_STREAM_NAME,
313
+ self._default_sample_rate)
314
+ elif audio_block.audio_format.sample_rate != self._default_sample_rate:
315
+ raise ValueError(
316
+ f'The audio sample rate provided in audio data: '
317
+ f'{audio_block.audio_format.sample_rate} is inconsistent with '
318
+ f'the previously received: {self._default_sample_rate}.')
319
+
320
+ self._send_audio_stream_data({
321
+ _AUDIO_IN_STREAM_NAME:
322
+ packet_creator.create_matrix(audio_block.buffer, transpose=True).at(
323
+ timestamp_ms * _MICRO_SECONDS_PER_MILLISECOND)
324
+ })
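The two entry points above (classify for independent audio clips, classify_async for a continuous stream) can be exercised as in the following sketch. This is illustrative only and not part of the packaged file: the model path is a placeholder, the zero-filled 16 kHz buffer stands in for real samples, and it assumes the `AudioData.create_from_array` container helper and the context-manager support of the task API.

import numpy as np

from mediapipe.tasks.python.audio import audio_classifier
from mediapipe.tasks.python.audio.core import audio_task_running_mode
from mediapipe.tasks.python.components.containers import audio_data as audio_data_module
from mediapipe.tasks.python.core import base_options as base_options_module

# Placeholder one-second mono buffer at 16 kHz; real code would load audio here.
samples = np.zeros(16000, dtype=np.float32)
clip = audio_data_module.AudioData.create_from_array(samples, sample_rate=16000)

# Audio clips mode: classify() returns one result per processed chunk.
with audio_classifier.AudioClassifier.create_from_model_path(
    '/path/to/classifier.tflite') as classifier:
  for result in classifier.classify(clip):
    print(result.timestamp_ms, result.classifications)

# Audio stream mode: results are delivered through the callback in the options.
def on_result(result, timestamp_ms):
  print(timestamp_ms, result.classifications)

options = audio_classifier.AudioClassifierOptions(
    base_options=base_options_module.BaseOptions(
        model_asset_path='/path/to/classifier.tflite'),
    running_mode=audio_task_running_mode.AudioTaskRunningMode.AUDIO_STREAM,
    result_callback=on_result)
with audio_classifier.AudioClassifier.create_from_options(options) as classifier:
  classifier.classify_async(clip, timestamp_ms=0)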
@@ -0,0 +1,285 @@
+ # Copyright 2022 The MediaPipe Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """MediaPipe audio embedder task."""
+
+ import dataclasses
+ from typing import Callable, Mapping, List, Optional
+
+ from mediapipe.python import packet_creator
+ from mediapipe.python import packet_getter
+ from mediapipe.python._framework_bindings import packet
+ from mediapipe.tasks.cc.audio.audio_embedder.proto import audio_embedder_graph_options_pb2
+ from mediapipe.tasks.cc.components.containers.proto import embeddings_pb2
+ from mediapipe.tasks.cc.components.processors.proto import embedder_options_pb2
+ from mediapipe.tasks.python.audio.core import audio_task_running_mode as running_mode_module
+ from mediapipe.tasks.python.audio.core import base_audio_task_api
+ from mediapipe.tasks.python.components.containers import audio_data as audio_data_module
+ from mediapipe.tasks.python.components.containers import embedding_result as embedding_result_module
+ from mediapipe.tasks.python.core import base_options as base_options_module
+ from mediapipe.tasks.python.core import task_info as task_info_module
+ from mediapipe.tasks.python.core.optional_dependencies import doc_controls
+
+ AudioEmbedderResult = embedding_result_module.EmbeddingResult
+ _AudioEmbedderGraphOptionsProto = audio_embedder_graph_options_pb2.AudioEmbedderGraphOptions
+ _AudioData = audio_data_module.AudioData
+ _BaseOptions = base_options_module.BaseOptions
+ _EmbedderOptionsProto = embedder_options_pb2.EmbedderOptions
+ _RunningMode = running_mode_module.AudioTaskRunningMode
+ _TaskInfo = task_info_module.TaskInfo
+
+ _AUDIO_IN_STREAM_NAME = 'audio_in'
+ _AUDIO_TAG = 'AUDIO'
+ _EMBEDDINGS_STREAM_NAME = 'embeddings_out'
+ _EMBEDDINGS_TAG = 'EMBEDDINGS'
+ _SAMPLE_RATE_IN_STREAM_NAME = 'sample_rate_in'
+ _SAMPLE_RATE_TAG = 'SAMPLE_RATE'
+ _TASK_GRAPH_NAME = 'mediapipe.tasks.audio.audio_embedder.AudioEmbedderGraph'
+ _TIMESTAMPTED_EMBEDDINGS_STREAM_NAME = 'timestamped_embeddings_out'
+ _TIMESTAMPTED_EMBEDDINGS_TAG = 'TIMESTAMPED_EMBEDDINGS'
+ _MICRO_SECONDS_PER_MILLISECOND = 1000
+
+
+ @dataclasses.dataclass
+ class AudioEmbedderOptions:
+   """Options for the audio embedder task.
+
+   Attributes:
+     base_options: Base options for the audio embedder task.
+     running_mode: The running mode of the task. Defaults to the audio clips
+       mode. The audio embedder task has two running modes: 1) The audio clips
+       mode for running embedding extraction on independent audio clips. 2) The
+       audio stream mode for running embedding extraction on an audio stream,
+       such as from a microphone. In this mode, the "result_callback" below must
+       be specified to receive the embedding results asynchronously.
+     l2_normalize: Whether to normalize the returned feature vector with L2 norm.
+       Use this option only if the model does not already contain a native
+       L2_NORMALIZATION TF Lite Op. In most cases, this is already the case and
+       L2 norm is thus achieved through TF Lite inference.
+     quantize: Whether the returned embedding should be quantized to bytes via
+       scalar quantization. Embeddings are implicitly assumed to be unit-norm and
+       therefore any dimension is guaranteed to have a value in [-1.0, 1.0]. Use
+       the l2_normalize option if this is not the case.
+     result_callback: The user-defined result callback for processing audio
+       stream data. The result callback should only be specified when the running
+       mode is set to the audio stream mode.
+   """
+   base_options: _BaseOptions
+   running_mode: _RunningMode = _RunningMode.AUDIO_CLIPS
+   l2_normalize: Optional[bool] = None
+   quantize: Optional[bool] = None
+   result_callback: Optional[Callable[[AudioEmbedderResult, int], None]] = None
+
+   @doc_controls.do_not_generate_docs
+   def to_pb2(self) -> _AudioEmbedderGraphOptionsProto:
+     """Generates an AudioEmbedderOptions protobuf object."""
+     base_options_proto = self.base_options.to_pb2()
+     base_options_proto.use_stream_mode = False if self.running_mode == _RunningMode.AUDIO_CLIPS else True
+     embedder_options_proto = _EmbedderOptionsProto(
+         l2_normalize=self.l2_normalize, quantize=self.quantize)
+
+     return _AudioEmbedderGraphOptionsProto(
+         base_options=base_options_proto,
+         embedder_options=embedder_options_proto)
+
+
+ class AudioEmbedder(base_audio_task_api.BaseAudioTaskApi):
+   """Class that performs embedding extraction on audio clips or an audio stream.
+
+   This API expects a TFLite model with mandatory TFLite Model Metadata that
+   contains the mandatory AudioProperties of the solo input audio tensor and the
+   optional (but recommended) label items as AssociatedFiles with type
+   TENSOR_AXIS_LABELS per output embedding tensor.
+
+   Input tensor:
+     (kTfLiteFloat32)
+     - input audio buffer of size `[batch * samples]`.
+     - batch inference is not supported (`batch` is required to be 1).
+     - for multi-channel models, the channels must be interleaved.
+   At least one output tensor with:
+     (kTfLiteUInt8/kTfLiteFloat32)
+     - `N` components corresponding to the `N` dimensions of the returned
+       feature vector for this output layer.
+     - Either 2 or 4 dimensions, i.e. `[1 x N]` or `[1 x 1 x 1 x N]`.
+   """
+
+   @classmethod
+   def create_from_model_path(cls, model_path: str) -> 'AudioEmbedder':
+     """Creates an `AudioEmbedder` object from a TensorFlow Lite model and the default `AudioEmbedderOptions`.
+
+     Note that the created `AudioEmbedder` instance is in audio clips mode, for
+     embedding extraction on independent audio clips.
+
+     Args:
+       model_path: Path to the model.
+
+     Returns:
+       `AudioEmbedder` object that's created from the model file and the
+       default `AudioEmbedderOptions`.
+
+     Raises:
+       ValueError: If failed to create `AudioEmbedder` object from the provided
+         file such as invalid file path.
+       RuntimeError: If other types of error occurred.
+     """
+     base_options = _BaseOptions(model_asset_path=model_path)
+     options = AudioEmbedderOptions(
+         base_options=base_options, running_mode=_RunningMode.AUDIO_CLIPS)
+     return cls.create_from_options(options)
+
+   @classmethod
+   def create_from_options(cls,
+                           options: AudioEmbedderOptions) -> 'AudioEmbedder':
+     """Creates the `AudioEmbedder` object from audio embedder options.
+
+     Args:
+       options: Options for the audio embedder task.
+
+     Returns:
+       `AudioEmbedder` object that's created from `options`.
+
+     Raises:
+       ValueError: If failed to create `AudioEmbedder` object from
+         `AudioEmbedderOptions` such as missing the model.
+       RuntimeError: If other types of error occurred.
+     """
+
+     def packets_callback(output_packets: Mapping[str, packet.Packet]):
+       timestamp_ms = output_packets[
+           _EMBEDDINGS_STREAM_NAME].timestamp.value // _MICRO_SECONDS_PER_MILLISECOND
+       if output_packets[_EMBEDDINGS_STREAM_NAME].is_empty():
+         options.result_callback(
+             AudioEmbedderResult(embeddings=[]), timestamp_ms)
+         return
+       embedding_result_proto = embeddings_pb2.EmbeddingResult()
+       embedding_result_proto.CopyFrom(
+           packet_getter.get_proto(output_packets[_EMBEDDINGS_STREAM_NAME]))
+       options.result_callback(
+           AudioEmbedderResult.create_from_pb2(embedding_result_proto),
+           timestamp_ms)
+
+     task_info = _TaskInfo(
+         task_graph=_TASK_GRAPH_NAME,
+         input_streams=[
+             ':'.join([_AUDIO_TAG, _AUDIO_IN_STREAM_NAME]),
+             ':'.join([_SAMPLE_RATE_TAG, _SAMPLE_RATE_IN_STREAM_NAME])
+         ],
+         output_streams=[
+             ':'.join([_EMBEDDINGS_TAG, _EMBEDDINGS_STREAM_NAME]), ':'.join([
+                 _TIMESTAMPTED_EMBEDDINGS_TAG,
+                 _TIMESTAMPTED_EMBEDDINGS_STREAM_NAME
+             ])
+         ],
+         task_options=options)
+     return cls(
+         # Audio tasks should not drop input audio due to flow limiting, which
+         # may cause data inconsistency.
+         task_info.generate_graph_config(enable_flow_limiting=False),
+         options.running_mode,
+         packets_callback if options.result_callback else None)
+
+   def embed(self, audio_clip: _AudioData) -> List[AudioEmbedderResult]:
+     """Performs embedding extraction on the provided audio clip.
+
+     The audio clip is represented as a MediaPipe AudioData. The method accepts
+     audio clips of various lengths and sample rates. It's required to provide
+     the corresponding audio sample rate within the `AudioData` object.
+
+     The input audio clip may be longer than what the model is able to process
+     in a single inference. When this occurs, the input audio clip is split into
+     multiple chunks starting at different timestamps. For this reason, this
+     function returns a vector of EmbeddingResult objects, each associated
+     with a timestamp corresponding to the start (in milliseconds) of the chunk
+     of data on which embedding extraction was carried out.
+
+     Args:
+       audio_clip: MediaPipe AudioData.
+
+     Returns:
+       A list of `AudioEmbedderResult` objects, each associated with a
+       timestamp corresponding to the start (in milliseconds) of the chunk of
+       data on which embedding extraction was carried out.
+
+     Raises:
+       ValueError: If any of the input arguments is invalid, such as the sample
+         rate is not provided in the `AudioData` object.
+       RuntimeError: If audio embedding extraction failed to run.
+     """
+     if not audio_clip.audio_format.sample_rate:
+       raise ValueError('Must provide the audio sample rate in audio data.')
+     output_packets = self._process_audio_clip({
+         _AUDIO_IN_STREAM_NAME:
+             packet_creator.create_matrix(audio_clip.buffer, transpose=True),
+         _SAMPLE_RATE_IN_STREAM_NAME:
+             packet_creator.create_double(audio_clip.audio_format.sample_rate)
+     })
+     output_list = []
+     embeddings_proto_list = packet_getter.get_proto_list(
+         output_packets[_TIMESTAMPTED_EMBEDDINGS_STREAM_NAME])
+     for proto in embeddings_proto_list:
+       embedding_result_proto = embeddings_pb2.EmbeddingResult()
+       embedding_result_proto.CopyFrom(proto)
+       output_list.append(
+           AudioEmbedderResult.create_from_pb2(embedding_result_proto))
+     return output_list
+
+   def embed_async(self, audio_block: _AudioData, timestamp_ms: int) -> None:
+     """Sends audio data (a block in a continuous audio stream) to perform audio embedding extraction.
+
+     Only use this method when the AudioEmbedder is created with the audio
+     stream running mode. The input timestamps should be monotonically increasing
+     for adjacent calls of this method. This method will return immediately after
+     the input audio data is accepted. The results will be available via the
+     `result_callback` provided in the `AudioEmbedderOptions`. The
+     `embed_async` method is designed to process audio stream data such as
+     microphone input.
+
+     The input audio data may be longer than what the model is able to process
+     in a single inference. When this occurs, the input audio block is split
+     into multiple chunks. For this reason, the callback may be called multiple
+     times (once per chunk) for each call to this function.
+
+     The `result_callback` provides:
+       - An `AudioEmbedderResult` object that contains a list of embeddings.
+       - The input timestamp in milliseconds.
+
+     Args:
+       audio_block: MediaPipe AudioData.
+       timestamp_ms: The timestamp of the input audio data in milliseconds.
+
+     Raises:
+       ValueError: If any of the following:
+         1) The sample rate is not provided in the `AudioData` object or the
+            provided sample rate is inconsistent with the one previously
+            received.
+         2) The current input timestamp is smaller than what the audio
+            embedder has already processed.
+     """
+     if not audio_block.audio_format.sample_rate:
+       raise ValueError('Must provide the audio sample rate in audio data.')
+     if not self._default_sample_rate:
+       self._default_sample_rate = audio_block.audio_format.sample_rate
+       self._set_sample_rate(_SAMPLE_RATE_IN_STREAM_NAME,
+                             self._default_sample_rate)
+     elif audio_block.audio_format.sample_rate != self._default_sample_rate:
+       raise ValueError(
+           f'The audio sample rate provided in audio data: '
+           f'{audio_block.audio_format.sample_rate} is inconsistent with '
+           f'the previously received: {self._default_sample_rate}.')
+
+     self._send_audio_stream_data({
+         _AUDIO_IN_STREAM_NAME:
+             packet_creator.create_matrix(audio_block.buffer, transpose=True).at(
+                 timestamp_ms * _MICRO_SECONDS_PER_MILLISECOND)
+     })
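As with the classifier, here is a brief sketch of how the embedder above might be driven, again illustrative rather than part of the packaged file: the model path and buffers are placeholders, and it assumes the `AudioData.create_from_array` helper and the `embeddings[0].embedding` layout of the returned `EmbeddingResult` container.

import numpy as np

from mediapipe.tasks.python.audio import audio_embedder
from mediapipe.tasks.python.components.containers import audio_data as audio_data_module

def embed_first_chunk(embedder, samples, sample_rate=16000):
  clip = audio_data_module.AudioData.create_from_array(
      samples.astype(np.float32), sample_rate=sample_rate)
  # embed() returns one AudioEmbedderResult per processed chunk; use the first.
  return np.asarray(embedder.embed(clip)[0].embeddings[0].embedding)

with audio_embedder.AudioEmbedder.create_from_model_path(
    '/path/to/embedder.tflite') as embedder:
  v0 = embed_first_chunk(embedder, np.zeros(16000))
  v1 = embed_first_chunk(embedder, np.random.rand(16000))
  # Cosine similarity between the two float feature vectors.
  similarity = float(np.dot(v0, v1) / (np.linalg.norm(v0) * np.linalg.norm(v1)))
  print(similarity)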
@@ -0,0 +1,16 @@
+ """Copyright 2022 The MediaPipe Authors.
+
+ All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ """