mediapipe-nightly 0.10.21.post20250114__cp312-cp312-manylinux_2_28_x86_64.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (593) hide show
  1. mediapipe/__init__.py +26 -0
  2. mediapipe/calculators/__init__.py +0 -0
  3. mediapipe/calculators/audio/__init__.py +0 -0
  4. mediapipe/calculators/audio/mfcc_mel_calculators_pb2.py +33 -0
  5. mediapipe/calculators/audio/rational_factor_resample_calculator_pb2.py +33 -0
  6. mediapipe/calculators/audio/spectrogram_calculator_pb2.py +37 -0
  7. mediapipe/calculators/audio/stabilized_log_calculator_pb2.py +31 -0
  8. mediapipe/calculators/audio/time_series_framer_calculator_pb2.py +33 -0
  9. mediapipe/calculators/core/__init__.py +0 -0
  10. mediapipe/calculators/core/bypass_calculator_pb2.py +31 -0
  11. mediapipe/calculators/core/clip_vector_size_calculator_pb2.py +31 -0
  12. mediapipe/calculators/core/concatenate_vector_calculator_pb2.py +31 -0
  13. mediapipe/calculators/core/constant_side_packet_calculator_pb2.py +39 -0
  14. mediapipe/calculators/core/dequantize_byte_array_calculator_pb2.py +31 -0
  15. mediapipe/calculators/core/flow_limiter_calculator_pb2.py +32 -0
  16. mediapipe/calculators/core/gate_calculator_pb2.py +33 -0
  17. mediapipe/calculators/core/get_vector_item_calculator_pb2.py +31 -0
  18. mediapipe/calculators/core/graph_profile_calculator_pb2.py +31 -0
  19. mediapipe/calculators/core/packet_cloner_calculator_pb2.py +31 -0
  20. mediapipe/calculators/core/packet_resampler_calculator_pb2.py +33 -0
  21. mediapipe/calculators/core/packet_thinner_calculator_pb2.py +33 -0
  22. mediapipe/calculators/core/quantize_float_vector_calculator_pb2.py +31 -0
  23. mediapipe/calculators/core/sequence_shift_calculator_pb2.py +31 -0
  24. mediapipe/calculators/core/split_vector_calculator_pb2.py +33 -0
  25. mediapipe/calculators/image/__init__.py +0 -0
  26. mediapipe/calculators/image/bilateral_filter_calculator_pb2.py +31 -0
  27. mediapipe/calculators/image/feature_detector_calculator_pb2.py +31 -0
  28. mediapipe/calculators/image/image_clone_calculator_pb2.py +31 -0
  29. mediapipe/calculators/image/image_cropping_calculator_pb2.py +33 -0
  30. mediapipe/calculators/image/image_transformation_calculator_pb2.py +38 -0
  31. mediapipe/calculators/image/mask_overlay_calculator_pb2.py +33 -0
  32. mediapipe/calculators/image/opencv_encoded_image_to_image_frame_calculator_pb2.py +31 -0
  33. mediapipe/calculators/image/opencv_image_encoder_calculator_pb2.py +35 -0
  34. mediapipe/calculators/image/recolor_calculator_pb2.py +34 -0
  35. mediapipe/calculators/image/rotation_mode_pb2.py +29 -0
  36. mediapipe/calculators/image/scale_image_calculator_pb2.py +34 -0
  37. mediapipe/calculators/image/segmentation_smoothing_calculator_pb2.py +31 -0
  38. mediapipe/calculators/image/set_alpha_calculator_pb2.py +31 -0
  39. mediapipe/calculators/image/warp_affine_calculator_pb2.py +36 -0
  40. mediapipe/calculators/internal/__init__.py +0 -0
  41. mediapipe/calculators/internal/callback_packet_calculator_pb2.py +33 -0
  42. mediapipe/calculators/tensor/__init__.py +0 -0
  43. mediapipe/calculators/tensor/audio_to_tensor_calculator_pb2.py +35 -0
  44. mediapipe/calculators/tensor/bert_preprocessor_calculator_pb2.py +31 -0
  45. mediapipe/calculators/tensor/feedback_tensors_calculator_pb2.py +37 -0
  46. mediapipe/calculators/tensor/image_to_tensor_calculator_pb2.py +40 -0
  47. mediapipe/calculators/tensor/inference_calculator_pb2.py +63 -0
  48. mediapipe/calculators/tensor/landmarks_to_tensor_calculator_pb2.py +33 -0
  49. mediapipe/calculators/tensor/regex_preprocessor_calculator_pb2.py +31 -0
  50. mediapipe/calculators/tensor/tensor_converter_calculator_pb2.py +34 -0
  51. mediapipe/calculators/tensor/tensor_to_joints_calculator_pb2.py +31 -0
  52. mediapipe/calculators/tensor/tensors_readback_calculator_pb2.py +35 -0
  53. mediapipe/calculators/tensor/tensors_to_audio_calculator_pb2.py +33 -0
  54. mediapipe/calculators/tensor/tensors_to_classification_calculator_pb2.py +44 -0
  55. mediapipe/calculators/tensor/tensors_to_detections_calculator_pb2.py +39 -0
  56. mediapipe/calculators/tensor/tensors_to_floats_calculator_pb2.py +33 -0
  57. mediapipe/calculators/tensor/tensors_to_landmarks_calculator_pb2.py +33 -0
  58. mediapipe/calculators/tensor/tensors_to_segmentation_calculator_pb2.py +34 -0
  59. mediapipe/calculators/tensor/vector_to_tensor_calculator_pb2.py +27 -0
  60. mediapipe/calculators/tflite/__init__.py +0 -0
  61. mediapipe/calculators/tflite/ssd_anchors_calculator_pb2.py +32 -0
  62. mediapipe/calculators/tflite/tflite_converter_calculator_pb2.py +33 -0
  63. mediapipe/calculators/tflite/tflite_custom_op_resolver_calculator_pb2.py +31 -0
  64. mediapipe/calculators/tflite/tflite_inference_calculator_pb2.py +49 -0
  65. mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator_pb2.py +31 -0
  66. mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator_pb2.py +31 -0
  67. mediapipe/calculators/tflite/tflite_tensors_to_landmarks_calculator_pb2.py +33 -0
  68. mediapipe/calculators/tflite/tflite_tensors_to_segmentation_calculator_pb2.py +31 -0
  69. mediapipe/calculators/util/__init__.py +0 -0
  70. mediapipe/calculators/util/align_hand_to_pose_in_world_calculator_pb2.py +31 -0
  71. mediapipe/calculators/util/annotation_overlay_calculator_pb2.py +32 -0
  72. mediapipe/calculators/util/association_calculator_pb2.py +31 -0
  73. mediapipe/calculators/util/collection_has_min_size_calculator_pb2.py +31 -0
  74. mediapipe/calculators/util/combine_joints_calculator_pb2.py +36 -0
  75. mediapipe/calculators/util/detection_label_id_to_text_calculator_pb2.py +36 -0
  76. mediapipe/calculators/util/detections_to_rects_calculator_pb2.py +33 -0
  77. mediapipe/calculators/util/detections_to_render_data_calculator_pb2.py +33 -0
  78. mediapipe/calculators/util/face_to_rect_calculator_pb2.py +26 -0
  79. mediapipe/calculators/util/filter_detections_calculator_pb2.py +31 -0
  80. mediapipe/calculators/util/flat_color_image_calculator_pb2.py +32 -0
  81. mediapipe/calculators/util/labels_to_render_data_calculator_pb2.py +34 -0
  82. mediapipe/calculators/util/landmark_projection_calculator_pb2.py +31 -0
  83. mediapipe/calculators/util/landmarks_refinement_calculator_pb2.py +41 -0
  84. mediapipe/calculators/util/landmarks_smoothing_calculator_pb2.py +33 -0
  85. mediapipe/calculators/util/landmarks_to_detection_calculator_pb2.py +31 -0
  86. mediapipe/calculators/util/landmarks_to_floats_calculator_pb2.py +31 -0
  87. mediapipe/calculators/util/landmarks_to_render_data_calculator_pb2.py +32 -0
  88. mediapipe/calculators/util/landmarks_transformation_calculator_pb2.py +37 -0
  89. mediapipe/calculators/util/latency_pb2.py +26 -0
  90. mediapipe/calculators/util/local_file_contents_calculator_pb2.py +31 -0
  91. mediapipe/calculators/util/logic_calculator_pb2.py +34 -0
  92. mediapipe/calculators/util/non_max_suppression_calculator_pb2.py +35 -0
  93. mediapipe/calculators/util/packet_frequency_calculator_pb2.py +31 -0
  94. mediapipe/calculators/util/packet_frequency_pb2.py +26 -0
  95. mediapipe/calculators/util/packet_latency_calculator_pb2.py +31 -0
  96. mediapipe/calculators/util/rect_to_render_data_calculator_pb2.py +32 -0
  97. mediapipe/calculators/util/rect_to_render_scale_calculator_pb2.py +31 -0
  98. mediapipe/calculators/util/rect_transformation_calculator_pb2.py +31 -0
  99. mediapipe/calculators/util/refine_landmarks_from_heatmap_calculator_pb2.py +31 -0
  100. mediapipe/calculators/util/resource_provider_calculator_pb2.py +28 -0
  101. mediapipe/calculators/util/set_joints_visibility_calculator_pb2.py +41 -0
  102. mediapipe/calculators/util/thresholding_calculator_pb2.py +31 -0
  103. mediapipe/calculators/util/timed_box_list_id_to_label_calculator_pb2.py +31 -0
  104. mediapipe/calculators/util/timed_box_list_to_render_data_calculator_pb2.py +32 -0
  105. mediapipe/calculators/util/top_k_scores_calculator_pb2.py +31 -0
  106. mediapipe/calculators/util/visibility_copy_calculator_pb2.py +27 -0
  107. mediapipe/calculators/util/visibility_smoothing_calculator_pb2.py +31 -0
  108. mediapipe/calculators/video/__init__.py +0 -0
  109. mediapipe/calculators/video/box_detector_calculator_pb2.py +32 -0
  110. mediapipe/calculators/video/box_tracker_calculator_pb2.py +32 -0
  111. mediapipe/calculators/video/flow_packager_calculator_pb2.py +32 -0
  112. mediapipe/calculators/video/flow_to_image_calculator_pb2.py +31 -0
  113. mediapipe/calculators/video/motion_analysis_calculator_pb2.py +42 -0
  114. mediapipe/calculators/video/opencv_video_encoder_calculator_pb2.py +31 -0
  115. mediapipe/calculators/video/tool/__init__.py +0 -0
  116. mediapipe/calculators/video/tool/flow_quantizer_model_pb2.py +26 -0
  117. mediapipe/calculators/video/tracked_detection_manager_calculator_pb2.py +32 -0
  118. mediapipe/calculators/video/video_pre_stream_calculator_pb2.py +35 -0
  119. mediapipe/examples/__init__.py +14 -0
  120. mediapipe/examples/desktop/__init__.py +14 -0
  121. mediapipe/framework/__init__.py +0 -0
  122. mediapipe/framework/calculator_options_pb2.py +29 -0
  123. mediapipe/framework/calculator_pb2.py +59 -0
  124. mediapipe/framework/calculator_profile_pb2.py +48 -0
  125. mediapipe/framework/deps/__init__.py +0 -0
  126. mediapipe/framework/deps/proto_descriptor_pb2.py +29 -0
  127. mediapipe/framework/formats/__init__.py +0 -0
  128. mediapipe/framework/formats/affine_transform_data_pb2.py +28 -0
  129. mediapipe/framework/formats/annotation/__init__.py +0 -0
  130. mediapipe/framework/formats/annotation/locus_pb2.py +32 -0
  131. mediapipe/framework/formats/annotation/rasterization_pb2.py +29 -0
  132. mediapipe/framework/formats/body_rig_pb2.py +28 -0
  133. mediapipe/framework/formats/classification_pb2.py +31 -0
  134. mediapipe/framework/formats/detection_pb2.py +36 -0
  135. mediapipe/framework/formats/image_file_properties_pb2.py +26 -0
  136. mediapipe/framework/formats/image_format_pb2.py +29 -0
  137. mediapipe/framework/formats/landmark_pb2.py +37 -0
  138. mediapipe/framework/formats/location_data_pb2.py +38 -0
  139. mediapipe/framework/formats/matrix_data_pb2.py +31 -0
  140. mediapipe/framework/formats/motion/__init__.py +0 -0
  141. mediapipe/framework/formats/motion/optical_flow_field_data_pb2.py +30 -0
  142. mediapipe/framework/formats/object_detection/__init__.py +0 -0
  143. mediapipe/framework/formats/object_detection/anchor_pb2.py +26 -0
  144. mediapipe/framework/formats/rect_pb2.py +29 -0
  145. mediapipe/framework/formats/time_series_header_pb2.py +28 -0
  146. mediapipe/framework/graph_runtime_info_pb2.py +31 -0
  147. mediapipe/framework/mediapipe_options_pb2.py +27 -0
  148. mediapipe/framework/packet_factory_pb2.py +31 -0
  149. mediapipe/framework/packet_generator_pb2.py +33 -0
  150. mediapipe/framework/status_handler_pb2.py +28 -0
  151. mediapipe/framework/stream_handler/__init__.py +0 -0
  152. mediapipe/framework/stream_handler/default_input_stream_handler_pb2.py +27 -0
  153. mediapipe/framework/stream_handler/fixed_size_input_stream_handler_pb2.py +27 -0
  154. mediapipe/framework/stream_handler/sync_set_input_stream_handler_pb2.py +29 -0
  155. mediapipe/framework/stream_handler/timestamp_align_input_stream_handler_pb2.py +27 -0
  156. mediapipe/framework/stream_handler_pb2.py +30 -0
  157. mediapipe/framework/test_calculators_pb2.py +31 -0
  158. mediapipe/framework/thread_pool_executor_pb2.py +29 -0
  159. mediapipe/framework/tool/__init__.py +0 -0
  160. mediapipe/framework/tool/calculator_graph_template_pb2.py +44 -0
  161. mediapipe/framework/tool/field_data_pb2.py +28 -0
  162. mediapipe/framework/tool/node_chain_subgraph_pb2.py +31 -0
  163. mediapipe/framework/tool/packet_generator_wrapper_calculator_pb2.py +28 -0
  164. mediapipe/framework/tool/source_pb2.py +33 -0
  165. mediapipe/framework/tool/switch_container_pb2.py +32 -0
  166. mediapipe/gpu/__init__.py +0 -0
  167. mediapipe/gpu/copy_calculator_pb2.py +33 -0
  168. mediapipe/gpu/gl_animation_overlay_calculator_pb2.py +31 -0
  169. mediapipe/gpu/gl_context_options_pb2.py +31 -0
  170. mediapipe/gpu/gl_scaler_calculator_pb2.py +32 -0
  171. mediapipe/gpu/gl_surface_sink_calculator_pb2.py +32 -0
  172. mediapipe/gpu/gpu_origin_pb2.py +29 -0
  173. mediapipe/gpu/scale_mode_pb2.py +28 -0
  174. mediapipe/model_maker/__init__.py +27 -0
  175. mediapipe/model_maker/setup.py +107 -0
  176. mediapipe/modules/__init__.py +0 -0
  177. mediapipe/modules/face_detection/__init__.py +0 -0
  178. mediapipe/modules/face_detection/face_detection_full_range_cpu.binarypb +0 -0
  179. mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite +0 -0
  180. mediapipe/modules/face_detection/face_detection_pb2.py +30 -0
  181. mediapipe/modules/face_detection/face_detection_short_range.tflite +0 -0
  182. mediapipe/modules/face_detection/face_detection_short_range_cpu.binarypb +0 -0
  183. mediapipe/modules/face_geometry/__init__.py +0 -0
  184. mediapipe/modules/face_geometry/data/__init__.py +0 -0
  185. mediapipe/modules/face_geometry/effect_renderer_calculator_pb2.py +27 -0
  186. mediapipe/modules/face_geometry/env_generator_calculator_pb2.py +28 -0
  187. mediapipe/modules/face_geometry/geometry_pipeline_calculator_pb2.py +27 -0
  188. mediapipe/modules/face_geometry/libs/__init__.py +0 -0
  189. mediapipe/modules/face_geometry/protos/__init__.py +0 -0
  190. mediapipe/modules/face_geometry/protos/environment_pb2.py +31 -0
  191. mediapipe/modules/face_geometry/protos/face_geometry_pb2.py +29 -0
  192. mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata_pb2.py +32 -0
  193. mediapipe/modules/face_geometry/protos/mesh_3d_pb2.py +31 -0
  194. mediapipe/modules/face_landmark/__init__.py +0 -0
  195. mediapipe/modules/face_landmark/face_landmark.tflite +0 -0
  196. mediapipe/modules/face_landmark/face_landmark_front_cpu.binarypb +0 -0
  197. mediapipe/modules/face_landmark/face_landmark_with_attention.tflite +0 -0
  198. mediapipe/modules/hand_landmark/__init__.py +0 -0
  199. mediapipe/modules/hand_landmark/calculators/__init__.py +0 -0
  200. mediapipe/modules/hand_landmark/hand_landmark_full.tflite +0 -0
  201. mediapipe/modules/hand_landmark/hand_landmark_lite.tflite +0 -0
  202. mediapipe/modules/hand_landmark/hand_landmark_tracking_cpu.binarypb +0 -0
  203. mediapipe/modules/hand_landmark/handedness.txt +2 -0
  204. mediapipe/modules/holistic_landmark/__init__.py +0 -0
  205. mediapipe/modules/holistic_landmark/calculators/__init__.py +0 -0
  206. mediapipe/modules/holistic_landmark/calculators/roi_tracking_calculator_pb2.py +37 -0
  207. mediapipe/modules/holistic_landmark/hand_recrop.tflite +0 -0
  208. mediapipe/modules/holistic_landmark/holistic_landmark_cpu.binarypb +0 -0
  209. mediapipe/modules/iris_landmark/__init__.py +0 -0
  210. mediapipe/modules/iris_landmark/iris_landmark.tflite +0 -0
  211. mediapipe/modules/objectron/__init__.py +0 -0
  212. mediapipe/modules/objectron/calculators/__init__.py +0 -0
  213. mediapipe/modules/objectron/calculators/a_r_capture_metadata_pb2.py +102 -0
  214. mediapipe/modules/objectron/calculators/annotation_data_pb2.py +38 -0
  215. mediapipe/modules/objectron/calculators/belief_decoder_config_pb2.py +28 -0
  216. mediapipe/modules/objectron/calculators/camera_parameters_pb2.py +30 -0
  217. mediapipe/modules/objectron/calculators/filter_detection_calculator_pb2.py +35 -0
  218. mediapipe/modules/objectron/calculators/frame_annotation_to_rect_calculator_pb2.py +31 -0
  219. mediapipe/modules/objectron/calculators/frame_annotation_tracker_calculator_pb2.py +31 -0
  220. mediapipe/modules/objectron/calculators/lift_2d_frame_annotation_to_3d_calculator_pb2.py +32 -0
  221. mediapipe/modules/objectron/calculators/object_pb2.py +38 -0
  222. mediapipe/modules/objectron/calculators/tensors_to_objects_calculator_pb2.py +32 -0
  223. mediapipe/modules/objectron/calculators/tflite_tensors_to_objects_calculator_pb2.py +32 -0
  224. mediapipe/modules/objectron/object_detection_oidv4_labelmap.txt +24 -0
  225. mediapipe/modules/objectron/objectron_cpu.binarypb +0 -0
  226. mediapipe/modules/palm_detection/__init__.py +0 -0
  227. mediapipe/modules/palm_detection/palm_detection_full.tflite +0 -0
  228. mediapipe/modules/palm_detection/palm_detection_lite.tflite +0 -0
  229. mediapipe/modules/pose_detection/__init__.py +0 -0
  230. mediapipe/modules/pose_detection/pose_detection.tflite +0 -0
  231. mediapipe/modules/pose_landmark/__init__.py +0 -0
  232. mediapipe/modules/pose_landmark/pose_landmark_cpu.binarypb +0 -0
  233. mediapipe/modules/pose_landmark/pose_landmark_full.tflite +0 -0
  234. mediapipe/modules/selfie_segmentation/__init__.py +0 -0
  235. mediapipe/modules/selfie_segmentation/selfie_segmentation.tflite +0 -0
  236. mediapipe/modules/selfie_segmentation/selfie_segmentation_cpu.binarypb +0 -0
  237. mediapipe/modules/selfie_segmentation/selfie_segmentation_landscape.tflite +0 -0
  238. mediapipe/python/__init__.py +29 -0
  239. mediapipe/python/_framework_bindings.cpython-312-x86_64-linux-gnu.so +0 -0
  240. mediapipe/python/calculator_graph_test.py +251 -0
  241. mediapipe/python/image_frame_test.py +194 -0
  242. mediapipe/python/image_test.py +218 -0
  243. mediapipe/python/packet_creator.py +275 -0
  244. mediapipe/python/packet_getter.py +120 -0
  245. mediapipe/python/packet_test.py +533 -0
  246. mediapipe/python/solution_base.py +604 -0
  247. mediapipe/python/solution_base_test.py +396 -0
  248. mediapipe/python/solutions/__init__.py +27 -0
  249. mediapipe/python/solutions/download_utils.py +37 -0
  250. mediapipe/python/solutions/drawing_styles.py +249 -0
  251. mediapipe/python/solutions/drawing_utils.py +320 -0
  252. mediapipe/python/solutions/drawing_utils_test.py +258 -0
  253. mediapipe/python/solutions/face_detection.py +105 -0
  254. mediapipe/python/solutions/face_detection_test.py +92 -0
  255. mediapipe/python/solutions/face_mesh.py +125 -0
  256. mediapipe/python/solutions/face_mesh_connections.py +500 -0
  257. mediapipe/python/solutions/face_mesh_test.py +170 -0
  258. mediapipe/python/solutions/hands.py +153 -0
  259. mediapipe/python/solutions/hands_connections.py +32 -0
  260. mediapipe/python/solutions/hands_test.py +219 -0
  261. mediapipe/python/solutions/holistic.py +167 -0
  262. mediapipe/python/solutions/holistic_test.py +142 -0
  263. mediapipe/python/solutions/objectron.py +288 -0
  264. mediapipe/python/solutions/objectron_test.py +81 -0
  265. mediapipe/python/solutions/pose.py +192 -0
  266. mediapipe/python/solutions/pose_connections.py +22 -0
  267. mediapipe/python/solutions/pose_test.py +262 -0
  268. mediapipe/python/solutions/selfie_segmentation.py +76 -0
  269. mediapipe/python/solutions/selfie_segmentation_test.py +68 -0
  270. mediapipe/python/timestamp_test.py +78 -0
  271. mediapipe/tasks/__init__.py +14 -0
  272. mediapipe/tasks/cc/__init__.py +0 -0
  273. mediapipe/tasks/cc/audio/__init__.py +0 -0
  274. mediapipe/tasks/cc/audio/audio_classifier/__init__.py +0 -0
  275. mediapipe/tasks/cc/audio/audio_classifier/proto/__init__.py +0 -0
  276. mediapipe/tasks/cc/audio/audio_classifier/proto/audio_classifier_graph_options_pb2.py +35 -0
  277. mediapipe/tasks/cc/audio/audio_embedder/__init__.py +0 -0
  278. mediapipe/tasks/cc/audio/audio_embedder/proto/__init__.py +0 -0
  279. mediapipe/tasks/cc/audio/audio_embedder/proto/audio_embedder_graph_options_pb2.py +35 -0
  280. mediapipe/tasks/cc/audio/core/__init__.py +0 -0
  281. mediapipe/tasks/cc/audio/utils/__init__.py +0 -0
  282. mediapipe/tasks/cc/components/__init__.py +0 -0
  283. mediapipe/tasks/cc/components/calculators/__init__.py +0 -0
  284. mediapipe/tasks/cc/components/calculators/classification_aggregation_calculator_pb2.py +31 -0
  285. mediapipe/tasks/cc/components/calculators/score_calibration_calculator_pb2.py +35 -0
  286. mediapipe/tasks/cc/components/calculators/tensors_to_embeddings_calculator_pb2.py +32 -0
  287. mediapipe/tasks/cc/components/containers/__init__.py +0 -0
  288. mediapipe/tasks/cc/components/containers/proto/__init__.py +0 -0
  289. mediapipe/tasks/cc/components/containers/proto/classifications_pb2.py +30 -0
  290. mediapipe/tasks/cc/components/containers/proto/embeddings_pb2.py +35 -0
  291. mediapipe/tasks/cc/components/containers/proto/landmarks_detection_result_pb2.py +32 -0
  292. mediapipe/tasks/cc/components/processors/__init__.py +0 -0
  293. mediapipe/tasks/cc/components/processors/proto/__init__.py +0 -0
  294. mediapipe/tasks/cc/components/processors/proto/classification_postprocessing_graph_options_pb2.py +38 -0
  295. mediapipe/tasks/cc/components/processors/proto/classifier_options_pb2.py +27 -0
  296. mediapipe/tasks/cc/components/processors/proto/detection_postprocessing_graph_options_pb2.py +36 -0
  297. mediapipe/tasks/cc/components/processors/proto/detector_options_pb2.py +27 -0
  298. mediapipe/tasks/cc/components/processors/proto/embedder_options_pb2.py +27 -0
  299. mediapipe/tasks/cc/components/processors/proto/embedding_postprocessing_graph_options_pb2.py +32 -0
  300. mediapipe/tasks/cc/components/processors/proto/image_preprocessing_graph_options_pb2.py +34 -0
  301. mediapipe/tasks/cc/components/processors/proto/text_model_type_pb2.py +28 -0
  302. mediapipe/tasks/cc/components/processors/proto/text_preprocessing_graph_options_pb2.py +32 -0
  303. mediapipe/tasks/cc/components/utils/__init__.py +0 -0
  304. mediapipe/tasks/cc/core/__init__.py +0 -0
  305. mediapipe/tasks/cc/core/proto/__init__.py +0 -0
  306. mediapipe/tasks/cc/core/proto/acceleration_pb2.py +28 -0
  307. mediapipe/tasks/cc/core/proto/base_options_pb2.py +30 -0
  308. mediapipe/tasks/cc/core/proto/external_file_pb2.py +31 -0
  309. mediapipe/tasks/cc/core/proto/inference_subgraph_pb2.py +32 -0
  310. mediapipe/tasks/cc/core/proto/model_resources_calculator_pb2.py +32 -0
  311. mediapipe/tasks/cc/genai/__init__.py +0 -0
  312. mediapipe/tasks/cc/genai/inference/__init__.py +0 -0
  313. mediapipe/tasks/cc/genai/inference/c/__init__.py +0 -0
  314. mediapipe/tasks/cc/genai/inference/calculators/__init__.py +0 -0
  315. mediapipe/tasks/cc/genai/inference/calculators/detokenizer_calculator_pb2.py +27 -0
  316. mediapipe/tasks/cc/genai/inference/calculators/llm_gpu_calculator_pb2.py +32 -0
  317. mediapipe/tasks/cc/genai/inference/calculators/model_data_calculator_pb2.py +27 -0
  318. mediapipe/tasks/cc/genai/inference/calculators/tokenizer_calculator_pb2.py +29 -0
  319. mediapipe/tasks/cc/genai/inference/common/__init__.py +0 -0
  320. mediapipe/tasks/cc/genai/inference/proto/__init__.py +0 -0
  321. mediapipe/tasks/cc/genai/inference/proto/llm_file_metadata_pb2.py +32 -0
  322. mediapipe/tasks/cc/genai/inference/proto/llm_params_pb2.py +33 -0
  323. mediapipe/tasks/cc/genai/inference/proto/prompt_template_pb2.py +27 -0
  324. mediapipe/tasks/cc/genai/inference/proto/sampler_params_pb2.py +29 -0
  325. mediapipe/tasks/cc/genai/inference/proto/transformer_params_pb2.py +45 -0
  326. mediapipe/tasks/cc/genai/inference/utils/__init__.py +0 -0
  327. mediapipe/tasks/cc/genai/inference/utils/llm_utils/__init__.py +0 -0
  328. mediapipe/tasks/cc/genai/inference/utils/xnn_utils/__init__.py +0 -0
  329. mediapipe/tasks/cc/metadata/__init__.py +0 -0
  330. mediapipe/tasks/cc/metadata/python/__init__.py +0 -0
  331. mediapipe/tasks/cc/metadata/python/_pywrap_metadata_version.cpython-312-x86_64-linux-gnu.so +0 -0
  332. mediapipe/tasks/cc/metadata/tests/__init__.py +0 -0
  333. mediapipe/tasks/cc/metadata/utils/__init__.py +0 -0
  334. mediapipe/tasks/cc/text/__init__.py +0 -0
  335. mediapipe/tasks/cc/text/custom_ops/__init__.py +0 -0
  336. mediapipe/tasks/cc/text/custom_ops/ragged/__init__.py +0 -0
  337. mediapipe/tasks/cc/text/custom_ops/sentencepiece/__init__.py +0 -0
  338. mediapipe/tasks/cc/text/custom_ops/sentencepiece/testdata/__init__.py +0 -0
  339. mediapipe/tasks/cc/text/language_detector/__init__.py +0 -0
  340. mediapipe/tasks/cc/text/language_detector/custom_ops/__init__.py +0 -0
  341. mediapipe/tasks/cc/text/language_detector/custom_ops/utils/__init__.py +0 -0
  342. mediapipe/tasks/cc/text/language_detector/custom_ops/utils/hash/__init__.py +0 -0
  343. mediapipe/tasks/cc/text/language_detector/custom_ops/utils/utf/__init__.py +0 -0
  344. mediapipe/tasks/cc/text/text_classifier/__init__.py +0 -0
  345. mediapipe/tasks/cc/text/text_classifier/proto/__init__.py +0 -0
  346. mediapipe/tasks/cc/text/text_classifier/proto/text_classifier_graph_options_pb2.py +35 -0
  347. mediapipe/tasks/cc/text/text_embedder/__init__.py +0 -0
  348. mediapipe/tasks/cc/text/text_embedder/proto/__init__.py +0 -0
  349. mediapipe/tasks/cc/text/text_embedder/proto/text_embedder_graph_options_pb2.py +35 -0
  350. mediapipe/tasks/cc/text/tokenizers/__init__.py +0 -0
  351. mediapipe/tasks/cc/text/utils/__init__.py +0 -0
  352. mediapipe/tasks/cc/vision/__init__.py +0 -0
  353. mediapipe/tasks/cc/vision/core/__init__.py +0 -0
  354. mediapipe/tasks/cc/vision/custom_ops/__init__.py +0 -0
  355. mediapipe/tasks/cc/vision/face_detector/__init__.py +0 -0
  356. mediapipe/tasks/cc/vision/face_detector/proto/__init__.py +0 -0
  357. mediapipe/tasks/cc/vision/face_detector/proto/face_detector_graph_options_pb2.py +34 -0
  358. mediapipe/tasks/cc/vision/face_geometry/__init__.py +0 -0
  359. mediapipe/tasks/cc/vision/face_geometry/calculators/__init__.py +0 -0
  360. mediapipe/tasks/cc/vision/face_geometry/calculators/env_generator_calculator_pb2.py +28 -0
  361. mediapipe/tasks/cc/vision/face_geometry/calculators/geometry_pipeline_calculator_pb2.py +29 -0
  362. mediapipe/tasks/cc/vision/face_geometry/data/__init__.py +0 -0
  363. mediapipe/tasks/cc/vision/face_geometry/libs/__init__.py +0 -0
  364. mediapipe/tasks/cc/vision/face_geometry/proto/__init__.py +0 -0
  365. mediapipe/tasks/cc/vision/face_geometry/proto/environment_pb2.py +31 -0
  366. mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry_graph_options_pb2.py +29 -0
  367. mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry_pb2.py +29 -0
  368. mediapipe/tasks/cc/vision/face_geometry/proto/geometry_pipeline_metadata_pb2.py +32 -0
  369. mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d_pb2.py +31 -0
  370. mediapipe/tasks/cc/vision/face_landmarker/__init__.py +0 -0
  371. mediapipe/tasks/cc/vision/face_landmarker/proto/__init__.py +0 -0
  372. mediapipe/tasks/cc/vision/face_landmarker/proto/face_blendshapes_graph_options_pb2.py +34 -0
  373. mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarker_graph_options_pb2.py +37 -0
  374. mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarks_detector_graph_options_pb2.py +35 -0
  375. mediapipe/tasks/cc/vision/face_landmarker/proto/tensors_to_face_landmarks_graph_options_pb2.py +32 -0
  376. mediapipe/tasks/cc/vision/face_stylizer/__init__.py +0 -0
  377. mediapipe/tasks/cc/vision/face_stylizer/calculators/__init__.py +0 -0
  378. mediapipe/tasks/cc/vision/face_stylizer/calculators/tensors_to_image_calculator_pb2.py +36 -0
  379. mediapipe/tasks/cc/vision/face_stylizer/proto/__init__.py +0 -0
  380. mediapipe/tasks/cc/vision/face_stylizer/proto/face_stylizer_graph_options_pb2.py +35 -0
  381. mediapipe/tasks/cc/vision/gesture_recognizer/__init__.py +0 -0
  382. mediapipe/tasks/cc/vision/gesture_recognizer/calculators/__init__.py +0 -0
  383. mediapipe/tasks/cc/vision/gesture_recognizer/calculators/combined_prediction_calculator_pb2.py +33 -0
  384. mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator_pb2.py +31 -0
  385. mediapipe/tasks/cc/vision/gesture_recognizer/proto/__init__.py +0 -0
  386. mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_classifier_graph_options_pb2.py +35 -0
  387. mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_embedder_graph_options_pb2.py +34 -0
  388. mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_recognizer_graph_options_pb2.py +36 -0
  389. mediapipe/tasks/cc/vision/gesture_recognizer/proto/hand_gesture_recognizer_graph_options_pb2.py +36 -0
  390. mediapipe/tasks/cc/vision/hand_detector/__init__.py +0 -0
  391. mediapipe/tasks/cc/vision/hand_detector/proto/__init__.py +0 -0
  392. mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options_pb2.py +34 -0
  393. mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_result_pb2.py +30 -0
  394. mediapipe/tasks/cc/vision/hand_landmarker/__init__.py +0 -0
  395. mediapipe/tasks/cc/vision/hand_landmarker/calculators/__init__.py +0 -0
  396. mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_association_calculator_pb2.py +31 -0
  397. mediapipe/tasks/cc/vision/hand_landmarker/proto/__init__.py +0 -0
  398. mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options_pb2.py +36 -0
  399. mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options_pb2.py +34 -0
  400. mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_roi_refinement_graph_options_pb2.py +28 -0
  401. mediapipe/tasks/cc/vision/holistic_landmarker/__init__.py +0 -0
  402. mediapipe/tasks/cc/vision/holistic_landmarker/proto/__init__.py +0 -0
  403. mediapipe/tasks/cc/vision/holistic_landmarker/proto/holistic_landmarker_graph_options_pb2.py +34 -0
  404. mediapipe/tasks/cc/vision/holistic_landmarker/proto/holistic_result_pb2.py +29 -0
  405. mediapipe/tasks/cc/vision/image_classifier/__init__.py +0 -0
  406. mediapipe/tasks/cc/vision/image_classifier/proto/__init__.py +0 -0
  407. mediapipe/tasks/cc/vision/image_classifier/proto/image_classifier_graph_options_pb2.py +35 -0
  408. mediapipe/tasks/cc/vision/image_embedder/__init__.py +0 -0
  409. mediapipe/tasks/cc/vision/image_embedder/proto/__init__.py +0 -0
  410. mediapipe/tasks/cc/vision/image_embedder/proto/image_embedder_graph_options_pb2.py +35 -0
  411. mediapipe/tasks/cc/vision/image_generator/__init__.py +0 -0
  412. mediapipe/tasks/cc/vision/image_generator/diffuser/__init__.py +0 -0
  413. mediapipe/tasks/cc/vision/image_generator/diffuser/stable_diffusion_iterate_calculator_pb2.py +40 -0
  414. mediapipe/tasks/cc/vision/image_generator/proto/__init__.py +0 -0
  415. mediapipe/tasks/cc/vision/image_generator/proto/conditioned_image_graph_options_pb2.py +40 -0
  416. mediapipe/tasks/cc/vision/image_generator/proto/control_plugin_graph_options_pb2.py +34 -0
  417. mediapipe/tasks/cc/vision/image_generator/proto/image_generator_graph_options_pb2.py +30 -0
  418. mediapipe/tasks/cc/vision/image_segmenter/__init__.py +0 -0
  419. mediapipe/tasks/cc/vision/image_segmenter/calculators/__init__.py +0 -0
  420. mediapipe/tasks/cc/vision/image_segmenter/calculators/tensors_to_segmentation_calculator_pb2.py +34 -0
  421. mediapipe/tasks/cc/vision/image_segmenter/proto/__init__.py +0 -0
  422. mediapipe/tasks/cc/vision/image_segmenter/proto/image_segmenter_graph_options_pb2.py +35 -0
  423. mediapipe/tasks/cc/vision/image_segmenter/proto/segmenter_options_pb2.py +33 -0
  424. mediapipe/tasks/cc/vision/interactive_segmenter/__init__.py +0 -0
  425. mediapipe/tasks/cc/vision/object_detector/__init__.py +0 -0
  426. mediapipe/tasks/cc/vision/object_detector/proto/__init__.py +0 -0
  427. mediapipe/tasks/cc/vision/object_detector/proto/object_detector_options_pb2.py +34 -0
  428. mediapipe/tasks/cc/vision/pose_detector/__init__.py +0 -0
  429. mediapipe/tasks/cc/vision/pose_detector/proto/__init__.py +0 -0
  430. mediapipe/tasks/cc/vision/pose_detector/proto/pose_detector_graph_options_pb2.py +34 -0
  431. mediapipe/tasks/cc/vision/pose_landmarker/__init__.py +0 -0
  432. mediapipe/tasks/cc/vision/pose_landmarker/proto/__init__.py +0 -0
  433. mediapipe/tasks/cc/vision/pose_landmarker/proto/pose_landmarker_graph_options_pb2.py +36 -0
  434. mediapipe/tasks/cc/vision/pose_landmarker/proto/pose_landmarks_detector_graph_options_pb2.py +34 -0
  435. mediapipe/tasks/cc/vision/utils/__init__.py +0 -0
  436. mediapipe/tasks/cc/vision/utils/ghum/__init__.py +0 -0
  437. mediapipe/tasks/metadata/image_segmenter_metadata_schema.fbs +59 -0
  438. mediapipe/tasks/metadata/image_segmenter_metadata_schema_py_generated.py +108 -0
  439. mediapipe/tasks/metadata/metadata_schema.fbs +732 -0
  440. mediapipe/tasks/metadata/metadata_schema_py_generated.py +3251 -0
  441. mediapipe/tasks/metadata/object_detector_metadata_schema.fbs +98 -0
  442. mediapipe/tasks/metadata/object_detector_metadata_schema_py_generated.py +674 -0
  443. mediapipe/tasks/metadata/schema_py_generated.py +18438 -0
  444. mediapipe/tasks/python/__init__.py +27 -0
  445. mediapipe/tasks/python/audio/__init__.py +33 -0
  446. mediapipe/tasks/python/audio/audio_classifier.py +324 -0
  447. mediapipe/tasks/python/audio/audio_embedder.py +285 -0
  448. mediapipe/tasks/python/audio/core/__init__.py +16 -0
  449. mediapipe/tasks/python/audio/core/audio_record.py +125 -0
  450. mediapipe/tasks/python/audio/core/audio_task_running_mode.py +29 -0
  451. mediapipe/tasks/python/audio/core/base_audio_task_api.py +181 -0
  452. mediapipe/tasks/python/benchmark/__init__.py +13 -0
  453. mediapipe/tasks/python/benchmark/benchmark_utils.py +70 -0
  454. mediapipe/tasks/python/benchmark/vision/__init__.py +13 -0
  455. mediapipe/tasks/python/benchmark/vision/benchmark.py +99 -0
  456. mediapipe/tasks/python/benchmark/vision/core/__init__.py +14 -0
  457. mediapipe/tasks/python/benchmark/vision/core/base_vision_benchmark_api.py +40 -0
  458. mediapipe/tasks/python/components/__init__.py +13 -0
  459. mediapipe/tasks/python/components/containers/__init__.py +53 -0
  460. mediapipe/tasks/python/components/containers/audio_data.py +137 -0
  461. mediapipe/tasks/python/components/containers/bounding_box.py +73 -0
  462. mediapipe/tasks/python/components/containers/category.py +78 -0
  463. mediapipe/tasks/python/components/containers/classification_result.py +111 -0
  464. mediapipe/tasks/python/components/containers/detections.py +181 -0
  465. mediapipe/tasks/python/components/containers/embedding_result.py +89 -0
  466. mediapipe/tasks/python/components/containers/keypoint.py +77 -0
  467. mediapipe/tasks/python/components/containers/landmark.py +122 -0
  468. mediapipe/tasks/python/components/containers/landmark_detection_result.py +106 -0
  469. mediapipe/tasks/python/components/containers/rect.py +109 -0
  470. mediapipe/tasks/python/components/processors/__init__.py +23 -0
  471. mediapipe/tasks/python/components/processors/classifier_options.py +86 -0
  472. mediapipe/tasks/python/components/utils/__init__.py +13 -0
  473. mediapipe/tasks/python/components/utils/cosine_similarity.py +68 -0
  474. mediapipe/tasks/python/core/__init__.py +13 -0
  475. mediapipe/tasks/python/core/base_options.py +121 -0
  476. mediapipe/tasks/python/core/optional_dependencies.py +25 -0
  477. mediapipe/tasks/python/core/task_info.py +139 -0
  478. mediapipe/tasks/python/genai/__init__.py +14 -0
  479. mediapipe/tasks/python/genai/bundler/__init__.py +23 -0
  480. mediapipe/tasks/python/genai/bundler/llm_bundler.py +130 -0
  481. mediapipe/tasks/python/genai/bundler/llm_bundler_test.py +168 -0
  482. mediapipe/tasks/python/genai/converter/__init__.py +24 -0
  483. mediapipe/tasks/python/genai/converter/converter_base.py +179 -0
  484. mediapipe/tasks/python/genai/converter/converter_factory.py +79 -0
  485. mediapipe/tasks/python/genai/converter/llm_converter.py +374 -0
  486. mediapipe/tasks/python/genai/converter/llm_converter_test.py +63 -0
  487. mediapipe/tasks/python/genai/converter/pytorch_converter.py +318 -0
  488. mediapipe/tasks/python/genai/converter/pytorch_converter_test.py +86 -0
  489. mediapipe/tasks/python/genai/converter/quantization_util.py +516 -0
  490. mediapipe/tasks/python/genai/converter/quantization_util_test.py +259 -0
  491. mediapipe/tasks/python/genai/converter/safetensors_converter.py +580 -0
  492. mediapipe/tasks/python/genai/converter/safetensors_converter_test.py +83 -0
  493. mediapipe/tasks/python/genai/converter/weight_bins_writer.py +120 -0
  494. mediapipe/tasks/python/genai/converter/weight_bins_writer_test.py +95 -0
  495. mediapipe/tasks/python/metadata/__init__.py +13 -0
  496. mediapipe/tasks/python/metadata/flatbuffers_lib/_pywrap_flatbuffers.cpython-312-x86_64-linux-gnu.so +0 -0
  497. mediapipe/tasks/python/metadata/metadata.py +928 -0
  498. mediapipe/tasks/python/metadata/metadata_displayer_cli.py +34 -0
  499. mediapipe/tasks/python/metadata/metadata_writers/__init__.py +13 -0
  500. mediapipe/tasks/python/metadata/metadata_writers/face_stylizer.py +138 -0
  501. mediapipe/tasks/python/metadata/metadata_writers/image_classifier.py +71 -0
  502. mediapipe/tasks/python/metadata/metadata_writers/image_segmenter.py +170 -0
  503. mediapipe/tasks/python/metadata/metadata_writers/metadata_info.py +1166 -0
  504. mediapipe/tasks/python/metadata/metadata_writers/metadata_writer.py +845 -0
  505. mediapipe/tasks/python/metadata/metadata_writers/model_asset_bundle_utils.py +71 -0
  506. mediapipe/tasks/python/metadata/metadata_writers/object_detector.py +331 -0
  507. mediapipe/tasks/python/metadata/metadata_writers/text_classifier.py +119 -0
  508. mediapipe/tasks/python/metadata/metadata_writers/writer_utils.py +91 -0
  509. mediapipe/tasks/python/test/__init__.py +13 -0
  510. mediapipe/tasks/python/test/audio/__init__.py +13 -0
  511. mediapipe/tasks/python/test/audio/audio_classifier_test.py +387 -0
  512. mediapipe/tasks/python/test/audio/audio_embedder_test.py +297 -0
  513. mediapipe/tasks/python/test/test_utils.py +196 -0
  514. mediapipe/tasks/python/test/text/__init__.py +13 -0
  515. mediapipe/tasks/python/test/text/language_detector_test.py +228 -0
  516. mediapipe/tasks/python/test/text/text_classifier_test.py +235 -0
  517. mediapipe/tasks/python/test/text/text_embedder_test.py +326 -0
  518. mediapipe/tasks/python/test/vision/__init__.py +13 -0
  519. mediapipe/tasks/python/test/vision/face_aligner_test.py +190 -0
  520. mediapipe/tasks/python/test/vision/face_detector_test.py +523 -0
  521. mediapipe/tasks/python/test/vision/face_landmarker_test.py +565 -0
  522. mediapipe/tasks/python/test/vision/face_stylizer_test.py +191 -0
  523. mediapipe/tasks/python/test/vision/hand_landmarker_test.py +437 -0
  524. mediapipe/tasks/python/test/vision/holistic_landmarker_test.py +544 -0
  525. mediapipe/tasks/python/test/vision/image_classifier_test.py +657 -0
  526. mediapipe/tasks/python/test/vision/image_embedder_test.py +423 -0
  527. mediapipe/tasks/python/test/vision/image_segmenter_test.py +512 -0
  528. mediapipe/tasks/python/test/vision/interactive_segmenter_test.py +341 -0
  529. mediapipe/tasks/python/test/vision/object_detector_test.py +493 -0
  530. mediapipe/tasks/python/test/vision/pose_landmarker_test.py +518 -0
  531. mediapipe/tasks/python/text/__init__.py +35 -0
  532. mediapipe/tasks/python/text/core/__init__.py +16 -0
  533. mediapipe/tasks/python/text/core/base_text_task_api.py +54 -0
  534. mediapipe/tasks/python/text/language_detector.py +220 -0
  535. mediapipe/tasks/python/text/text_classifier.py +187 -0
  536. mediapipe/tasks/python/text/text_embedder.py +188 -0
  537. mediapipe/tasks/python/vision/__init__.py +90 -0
  538. mediapipe/tasks/python/vision/core/__init__.py +14 -0
  539. mediapipe/tasks/python/vision/core/base_vision_task_api.py +226 -0
  540. mediapipe/tasks/python/vision/core/image_processing_options.py +39 -0
  541. mediapipe/tasks/python/vision/core/vision_task_running_mode.py +31 -0
  542. mediapipe/tasks/python/vision/face_aligner.py +158 -0
  543. mediapipe/tasks/python/vision/face_detector.py +332 -0
  544. mediapipe/tasks/python/vision/face_landmarker.py +3244 -0
  545. mediapipe/tasks/python/vision/face_stylizer.py +158 -0
  546. mediapipe/tasks/python/vision/gesture_recognizer.py +480 -0
  547. mediapipe/tasks/python/vision/hand_landmarker.py +504 -0
  548. mediapipe/tasks/python/vision/holistic_landmarker.py +576 -0
  549. mediapipe/tasks/python/vision/image_classifier.py +358 -0
  550. mediapipe/tasks/python/vision/image_embedder.py +362 -0
  551. mediapipe/tasks/python/vision/image_segmenter.py +433 -0
  552. mediapipe/tasks/python/vision/interactive_segmenter.py +285 -0
  553. mediapipe/tasks/python/vision/object_detector.py +389 -0
  554. mediapipe/tasks/python/vision/pose_landmarker.py +455 -0
  555. mediapipe/util/__init__.py +0 -0
  556. mediapipe/util/analytics/__init__.py +0 -0
  557. mediapipe/util/analytics/mediapipe_log_extension_pb2.py +44 -0
  558. mediapipe/util/analytics/mediapipe_logging_enums_pb2.py +37 -0
  559. mediapipe/util/audio_decoder_pb2.py +33 -0
  560. mediapipe/util/color_pb2.py +33 -0
  561. mediapipe/util/label_map_pb2.py +27 -0
  562. mediapipe/util/render_data_pb2.py +58 -0
  563. mediapipe/util/sequence/__init__.py +14 -0
  564. mediapipe/util/sequence/media_sequence.py +716 -0
  565. mediapipe/util/sequence/media_sequence_test.py +290 -0
  566. mediapipe/util/sequence/media_sequence_util.py +800 -0
  567. mediapipe/util/sequence/media_sequence_util_test.py +389 -0
  568. mediapipe/util/tracking/__init__.py +0 -0
  569. mediapipe/util/tracking/box_detector_pb2.py +39 -0
  570. mediapipe/util/tracking/box_tracker_pb2.py +32 -0
  571. mediapipe/util/tracking/camera_motion_pb2.py +31 -0
  572. mediapipe/util/tracking/flow_packager_pb2.py +60 -0
  573. mediapipe/util/tracking/frame_selection_pb2.py +35 -0
  574. mediapipe/util/tracking/frame_selection_solution_evaluator_pb2.py +28 -0
  575. mediapipe/util/tracking/motion_analysis_pb2.py +35 -0
  576. mediapipe/util/tracking/motion_estimation_pb2.py +66 -0
  577. mediapipe/util/tracking/motion_models_pb2.py +42 -0
  578. mediapipe/util/tracking/motion_saliency_pb2.py +26 -0
  579. mediapipe/util/tracking/push_pull_filtering_pb2.py +26 -0
  580. mediapipe/util/tracking/region_flow_computation_pb2.py +59 -0
  581. mediapipe/util/tracking/region_flow_pb2.py +49 -0
  582. mediapipe/util/tracking/tone_estimation_pb2.py +45 -0
  583. mediapipe/util/tracking/tone_models_pb2.py +32 -0
  584. mediapipe/util/tracking/tracked_detection_manager_config_pb2.py +26 -0
  585. mediapipe/util/tracking/tracking_pb2.py +73 -0
  586. mediapipe_nightly-0.10.21.post20250114.dist-info/LICENSE +218 -0
  587. mediapipe_nightly-0.10.21.post20250114.dist-info/METADATA +199 -0
  588. mediapipe_nightly-0.10.21.post20250114.dist-info/RECORD +593 -0
  589. mediapipe_nightly-0.10.21.post20250114.dist-info/WHEEL +5 -0
  590. mediapipe_nightly-0.10.21.post20250114.dist-info/top_level.txt +4 -0
  591. mediapipe_nightly.libs/libEGL-48f73270.so.1.1.0 +0 -0
  592. mediapipe_nightly.libs/libGLESv2-ed5eda4f.so.2.1.0 +0 -0
  593. mediapipe_nightly.libs/libGLdispatch-64b28464.so.0.0.0 +0 -0
@@ -0,0 +1,27 @@
1
+ # Copyright 2022 The MediaPipe Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """MediaPipe Tasks API."""
16
+
17
# Task sub-packages, imported so they are reachable as attributes of this
# package.
from . import audio
from . import components
from . import core
from . import genai
from . import text
from . import vision

# Lift `BaseOptions` out of `core` so it is available at the package top level.
BaseOptions = core.base_options.BaseOptions

# Unbind `core` now that `BaseOptions` has been re-exported; this avoids the
# module showing up a second time in the generated API docs.
del core
@@ -0,0 +1,33 @@
1
+ # Copyright 2022 The MediaPipe Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """MediaPipe Tasks Audio API."""
16
+
17
# NOTE(review): the aliases below use the bare names `core`,
# `audio_classifier` and `audio_embedder`. They are available here because
# importing a submodule also binds it as an attribute of its parent package,
# and this file is that package's namespace.
import mediapipe.tasks.python.audio.core
import mediapipe.tasks.python.audio.audio_classifier
import mediapipe.tasks.python.audio.audio_embedder

# Audio classifier public names.
AudioClassifier = audio_classifier.AudioClassifier
AudioClassifierOptions = audio_classifier.AudioClassifierOptions
AudioClassifierResult = audio_classifier.AudioClassifierResult

# Audio embedder public names.
AudioEmbedder = audio_embedder.AudioEmbedder
AudioEmbedderOptions = audio_embedder.AudioEmbedderOptions
AudioEmbedderResult = audio_embedder.AudioEmbedderResult

# Running-mode enum, exposed under a shorter name.
RunningMode = core.audio_task_running_mode.AudioTaskRunningMode

# Remove unnecessary modules to avoid duplication in API docs.
del audio_classifier, audio_embedder, core, mediapipe
@@ -0,0 +1,324 @@
1
+ # Copyright 2022 The MediaPipe Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """MediaPipe audio classifier task."""
15
+
16
+ import dataclasses
17
+ from typing import Callable, Mapping, List, Optional
18
+
19
+ from mediapipe.python import packet_creator
20
+ from mediapipe.python import packet_getter
21
+ from mediapipe.python._framework_bindings import packet
22
+ from mediapipe.tasks.cc.audio.audio_classifier.proto import audio_classifier_graph_options_pb2
23
+ from mediapipe.tasks.cc.components.containers.proto import classifications_pb2
24
+ from mediapipe.tasks.cc.components.processors.proto import classifier_options_pb2
25
+ from mediapipe.tasks.python.audio.core import audio_task_running_mode as running_mode_module
26
+ from mediapipe.tasks.python.audio.core import base_audio_task_api
27
+ from mediapipe.tasks.python.components.containers import audio_data as audio_data_module
28
+ from mediapipe.tasks.python.components.containers import classification_result as classification_result_module
29
+ from mediapipe.tasks.python.core import base_options as base_options_module
30
+ from mediapipe.tasks.python.core import task_info as task_info_module
31
+ from mediapipe.tasks.python.core.optional_dependencies import doc_controls
32
+
33
# Public alias: the result type produced by the audio classifier.
AudioClassifierResult = classification_result_module.ClassificationResult

# Private shorthands for the project types used throughout this module.
_AudioData = audio_data_module.AudioData
_BaseOptions = base_options_module.BaseOptions
_RunningMode = running_mode_module.AudioTaskRunningMode
_TaskInfo = task_info_module.TaskInfo
_AudioClassifierGraphOptionsProto = (
    audio_classifier_graph_options_pb2.AudioClassifierGraphOptions)
_ClassifierOptionsProto = classifier_options_pb2.ClassifierOptions

# Name of the task graph handed to `_TaskInfo`.
_TASK_GRAPH_NAME = 'mediapipe.tasks.audio.audio_classifier.AudioClassifierGraph'

# Input streams, each as a (tag, stream name) pair joined with ':' when the
# graph config is built.
_AUDIO_TAG = 'AUDIO'
_AUDIO_IN_STREAM_NAME = 'audio_in'
_SAMPLE_RATE_TAG = 'SAMPLE_RATE'
_SAMPLE_RATE_IN_STREAM_NAME = 'sample_rate_in'

# Output streams, same (tag, stream name) convention.
_CLASSIFICATIONS_TAG = 'CLASSIFICATIONS'
_CLASSIFICATIONS_STREAM_NAME = 'classifications_out'
_TIMESTAMPED_CLASSIFICATIONS_TAG = 'TIMESTAMPED_CLASSIFICATIONS'
_TIMESTAMPED_CLASSIFICATIONS_STREAM_NAME = 'timestamped_classifications_out'

# Conversion factor between the millisecond timestamps of the public API and
# the microsecond timestamps carried by packets.
_MICRO_SECONDS_PER_MILLISECOND = 1000
51
+
52
+
53
@dataclasses.dataclass
class AudioClassifierOptions:
  """Options for the audio classifier task.

  Attributes:
    base_options: Base options for the audio classifier task.
    running_mode: The running mode of the task. Defaults to the audio clips
      mode. Audio classifier task has two running modes: 1) The audio clips
      mode for running classification on independent audio clips. 2) The audio
      stream mode for running classification on the audio stream, such as from
      microphone. In this mode, the "result_callback" below must be specified
      to receive the classification results asynchronously.
    display_names_locale: The locale to use for display names specified through
      the TFLite Model Metadata.
    max_results: The maximum number of top-scored classification results to
      return.
    score_threshold: Overrides the ones provided in the model metadata. Results
      below this value are rejected.
    category_allowlist: Allowlist of category names. If non-empty,
      classification results whose category name is not in this set will be
      filtered out. Duplicate or unknown category names are ignored. Mutually
      exclusive with `category_denylist`.
    category_denylist: Denylist of category names. If non-empty, classification
      results whose category name is in this set will be filtered out. Duplicate
      or unknown category names are ignored. Mutually exclusive with
      `category_allowlist`.
    result_callback: The user-defined result callback for processing audio
      stream data. The result callback should only be specified when the running
      mode is set to the audio stream mode.
  """
  base_options: _BaseOptions
  running_mode: _RunningMode = _RunningMode.AUDIO_CLIPS
  display_names_locale: Optional[str] = None
  max_results: Optional[int] = None
  score_threshold: Optional[float] = None
  category_allowlist: Optional[List[str]] = None
  category_denylist: Optional[List[str]] = None
  result_callback: Optional[Callable[[AudioClassifierResult, int], None]] = None

  @doc_controls.do_not_generate_docs
  def to_pb2(self) -> _AudioClassifierGraphOptionsProto:
    """Generates an AudioClassifierGraphOptions protobuf object.

    Returns:
      A graph-options proto whose `base_options` come from
      `self.base_options.to_pb2()` and whose `classifier_options` mirror the
      classifier-related fields of this dataclass.
    """
    base_options_proto = self.base_options.to_pb2()
    # Stream mode is enabled for every running mode other than AUDIO_CLIPS.
    base_options_proto.use_stream_mode = (
        self.running_mode != _RunningMode.AUDIO_CLIPS)
    classifier_options_proto = _ClassifierOptionsProto(
        score_threshold=self.score_threshold,
        category_allowlist=self.category_allowlist,
        category_denylist=self.category_denylist,
        display_names_locale=self.display_names_locale,
        max_results=self.max_results)

    return _AudioClassifierGraphOptionsProto(
        base_options=base_options_proto,
        classifier_options=classifier_options_proto)
107
+
108
+
109
class AudioClassifier(base_audio_task_api.BaseAudioTaskApi):
  """Class that performs audio classification on audio data.

  This API expects a TFLite model with mandatory TFLite Model Metadata that
  contains the mandatory AudioProperties of the solo input audio tensor and the
  optional (but recommended) category labels as AssociatedFiles with type
  TENSOR_AXIS_LABELS per output classification tensor.

  Input tensor:
    (kTfLiteFloat32)
    - input audio buffer of size `[batch * samples]`.
    - batch inference is not supported (`batch` is required to be 1).
    - for multi-channel models, the channels must be interleaved.
  At least one output tensor with:
    (kTfLiteFloat32)
    - `[1 x N]` array with `N` represents the number of categories.
    - optional (but recommended) category labels as AssociatedFiles with type
      TENSOR_AXIS_LABELS, containing one label per line. The first such
      AssociatedFile (if any) is used to fill the `category_name` field of the
      results. The `display_name` field is filled from the AssociatedFile (if
      any) whose locale matches the `display_names_locale` field of the
      `AudioClassifierOptions` used at creation time ("en" by default, i.e.
      English). If none of these are available, only the `index` field of the
      results will be filled.
  """

  @classmethod
  def create_from_model_path(cls, model_path: str) -> 'AudioClassifier':
    """Creates an `AudioClassifier` object from a TensorFlow Lite model and the default `AudioClassifierOptions`.

    Note that the created `AudioClassifier` instance is in audio clips mode, for
    classifying on independent audio clips.

    Args:
      model_path: Path to the model.

    Returns:
      `AudioClassifier` object that's created from the model file and the
      default `AudioClassifierOptions`.

    Raises:
      ValueError: If failed to create `AudioClassifier` object from the provided
        file such as invalid file path.
      RuntimeError: If other types of error occurred.
    """
    base_options = _BaseOptions(model_asset_path=model_path)
    options = AudioClassifierOptions(
        base_options=base_options, running_mode=_RunningMode.AUDIO_CLIPS)
    return cls.create_from_options(options)

  @classmethod
  def create_from_options(cls,
                          options: AudioClassifierOptions) -> 'AudioClassifier':
    """Creates the `AudioClassifier` object from audio classifier options.

    Args:
      options: Options for the audio classifier task.

    Returns:
      `AudioClassifier` object that's created from `options`.

    Raises:
      ValueError: If failed to create `AudioClassifier` object from
        `AudioClassifierOptions` such as missing the model.
      RuntimeError: If other types of error occurred.
    """

    def packets_callback(output_packets: Mapping[str, packet.Packet]):
      """Converts the classifications packet and forwards it to the user."""
      # Look the packet up once; it is needed for the timestamp, the emptiness
      # check and the payload.
      classifications_packet = output_packets[_CLASSIFICATIONS_STREAM_NAME]
      timestamp_ms = (
          classifications_packet.timestamp.value
          // _MICRO_SECONDS_PER_MILLISECOND)
      if classifications_packet.is_empty():
        # Still invoke the callback for an empty packet, with an empty result.
        options.result_callback(
            AudioClassifierResult(classifications=[]), timestamp_ms)
        return
      classification_result_proto = classifications_pb2.ClassificationResult()
      classification_result_proto.CopyFrom(
          packet_getter.get_proto(classifications_packet))
      options.result_callback(
          AudioClassifierResult.create_from_pb2(classification_result_proto),
          timestamp_ms)

    task_info = _TaskInfo(
        task_graph=_TASK_GRAPH_NAME,
        input_streams=[
            ':'.join([_AUDIO_TAG, _AUDIO_IN_STREAM_NAME]),
            ':'.join([_SAMPLE_RATE_TAG, _SAMPLE_RATE_IN_STREAM_NAME])
        ],
        output_streams=[
            ':'.join([_CLASSIFICATIONS_TAG, _CLASSIFICATIONS_STREAM_NAME]),
            ':'.join([
                _TIMESTAMPED_CLASSIFICATIONS_TAG,
                _TIMESTAMPED_CLASSIFICATIONS_STREAM_NAME
            ])
        ],
        task_options=options)
    return cls(
        # Audio tasks should not drop input audio due to flow limiting, which
        # may cause data inconsistency.
        task_info.generate_graph_config(enable_flow_limiting=False),
        options.running_mode,
        # Only register the packets callback when the user supplied one.
        packets_callback if options.result_callback else None)

  def classify(self, audio_clip: _AudioData) -> List[AudioClassifierResult]:
    """Performs audio classification on the provided audio clip.

    The audio clip is represented as a MediaPipe AudioData. The method accepts
    audio clips with various length and audio sample rate. It's required to
    provide the corresponding audio sample rate within the `AudioData` object.

    The input audio clip may be longer than what the model is able to process
    in a single inference. When this occurs, the input audio clip is split into
    multiple chunks starting at different timestamps. For this reason, this
    function returns a list of ClassificationResult objects, each associated
    with a timestamp corresponding to the start (in milliseconds) of the chunk
    data that was classified, e.g:

    ClassificationResult #0 (first chunk of data):
      timestamp_ms: 0 (starts at 0ms)
      classifications #0 (single head model):
        category #0:
          category_name: "Speech"
          score: 0.6
        category #1:
          category_name: "Music"
          score: 0.2
    ClassificationResult #1 (second chunk of data):
      timestamp_ms: 800 (starts at 800ms)
      classifications #0 (single head model):
        category #0:
          category_name: "Speech"
          score: 0.5
        category #1:
          category_name: "Silence"
          score: 0.1

    Args:
      audio_clip: MediaPipe AudioData.

    Returns:
      A list of `AudioClassifierResult` objects, each associated with a
      timestamp corresponding to the start (in milliseconds) of the chunk data
      that was classified.

    Raises:
      ValueError: If any of the input arguments is invalid, such as the sample
        rate is not provided in the `AudioData` object.
      RuntimeError: If audio classification failed to run.
    """
    if not audio_clip.audio_format.sample_rate:
      raise ValueError('Must provide the audio sample rate in audio data.')
    output_packets = self._process_audio_clip({
        _AUDIO_IN_STREAM_NAME:
            packet_creator.create_matrix(audio_clip.buffer, transpose=True),
        _SAMPLE_RATE_IN_STREAM_NAME:
            packet_creator.create_double(audio_clip.audio_format.sample_rate)
    })
    output_list = []
    classification_result_proto_list = packet_getter.get_proto_list(
        output_packets[_TIMESTAMPED_CLASSIFICATIONS_STREAM_NAME])
    for proto in classification_result_proto_list:
      # Copy each entry into a concrete ClassificationResult proto before
      # converting it to the Python result type.
      classification_result_proto = classifications_pb2.ClassificationResult()
      classification_result_proto.CopyFrom(proto)
      output_list.append(
          AudioClassifierResult.create_from_pb2(classification_result_proto))
    return output_list

  def classify_async(self, audio_block: _AudioData, timestamp_ms: int) -> None:
    """Sends audio data (a block in a continuous audio stream) to perform audio classification.

    Only use this method when the AudioClassifier is created with the audio
    stream running mode. The input timestamps should be monotonically increasing
    for adjacent calls of this method. This method will return immediately after
    the input audio data is accepted. The results will be available via the
    `result_callback` provided in the `AudioClassifierOptions`. The
    `classify_async` method is designed to process audio stream data such as
    microphone input.

    The input audio data may be longer than what the model is able to process
    in a single inference. When this occurs, the input audio block is split
    into multiple chunks. For this reason, the callback may be called multiple
    times (once per chunk) for each call to this function.

    The `result_callback` provides:
      - An `AudioClassifierResult` object that contains a list of
        classifications.
      - The input timestamp in milliseconds.

    Args:
      audio_block: MediaPipe AudioData.
      timestamp_ms: The timestamp of the input audio data in milliseconds.

    Raises:
      ValueError: If any of the following:
        1) The sample rate is not provided in the `AudioData` object or the
          provided sample rate is inconsistent with the previously received.
        2) The current input timestamp is smaller than what the audio
          classifier has already processed.
    """
    if not audio_block.audio_format.sample_rate:
      raise ValueError('Must provide the audio sample rate in audio data.')
    if not self._default_sample_rate:
      # First block seen: remember its sample rate and pass it on once via
      # `_set_sample_rate`; later blocks are only checked against it.
      self._default_sample_rate = audio_block.audio_format.sample_rate
      self._set_sample_rate(_SAMPLE_RATE_IN_STREAM_NAME,
                            self._default_sample_rate)
    elif audio_block.audio_format.sample_rate != self._default_sample_rate:
      raise ValueError(
          f'The audio sample rate provided in audio data: '
          f'{audio_block.audio_format.sample_rate} is inconsistent with '
          f'the previously received: {self._default_sample_rate}.')

    self._send_audio_stream_data({
        _AUDIO_IN_STREAM_NAME:
            # Packet timestamps are in microseconds; the API takes
            # milliseconds.
            packet_creator.create_matrix(audio_block.buffer, transpose=True).at(
                timestamp_ms * _MICRO_SECONDS_PER_MILLISECOND)
    })
@@ -0,0 +1,285 @@
1
+ # Copyright 2022 The MediaPipe Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """MediaPipe audio embedder task."""
15
+
16
+ import dataclasses
17
+ from typing import Callable, Mapping, List, Optional
18
+
19
+ from mediapipe.python import packet_creator
20
+ from mediapipe.python import packet_getter
21
+ from mediapipe.python._framework_bindings import packet
22
+ from mediapipe.tasks.cc.audio.audio_embedder.proto import audio_embedder_graph_options_pb2
23
+ from mediapipe.tasks.cc.components.containers.proto import embeddings_pb2
24
+ from mediapipe.tasks.cc.components.processors.proto import embedder_options_pb2
25
+ from mediapipe.tasks.python.audio.core import audio_task_running_mode as running_mode_module
26
+ from mediapipe.tasks.python.audio.core import base_audio_task_api
27
+ from mediapipe.tasks.python.components.containers import audio_data as audio_data_module
28
+ from mediapipe.tasks.python.components.containers import embedding_result as embedding_result_module
29
+ from mediapipe.tasks.python.core import base_options as base_options_module
30
+ from mediapipe.tasks.python.core import task_info as task_info_module
31
+ from mediapipe.tasks.python.core.optional_dependencies import doc_controls
32
+
33
# Public result type returned by the audio embedder task.
AudioEmbedderResult = embedding_result_module.EmbeddingResult

# Short private aliases for the project types used throughout this module.
_AudioData = audio_data_module.AudioData
_AudioEmbedderGraphOptionsProto = (
    audio_embedder_graph_options_pb2.AudioEmbedderGraphOptions)
_BaseOptions = base_options_module.BaseOptions
_EmbedderOptionsProto = embedder_options_pb2.EmbedderOptions
_RunningMode = running_mode_module.AudioTaskRunningMode
_TaskInfo = task_info_module.TaskInfo

# Name of the task graph handed to `_TaskInfo`.
_TASK_GRAPH_NAME = 'mediapipe.tasks.audio.audio_embedder.AudioEmbedderGraph'

# Graph input streams. Each tag is joined with its stream name as
# 'TAG:stream_name' when the task graph config is built.
_AUDIO_TAG = 'AUDIO'
_AUDIO_IN_STREAM_NAME = 'audio_in'
_SAMPLE_RATE_TAG = 'SAMPLE_RATE'
_SAMPLE_RATE_IN_STREAM_NAME = 'sample_rate_in'

# Graph output streams, joined with their tags the same way.
# NOTE: 'TIMESTAMPTED' in the two identifiers below is a misspelling of
# 'TIMESTAMPED'; the names are kept because the rest of the module refers to
# them.
_EMBEDDINGS_TAG = 'EMBEDDINGS'
_EMBEDDINGS_STREAM_NAME = 'embeddings_out'
_TIMESTAMPTED_EMBEDDINGS_TAG = 'TIMESTAMPED_EMBEDDINGS'
_TIMESTAMPTED_EMBEDDINGS_STREAM_NAME = 'timestamped_embeddings_out'

# Factor used to convert between millisecond timestamps (public API) and
# microsecond packet timestamps.
_MICRO_SECONDS_PER_MILLISECOND = 1000
51
+
52
+
53
@dataclasses.dataclass
class AudioEmbedderOptions:
  """Configuration for the audio embedder task.

  Attributes:
    base_options: Base options for the audio embedder task.
    running_mode: The running mode of the task; defaults to the audio clips
      mode. Two modes are available: 1) the audio clips mode, which runs
      embedding extraction on independent audio clips, and 2) the audio stream
      mode, which runs embedding extraction on an audio stream such as
      microphone input. In audio stream mode, `result_callback` must be set so
      that embedding results can be received asynchronously.
    l2_normalize: Whether to normalize the returned feature vector with L2
      norm. Use this option only if the model does not already contain a
      native L2_NORMALIZATION TF Lite Op. Most models already contain one, in
      which case L2 normalization is achieved through TF Lite inference.
    quantize: Whether the returned embedding should be quantized to bytes via
      scalar quantization. Embeddings are implicitly assumed to be unit-norm,
      so every dimension is guaranteed to have a value in [-1.0, 1.0]. Use the
      `l2_normalize` option if this is not the case.
    result_callback: The user-defined callback that receives results when
      processing audio stream data. It should only be specified when the
      running mode is set to the audio stream mode.
  """
  base_options: _BaseOptions
  running_mode: _RunningMode = _RunningMode.AUDIO_CLIPS
  l2_normalize: Optional[bool] = None
  quantize: Optional[bool] = None
  result_callback: Optional[Callable[[AudioEmbedderResult, int], None]] = None

  @doc_controls.do_not_generate_docs
  def to_pb2(self) -> _AudioEmbedderGraphOptionsProto:
    """Generates an AudioEmbedderOptions protobuf object."""
    graph_base_options = self.base_options.to_pb2()
    # Stream mode is enabled for every running mode except audio clips.
    graph_base_options.use_stream_mode = (
        self.running_mode != _RunningMode.AUDIO_CLIPS)
    return _AudioEmbedderGraphOptionsProto(
        base_options=graph_base_options,
        embedder_options=_EmbedderOptionsProto(
            l2_normalize=self.l2_normalize, quantize=self.quantize))
94
+
95
+
96
class AudioEmbedder(base_audio_task_api.BaseAudioTaskApi):
  """Performs embedding extraction on audio clips or on an audio stream.

  This API expects a TFLite model with mandatory TFLite Model Metadata that
  contains the mandatory AudioProperties of the solo input audio tensor and the
  optional (but recommended) label items as AssociatedFiles with type
  TENSOR_AXIS_LABELS per output embedding tensor.

  Input tensor:
    (kTfLiteFloat32)
    - input audio buffer of size `[batch * samples]`.
    - batch inference is not supported (`batch` is required to be 1).
    - for multi-channel models, the channels must be interleaved.
  At least one output tensor with:
    (kTfLiteUInt8/kTfLiteFloat32)
    - `N` components corresponding to the `N` dimensions of the returned
      feature vector for this output layer.
    - Either 2 or 4 dimensions, i.e. `[1 x N]` or `[1 x 1 x 1 x N]`.
  """

  @classmethod
  def create_from_model_path(cls, model_path: str) -> 'AudioEmbedder':
    """Creates an `AudioEmbedder` from a model file and default options.

    The created `AudioEmbedder` instance is in audio clips mode, for embedding
    extraction on independent audio clips.

    Args:
      model_path: Path to the TensorFlow Lite model.

    Returns:
      `AudioEmbedder` object that's created from the model file and the
      default `AudioEmbedderOptions`.

    Raises:
      ValueError: If failed to create `AudioEmbedder` object from the provided
        file such as invalid file path.
      RuntimeError: If other types of error occurred.
    """
    return cls.create_from_options(
        AudioEmbedderOptions(
            base_options=_BaseOptions(model_asset_path=model_path),
            running_mode=_RunningMode.AUDIO_CLIPS))

  @classmethod
  def create_from_options(
      cls, options: AudioEmbedderOptions) -> 'AudioEmbedder':
    """Creates the `AudioEmbedder` object from audio embedder options.

    Args:
      options: Options for the audio embedder task.

    Returns:
      `AudioEmbedder` object that's created from `options`.

    Raises:
      ValueError: If failed to create `AudioEmbedder` object from
        `AudioEmbedderOptions` such as missing the model.
      RuntimeError: If other types of error occurred.
    """

    def packets_callback(output_packets: Mapping[str, packet.Packet]):
      # Forwards one graph output to the user callback, converting the packet
      # timestamp from microseconds to milliseconds.
      embeddings_packet = output_packets[_EMBEDDINGS_STREAM_NAME]
      timestamp_ms = (
          embeddings_packet.timestamp.value // _MICRO_SECONDS_PER_MILLISECOND)
      if embeddings_packet.is_empty():
        # An empty packet is reported as a result with no embeddings.
        result = AudioEmbedderResult(embeddings=[])
      else:
        embedding_result_proto = embeddings_pb2.EmbeddingResult()
        embedding_result_proto.CopyFrom(
            packet_getter.get_proto(embeddings_packet))
        result = AudioEmbedderResult.create_from_pb2(embedding_result_proto)
      options.result_callback(result, timestamp_ms)

    # Stream specs use the 'TAG:stream_name' form.
    input_streams = [
        f'{_AUDIO_TAG}:{_AUDIO_IN_STREAM_NAME}',
        f'{_SAMPLE_RATE_TAG}:{_SAMPLE_RATE_IN_STREAM_NAME}',
    ]
    output_streams = [
        f'{_EMBEDDINGS_TAG}:{_EMBEDDINGS_STREAM_NAME}',
        f'{_TIMESTAMPTED_EMBEDDINGS_TAG}:'
        f'{_TIMESTAMPTED_EMBEDDINGS_STREAM_NAME}',
    ]
    task_info = _TaskInfo(
        task_graph=_TASK_GRAPH_NAME,
        input_streams=input_streams,
        output_streams=output_streams,
        task_options=options)
    # Audio tasks should not drop input audio due to flow limiting, which
    # may cause data inconsistency.
    graph_config = task_info.generate_graph_config(enable_flow_limiting=False)
    # The packets callback is only installed when the user supplied a result
    # callback.
    callback = packets_callback if options.result_callback else None
    return cls(graph_config, options.running_mode, callback)

  def embed(self, audio_clip: _AudioData) -> List[AudioEmbedderResult]:
    """Performs embedding extraction on the provided audio clip.

    The audio clip is represented as a MediaPipe AudioData. The method accepts
    audio clips with various lengths and audio sample rates. The corresponding
    audio sample rate must be provided within the `AudioData` object.

    The input audio clip may be longer than what the model is able to process
    in a single inference. When this occurs, the input audio clip is split into
    multiple chunks starting at different timestamps. For this reason, this
    function returns a list of `AudioEmbedderResult` objects, each associated
    with a timestamp corresponding to the start (in milliseconds) of the chunk
    data on which embedding extraction was carried out.

    Args:
      audio_clip: MediaPipe AudioData.

    Returns:
      A list of `AudioEmbedderResult` objects, each associated with a
      timestamp corresponding to the start (in milliseconds) of the chunk data
      on which embedding extraction was carried out.

    Raises:
      ValueError: If any of the input arguments is invalid, such as the sample
        rate is not provided in the `AudioData` object.
      RuntimeError: If audio embedding extraction failed to run.
    """
    sample_rate = audio_clip.audio_format.sample_rate
    if not sample_rate:
      raise ValueError('Must provide the audio sample rate in audio data.')

    input_packets = {
        _AUDIO_IN_STREAM_NAME:
            packet_creator.create_matrix(audio_clip.buffer, transpose=True),
        _SAMPLE_RATE_IN_STREAM_NAME:
            packet_creator.create_double(sample_rate),
    }
    output_packets = self._process_audio_clip(input_packets)

    def to_result(proto) -> AudioEmbedderResult:
      # Copy into an `EmbeddingResult` message before converting it.
      embedding_result_proto = embeddings_pb2.EmbeddingResult()
      embedding_result_proto.CopyFrom(proto)
      return AudioEmbedderResult.create_from_pb2(embedding_result_proto)

    timestamped_protos = packet_getter.get_proto_list(
        output_packets[_TIMESTAMPTED_EMBEDDINGS_STREAM_NAME])
    return [to_result(proto) for proto in timestamped_protos]

  def embed_async(self, audio_block: _AudioData, timestamp_ms: int) -> None:
    """Sends a block of a continuous audio stream for embedding extraction.

    Only use this method when the AudioEmbedder is created with the audio
    stream running mode. The input timestamps should be monotonically
    increasing for adjacent calls of this method. This method will return
    immediately after the input audio data is accepted. The results will be
    available via the `result_callback` provided in the `AudioEmbedderOptions`.
    The `embed_async` method is designed to process audio stream data such as
    microphone input.

    The input audio data may be longer than what the model is able to process
    in a single inference. When this occurs, the input audio block is split
    into multiple chunks. For this reason, the callback may be called multiple
    times (once per chunk) for each call to this function.

    The `result_callback` provides:
      - An `AudioEmbedderResult` object that contains a list of
        embeddings.
      - The input timestamp in milliseconds.

    Args:
      audio_block: MediaPipe AudioData.
      timestamp_ms: The timestamp of the input audio data in milliseconds.

    Raises:
      ValueError: If any of the following occurs:
        1) The sample rate is not provided in the `AudioData` object or the
          provided sample rate is inconsistent with the previously received.
        2) The current input timestamp is smaller than what the audio
          embedder has already processed.
    """
    sample_rate = audio_block.audio_format.sample_rate
    if not sample_rate:
      raise ValueError('Must provide the audio sample rate in audio data.')

    if not self._default_sample_rate:
      # First block of the stream: remember its sample rate and send it on the
      # sample-rate input stream.
      self._default_sample_rate = sample_rate
      self._set_sample_rate(_SAMPLE_RATE_IN_STREAM_NAME,
                            self._default_sample_rate)
    elif sample_rate != self._default_sample_rate:
      raise ValueError(
          f'The audio sample rate provided in audio data: '
          f'{audio_block.audio_format.sample_rate} is inconsistent with '
          f'the previously received: {self._default_sample_rate}.')

    audio_matrix = packet_creator.create_matrix(
        audio_block.buffer, transpose=True)
    # Packet timestamps are in microseconds; the public API uses milliseconds.
    audio_packet = audio_matrix.at(
        timestamp_ms * _MICRO_SECONDS_PER_MILLISECOND)
    self._send_audio_stream_data({_AUDIO_IN_STREAM_NAME: audio_packet})
@@ -0,0 +1,16 @@
1
+ """Copyright 2022 The MediaPipe Authors.
2
+
3
+ All Rights Reserved.
4
+
5
+ Licensed under the Apache License, Version 2.0 (the "License");
6
+ you may not use this file except in compliance with the License.
7
+ You may obtain a copy of the License at
8
+
9
+ http://www.apache.org/licenses/LICENSE-2.0
10
+
11
+ Unless required by applicable law or agreed to in writing, software
12
+ distributed under the License is distributed on an "AS IS" BASIS,
13
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ See the License for the specific language governing permissions and
15
+ limitations under the License.
16
+ """