mediapipe-nightly 0.0.0.post20231103__cp311-cp311-macosx_11_0_universal2.whl
Sign up to get free protection for your applications and to get access to all the features.
- mediapipe/__init__.py +26 -0
- mediapipe/calculators/__init__.py +0 -0
- mediapipe/calculators/audio/__init__.py +0 -0
- mediapipe/calculators/audio/mfcc_mel_calculators_pb2.py +34 -0
- mediapipe/calculators/audio/rational_factor_resample_calculator_pb2.py +33 -0
- mediapipe/calculators/audio/spectrogram_calculator_pb2.py +35 -0
- mediapipe/calculators/audio/stabilized_log_calculator_pb2.py +31 -0
- mediapipe/calculators/audio/time_series_framer_calculator_pb2.py +33 -0
- mediapipe/calculators/core/__init__.py +0 -0
- mediapipe/calculators/core/bypass_calculator_pb2.py +31 -0
- mediapipe/calculators/core/clip_vector_size_calculator_pb2.py +31 -0
- mediapipe/calculators/core/concatenate_vector_calculator_pb2.py +31 -0
- mediapipe/calculators/core/constant_side_packet_calculator_pb2.py +37 -0
- mediapipe/calculators/core/dequantize_byte_array_calculator_pb2.py +31 -0
- mediapipe/calculators/core/flow_limiter_calculator_pb2.py +32 -0
- mediapipe/calculators/core/gate_calculator_pb2.py +33 -0
- mediapipe/calculators/core/get_vector_item_calculator_pb2.py +31 -0
- mediapipe/calculators/core/graph_profile_calculator_pb2.py +31 -0
- mediapipe/calculators/core/packet_cloner_calculator_pb2.py +31 -0
- mediapipe/calculators/core/packet_resampler_calculator_pb2.py +33 -0
- mediapipe/calculators/core/packet_thinner_calculator_pb2.py +33 -0
- mediapipe/calculators/core/quantize_float_vector_calculator_pb2.py +31 -0
- mediapipe/calculators/core/sequence_shift_calculator_pb2.py +31 -0
- mediapipe/calculators/core/split_vector_calculator_pb2.py +33 -0
- mediapipe/calculators/image/__init__.py +0 -0
- mediapipe/calculators/image/bilateral_filter_calculator_pb2.py +31 -0
- mediapipe/calculators/image/feature_detector_calculator_pb2.py +31 -0
- mediapipe/calculators/image/image_clone_calculator_pb2.py +31 -0
- mediapipe/calculators/image/image_cropping_calculator_pb2.py +33 -0
- mediapipe/calculators/image/image_transformation_calculator_pb2.py +38 -0
- mediapipe/calculators/image/mask_overlay_calculator_pb2.py +33 -0
- mediapipe/calculators/image/opencv_encoded_image_to_image_frame_calculator_pb2.py +31 -0
- mediapipe/calculators/image/opencv_image_encoder_calculator_pb2.py +35 -0
- mediapipe/calculators/image/recolor_calculator_pb2.py +34 -0
- mediapipe/calculators/image/rotation_mode_pb2.py +28 -0
- mediapipe/calculators/image/scale_image_calculator_pb2.py +34 -0
- mediapipe/calculators/image/segmentation_smoothing_calculator_pb2.py +31 -0
- mediapipe/calculators/image/set_alpha_calculator_pb2.py +31 -0
- mediapipe/calculators/image/warp_affine_calculator_pb2.py +36 -0
- mediapipe/calculators/internal/__init__.py +0 -0
- mediapipe/calculators/internal/callback_packet_calculator_pb2.py +33 -0
- mediapipe/calculators/tensor/__init__.py +0 -0
- mediapipe/calculators/tensor/audio_to_tensor_calculator_pb2.py +35 -0
- mediapipe/calculators/tensor/bert_preprocessor_calculator_pb2.py +31 -0
- mediapipe/calculators/tensor/feedback_tensors_calculator_pb2.py +37 -0
- mediapipe/calculators/tensor/image_to_tensor_calculator_pb2.py +40 -0
- mediapipe/calculators/tensor/inference_calculator_pb2.py +53 -0
- mediapipe/calculators/tensor/landmarks_to_tensor_calculator_pb2.py +33 -0
- mediapipe/calculators/tensor/regex_preprocessor_calculator_pb2.py +31 -0
- mediapipe/calculators/tensor/tensor_converter_calculator_pb2.py +34 -0
- mediapipe/calculators/tensor/tensor_to_joints_calculator_pb2.py +31 -0
- mediapipe/calculators/tensor/tensors_readback_calculator_pb2.py +35 -0
- mediapipe/calculators/tensor/tensors_to_audio_calculator_pb2.py +33 -0
- mediapipe/calculators/tensor/tensors_to_classification_calculator_pb2.py +44 -0
- mediapipe/calculators/tensor/tensors_to_detections_calculator_pb2.py +39 -0
- mediapipe/calculators/tensor/tensors_to_floats_calculator_pb2.py +33 -0
- mediapipe/calculators/tensor/tensors_to_landmarks_calculator_pb2.py +33 -0
- mediapipe/calculators/tensor/tensors_to_segmentation_calculator_pb2.py +34 -0
- mediapipe/calculators/tflite/__init__.py +0 -0
- mediapipe/calculators/tflite/ssd_anchors_calculator_pb2.py +32 -0
- mediapipe/calculators/tflite/tflite_converter_calculator_pb2.py +33 -0
- mediapipe/calculators/tflite/tflite_custom_op_resolver_calculator_pb2.py +31 -0
- mediapipe/calculators/tflite/tflite_inference_calculator_pb2.py +49 -0
- mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator_pb2.py +31 -0
- mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator_pb2.py +31 -0
- mediapipe/calculators/tflite/tflite_tensors_to_landmarks_calculator_pb2.py +33 -0
- mediapipe/calculators/tflite/tflite_tensors_to_segmentation_calculator_pb2.py +31 -0
- mediapipe/calculators/util/__init__.py +0 -0
- mediapipe/calculators/util/align_hand_to_pose_in_world_calculator_pb2.py +31 -0
- mediapipe/calculators/util/annotation_overlay_calculator_pb2.py +32 -0
- mediapipe/calculators/util/association_calculator_pb2.py +31 -0
- mediapipe/calculators/util/collection_has_min_size_calculator_pb2.py +31 -0
- mediapipe/calculators/util/combine_joints_calculator_pb2.py +36 -0
- mediapipe/calculators/util/detection_label_id_to_text_calculator_pb2.py +36 -0
- mediapipe/calculators/util/detections_to_rects_calculator_pb2.py +33 -0
- mediapipe/calculators/util/detections_to_render_data_calculator_pb2.py +33 -0
- mediapipe/calculators/util/face_to_rect_calculator_pb2.py +25 -0
- mediapipe/calculators/util/filter_detections_calculator_pb2.py +31 -0
- mediapipe/calculators/util/flat_color_image_calculator_pb2.py +32 -0
- mediapipe/calculators/util/labels_to_render_data_calculator_pb2.py +34 -0
- mediapipe/calculators/util/landmark_projection_calculator_pb2.py +31 -0
- mediapipe/calculators/util/landmarks_refinement_calculator_pb2.py +41 -0
- mediapipe/calculators/util/landmarks_smoothing_calculator_pb2.py +33 -0
- mediapipe/calculators/util/landmarks_to_detection_calculator_pb2.py +31 -0
- mediapipe/calculators/util/landmarks_to_floats_calculator_pb2.py +31 -0
- mediapipe/calculators/util/landmarks_to_render_data_calculator_pb2.py +32 -0
- mediapipe/calculators/util/landmarks_transformation_calculator_pb2.py +37 -0
- mediapipe/calculators/util/latency_pb2.py +25 -0
- mediapipe/calculators/util/local_file_contents_calculator_pb2.py +31 -0
- mediapipe/calculators/util/logic_calculator_pb2.py +34 -0
- mediapipe/calculators/util/non_max_suppression_calculator_pb2.py +35 -0
- mediapipe/calculators/util/packet_frequency_calculator_pb2.py +31 -0
- mediapipe/calculators/util/packet_frequency_pb2.py +25 -0
- mediapipe/calculators/util/packet_latency_calculator_pb2.py +31 -0
- mediapipe/calculators/util/rect_to_render_data_calculator_pb2.py +32 -0
- mediapipe/calculators/util/rect_to_render_scale_calculator_pb2.py +31 -0
- mediapipe/calculators/util/rect_transformation_calculator_pb2.py +31 -0
- mediapipe/calculators/util/refine_landmarks_from_heatmap_calculator_pb2.py +31 -0
- mediapipe/calculators/util/set_joints_visibility_calculator_pb2.py +41 -0
- mediapipe/calculators/util/thresholding_calculator_pb2.py +31 -0
- mediapipe/calculators/util/timed_box_list_id_to_label_calculator_pb2.py +31 -0
- mediapipe/calculators/util/timed_box_list_to_render_data_calculator_pb2.py +32 -0
- mediapipe/calculators/util/top_k_scores_calculator_pb2.py +31 -0
- mediapipe/calculators/util/visibility_copy_calculator_pb2.py +27 -0
- mediapipe/calculators/util/visibility_smoothing_calculator_pb2.py +31 -0
- mediapipe/calculators/video/__init__.py +0 -0
- mediapipe/calculators/video/box_detector_calculator_pb2.py +32 -0
- mediapipe/calculators/video/box_tracker_calculator_pb2.py +32 -0
- mediapipe/calculators/video/flow_packager_calculator_pb2.py +32 -0
- mediapipe/calculators/video/flow_to_image_calculator_pb2.py +31 -0
- mediapipe/calculators/video/motion_analysis_calculator_pb2.py +42 -0
- mediapipe/calculators/video/opencv_video_encoder_calculator_pb2.py +31 -0
- mediapipe/calculators/video/tool/__init__.py +0 -0
- mediapipe/calculators/video/tool/flow_quantizer_model_pb2.py +25 -0
- mediapipe/calculators/video/tracked_detection_manager_calculator_pb2.py +32 -0
- mediapipe/calculators/video/video_pre_stream_calculator_pb2.py +35 -0
- mediapipe/examples/__init__.py +14 -0
- mediapipe/examples/desktop/__init__.py +14 -0
- mediapipe/framework/__init__.py +0 -0
- mediapipe/framework/calculator_options_pb2.py +28 -0
- mediapipe/framework/calculator_pb2.py +56 -0
- mediapipe/framework/calculator_profile_pb2.py +47 -0
- mediapipe/framework/deps/__init__.py +0 -0
- mediapipe/framework/deps/proto_descriptor_pb2.py +28 -0
- mediapipe/framework/formats/__init__.py +0 -0
- mediapipe/framework/formats/affine_transform_data_pb2.py +27 -0
- mediapipe/framework/formats/annotation/__init__.py +0 -0
- mediapipe/framework/formats/annotation/locus_pb2.py +31 -0
- mediapipe/framework/formats/annotation/rasterization_pb2.py +28 -0
- mediapipe/framework/formats/body_rig_pb2.py +27 -0
- mediapipe/framework/formats/classification_pb2.py +30 -0
- mediapipe/framework/formats/detection_pb2.py +35 -0
- mediapipe/framework/formats/image_file_properties_pb2.py +25 -0
- mediapipe/framework/formats/image_format_pb2.py +28 -0
- mediapipe/framework/formats/landmark_pb2.py +36 -0
- mediapipe/framework/formats/location_data_pb2.py +37 -0
- mediapipe/framework/formats/matrix_data_pb2.py +30 -0
- mediapipe/framework/formats/motion/__init__.py +0 -0
- mediapipe/framework/formats/motion/optical_flow_field_data_pb2.py +29 -0
- mediapipe/framework/formats/object_detection/__init__.py +0 -0
- mediapipe/framework/formats/object_detection/anchor_pb2.py +25 -0
- mediapipe/framework/formats/rect_pb2.py +28 -0
- mediapipe/framework/formats/time_series_header_pb2.py +27 -0
- mediapipe/framework/mediapipe_options_pb2.py +26 -0
- mediapipe/framework/packet_factory_pb2.py +30 -0
- mediapipe/framework/packet_generator_pb2.py +32 -0
- mediapipe/framework/status_handler_pb2.py +27 -0
- mediapipe/framework/stream_handler/__init__.py +0 -0
- mediapipe/framework/stream_handler/default_input_stream_handler_pb2.py +27 -0
- mediapipe/framework/stream_handler/fixed_size_input_stream_handler_pb2.py +27 -0
- mediapipe/framework/stream_handler/sync_set_input_stream_handler_pb2.py +29 -0
- mediapipe/framework/stream_handler/timestamp_align_input_stream_handler_pb2.py +27 -0
- mediapipe/framework/stream_handler_pb2.py +29 -0
- mediapipe/framework/test_calculators_pb2.py +31 -0
- mediapipe/framework/thread_pool_executor_pb2.py +29 -0
- mediapipe/framework/tool/__init__.py +0 -0
- mediapipe/framework/tool/calculator_graph_template_pb2.py +44 -0
- mediapipe/framework/tool/field_data_pb2.py +27 -0
- mediapipe/framework/tool/node_chain_subgraph_pb2.py +31 -0
- mediapipe/framework/tool/packet_generator_wrapper_calculator_pb2.py +28 -0
- mediapipe/framework/tool/source_pb2.py +33 -0
- mediapipe/framework/tool/switch_container_pb2.py +32 -0
- mediapipe/gpu/__init__.py +0 -0
- mediapipe/gpu/copy_calculator_pb2.py +33 -0
- mediapipe/gpu/gl_animation_overlay_calculator_pb2.py +31 -0
- mediapipe/gpu/gl_context_options_pb2.py +31 -0
- mediapipe/gpu/gl_scaler_calculator_pb2.py +32 -0
- mediapipe/gpu/gl_surface_sink_calculator_pb2.py +32 -0
- mediapipe/gpu/gpu_origin_pb2.py +28 -0
- mediapipe/gpu/scale_mode_pb2.py +27 -0
- mediapipe/model_maker/__init__.py +27 -0
- mediapipe/model_maker/setup.py +107 -0
- mediapipe/modules/__init__.py +0 -0
- mediapipe/modules/face_detection/__init__.py +0 -0
- mediapipe/modules/face_detection/face_detection_full_range_cpu.binarypb +0 -0
- mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite +0 -0
- mediapipe/modules/face_detection/face_detection_pb2.py +30 -0
- mediapipe/modules/face_detection/face_detection_short_range.tflite +0 -0
- mediapipe/modules/face_detection/face_detection_short_range_cpu.binarypb +0 -0
- mediapipe/modules/face_geometry/__init__.py +0 -0
- mediapipe/modules/face_geometry/data/__init__.py +0 -0
- mediapipe/modules/face_geometry/effect_renderer_calculator_pb2.py +27 -0
- mediapipe/modules/face_geometry/env_generator_calculator_pb2.py +28 -0
- mediapipe/modules/face_geometry/geometry_pipeline_calculator_pb2.py +27 -0
- mediapipe/modules/face_geometry/libs/__init__.py +0 -0
- mediapipe/modules/face_geometry/protos/__init__.py +0 -0
- mediapipe/modules/face_geometry/protos/environment_pb2.py +30 -0
- mediapipe/modules/face_geometry/protos/face_geometry_pb2.py +28 -0
- mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata_pb2.py +31 -0
- mediapipe/modules/face_geometry/protos/mesh_3d_pb2.py +30 -0
- mediapipe/modules/face_landmark/__init__.py +0 -0
- mediapipe/modules/face_landmark/face_landmark.tflite +0 -0
- mediapipe/modules/face_landmark/face_landmark_front_cpu.binarypb +0 -0
- mediapipe/modules/face_landmark/face_landmark_with_attention.tflite +0 -0
- mediapipe/modules/hand_landmark/__init__.py +0 -0
- mediapipe/modules/hand_landmark/calculators/__init__.py +0 -0
- mediapipe/modules/hand_landmark/hand_landmark_full.tflite +0 -0
- mediapipe/modules/hand_landmark/hand_landmark_lite.tflite +0 -0
- mediapipe/modules/hand_landmark/hand_landmark_tracking_cpu.binarypb +0 -0
- mediapipe/modules/hand_landmark/handedness.txt +2 -0
- mediapipe/modules/holistic_landmark/__init__.py +0 -0
- mediapipe/modules/holistic_landmark/calculators/__init__.py +0 -0
- mediapipe/modules/holistic_landmark/calculators/roi_tracking_calculator_pb2.py +37 -0
- mediapipe/modules/holistic_landmark/hand_recrop.tflite +0 -0
- mediapipe/modules/holistic_landmark/holistic_landmark_cpu.binarypb +0 -0
- mediapipe/modules/iris_landmark/__init__.py +0 -0
- mediapipe/modules/iris_landmark/iris_landmark.tflite +0 -0
- mediapipe/modules/objectron/__init__.py +0 -0
- mediapipe/modules/objectron/calculators/__init__.py +0 -0
- mediapipe/modules/objectron/calculators/a_r_capture_metadata_pb2.py +101 -0
- mediapipe/modules/objectron/calculators/annotation_data_pb2.py +37 -0
- mediapipe/modules/objectron/calculators/belief_decoder_config_pb2.py +27 -0
- mediapipe/modules/objectron/calculators/camera_parameters_pb2.py +29 -0
- mediapipe/modules/objectron/calculators/filter_detection_calculator_pb2.py +35 -0
- mediapipe/modules/objectron/calculators/frame_annotation_to_rect_calculator_pb2.py +31 -0
- mediapipe/modules/objectron/calculators/frame_annotation_tracker_calculator_pb2.py +31 -0
- mediapipe/modules/objectron/calculators/lift_2d_frame_annotation_to_3d_calculator_pb2.py +32 -0
- mediapipe/modules/objectron/calculators/object_pb2.py +37 -0
- mediapipe/modules/objectron/calculators/tensors_to_objects_calculator_pb2.py +32 -0
- mediapipe/modules/objectron/calculators/tflite_tensors_to_objects_calculator_pb2.py +32 -0
- mediapipe/modules/objectron/object_detection_oidv4_labelmap.txt +24 -0
- mediapipe/modules/objectron/objectron_cpu.binarypb +0 -0
- mediapipe/modules/palm_detection/__init__.py +0 -0
- mediapipe/modules/palm_detection/palm_detection_full.tflite +0 -0
- mediapipe/modules/palm_detection/palm_detection_lite.tflite +0 -0
- mediapipe/modules/pose_detection/__init__.py +0 -0
- mediapipe/modules/pose_detection/pose_detection.tflite +0 -0
- mediapipe/modules/pose_landmark/__init__.py +0 -0
- mediapipe/modules/pose_landmark/pose_landmark_cpu.binarypb +0 -0
- mediapipe/modules/pose_landmark/pose_landmark_full.tflite +0 -0
- mediapipe/modules/selfie_segmentation/__init__.py +0 -0
- mediapipe/modules/selfie_segmentation/selfie_segmentation.tflite +0 -0
- mediapipe/modules/selfie_segmentation/selfie_segmentation_cpu.binarypb +0 -0
- mediapipe/modules/selfie_segmentation/selfie_segmentation_landscape.tflite +0 -0
- mediapipe/python/__init__.py +28 -0
- mediapipe/python/_framework_bindings/arm64.cpython-311-darwin.so +0 -0
- mediapipe/python/_framework_bindings.cpython-311-darwin.so +0 -0
- mediapipe/python/calculator_graph_test.py +251 -0
- mediapipe/python/image_frame_test.py +194 -0
- mediapipe/python/image_test.py +218 -0
- mediapipe/python/packet_creator.py +275 -0
- mediapipe/python/packet_getter.py +119 -0
- mediapipe/python/packet_test.py +533 -0
- mediapipe/python/solution_base.py +632 -0
- mediapipe/python/solution_base_test.py +396 -0
- mediapipe/python/solutions/__init__.py +27 -0
- mediapipe/python/solutions/download_utils.py +37 -0
- mediapipe/python/solutions/drawing_styles.py +249 -0
- mediapipe/python/solutions/drawing_utils.py +316 -0
- mediapipe/python/solutions/drawing_utils_test.py +258 -0
- mediapipe/python/solutions/face_detection.py +105 -0
- mediapipe/python/solutions/face_detection_test.py +92 -0
- mediapipe/python/solutions/face_mesh.py +125 -0
- mediapipe/python/solutions/face_mesh_connections.py +500 -0
- mediapipe/python/solutions/face_mesh_test.py +170 -0
- mediapipe/python/solutions/hands.py +153 -0
- mediapipe/python/solutions/hands_connections.py +32 -0
- mediapipe/python/solutions/hands_test.py +218 -0
- mediapipe/python/solutions/holistic.py +167 -0
- mediapipe/python/solutions/holistic_test.py +142 -0
- mediapipe/python/solutions/objectron.py +288 -0
- mediapipe/python/solutions/objectron_test.py +81 -0
- mediapipe/python/solutions/pose.py +192 -0
- mediapipe/python/solutions/pose_connections.py +22 -0
- mediapipe/python/solutions/pose_test.py +262 -0
- mediapipe/python/solutions/selfie_segmentation.py +76 -0
- mediapipe/python/solutions/selfie_segmentation_test.py +68 -0
- mediapipe/python/timestamp_test.py +78 -0
- mediapipe/tasks/__init__.py +14 -0
- mediapipe/tasks/cc/__init__.py +0 -0
- mediapipe/tasks/cc/audio/__init__.py +0 -0
- mediapipe/tasks/cc/audio/audio_classifier/__init__.py +0 -0
- mediapipe/tasks/cc/audio/audio_classifier/proto/__init__.py +0 -0
- mediapipe/tasks/cc/audio/audio_classifier/proto/audio_classifier_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/audio/audio_embedder/__init__.py +0 -0
- mediapipe/tasks/cc/audio/audio_embedder/proto/__init__.py +0 -0
- mediapipe/tasks/cc/audio/audio_embedder/proto/audio_embedder_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/audio/core/__init__.py +0 -0
- mediapipe/tasks/cc/audio/utils/__init__.py +0 -0
- mediapipe/tasks/cc/components/__init__.py +0 -0
- mediapipe/tasks/cc/components/calculators/__init__.py +0 -0
- mediapipe/tasks/cc/components/calculators/classification_aggregation_calculator_pb2.py +31 -0
- mediapipe/tasks/cc/components/calculators/score_calibration_calculator_pb2.py +35 -0
- mediapipe/tasks/cc/components/calculators/tensors_to_embeddings_calculator_pb2.py +32 -0
- mediapipe/tasks/cc/components/containers/__init__.py +0 -0
- mediapipe/tasks/cc/components/containers/proto/__init__.py +0 -0
- mediapipe/tasks/cc/components/containers/proto/classifications_pb2.py +29 -0
- mediapipe/tasks/cc/components/containers/proto/embeddings_pb2.py +34 -0
- mediapipe/tasks/cc/components/containers/proto/landmarks_detection_result_pb2.py +31 -0
- mediapipe/tasks/cc/components/processors/__init__.py +0 -0
- mediapipe/tasks/cc/components/processors/proto/__init__.py +0 -0
- mediapipe/tasks/cc/components/processors/proto/classification_postprocessing_graph_options_pb2.py +38 -0
- mediapipe/tasks/cc/components/processors/proto/classifier_options_pb2.py +26 -0
- mediapipe/tasks/cc/components/processors/proto/detection_postprocessing_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/components/processors/proto/detector_options_pb2.py +26 -0
- mediapipe/tasks/cc/components/processors/proto/embedder_options_pb2.py +26 -0
- mediapipe/tasks/cc/components/processors/proto/embedding_postprocessing_graph_options_pb2.py +32 -0
- mediapipe/tasks/cc/components/processors/proto/image_preprocessing_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/components/processors/proto/llm_params_pb2.py +27 -0
- mediapipe/tasks/cc/components/processors/proto/text_model_type_pb2.py +27 -0
- mediapipe/tasks/cc/components/processors/proto/text_preprocessing_graph_options_pb2.py +32 -0
- mediapipe/tasks/cc/components/processors/proto/transformer_params_pb2.py +28 -0
- mediapipe/tasks/cc/components/utils/__init__.py +0 -0
- mediapipe/tasks/cc/core/__init__.py +0 -0
- mediapipe/tasks/cc/core/proto/__init__.py +0 -0
- mediapipe/tasks/cc/core/proto/acceleration_pb2.py +27 -0
- mediapipe/tasks/cc/core/proto/base_options_pb2.py +29 -0
- mediapipe/tasks/cc/core/proto/external_file_pb2.py +30 -0
- mediapipe/tasks/cc/core/proto/inference_subgraph_pb2.py +32 -0
- mediapipe/tasks/cc/core/proto/model_resources_calculator_pb2.py +32 -0
- mediapipe/tasks/cc/metadata/__init__.py +0 -0
- mediapipe/tasks/cc/metadata/python/__init__.py +0 -0
- mediapipe/tasks/cc/metadata/python/_pywrap_metadata_version/arm64.cpython-311-darwin.so +0 -0
- mediapipe/tasks/cc/metadata/python/_pywrap_metadata_version.cpython-311-darwin.so +0 -0
- mediapipe/tasks/cc/metadata/tests/__init__.py +0 -0
- mediapipe/tasks/cc/metadata/utils/__init__.py +0 -0
- mediapipe/tasks/cc/text/__init__.py +0 -0
- mediapipe/tasks/cc/text/custom_ops/__init__.py +0 -0
- mediapipe/tasks/cc/text/custom_ops/ragged/__init__.py +0 -0
- mediapipe/tasks/cc/text/custom_ops/sentencepiece/__init__.py +0 -0
- mediapipe/tasks/cc/text/custom_ops/sentencepiece/testdata/__init__.py +0 -0
- mediapipe/tasks/cc/text/language_detector/__init__.py +0 -0
- mediapipe/tasks/cc/text/language_detector/custom_ops/__init__.py +0 -0
- mediapipe/tasks/cc/text/language_detector/custom_ops/utils/__init__.py +0 -0
- mediapipe/tasks/cc/text/language_detector/custom_ops/utils/hash/__init__.py +0 -0
- mediapipe/tasks/cc/text/language_detector/custom_ops/utils/utf/__init__.py +0 -0
- mediapipe/tasks/cc/text/text_classifier/__init__.py +0 -0
- mediapipe/tasks/cc/text/text_classifier/proto/__init__.py +0 -0
- mediapipe/tasks/cc/text/text_classifier/proto/text_classifier_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/text/text_embedder/__init__.py +0 -0
- mediapipe/tasks/cc/text/text_embedder/proto/__init__.py +0 -0
- mediapipe/tasks/cc/text/text_embedder/proto/text_embedder_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/text/tokenizers/__init__.py +0 -0
- mediapipe/tasks/cc/text/utils/__init__.py +0 -0
- mediapipe/tasks/cc/vision/__init__.py +0 -0
- mediapipe/tasks/cc/vision/core/__init__.py +0 -0
- mediapipe/tasks/cc/vision/custom_ops/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_detector/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_detector/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_detector/proto/face_detector_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/face_geometry/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_geometry/calculators/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_geometry/calculators/env_generator_calculator_pb2.py +28 -0
- mediapipe/tasks/cc/vision/face_geometry/calculators/geometry_pipeline_calculator_pb2.py +29 -0
- mediapipe/tasks/cc/vision/face_geometry/data/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_geometry/libs/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_geometry/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_geometry/proto/environment_pb2.py +30 -0
- mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry_graph_options_pb2.py +29 -0
- mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry_pb2.py +28 -0
- mediapipe/tasks/cc/vision/face_geometry/proto/geometry_pipeline_metadata_pb2.py +31 -0
- mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d_pb2.py +30 -0
- mediapipe/tasks/cc/vision/face_landmarker/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_landmarker/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_landmarker/proto/face_blendshapes_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarker_graph_options_pb2.py +37 -0
- mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarks_detector_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/vision/face_landmarker/proto/tensors_to_face_landmarks_graph_options_pb2.py +32 -0
- mediapipe/tasks/cc/vision/face_stylizer/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_stylizer/calculators/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_stylizer/calculators/tensors_to_image_calculator_pb2.py +36 -0
- mediapipe/tasks/cc/vision/face_stylizer/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_stylizer/proto/face_stylizer_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/__init__.py +0 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/calculators/__init__.py +0 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/calculators/combined_prediction_calculator_pb2.py +33 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator_pb2.py +31 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_classifier_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_embedder_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_recognizer_graph_options_pb2.py +36 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/proto/hand_gesture_recognizer_graph_options_pb2.py +36 -0
- mediapipe/tasks/cc/vision/hand_detector/__init__.py +0 -0
- mediapipe/tasks/cc/vision/hand_detector/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_result_pb2.py +29 -0
- mediapipe/tasks/cc/vision/hand_landmarker/__init__.py +0 -0
- mediapipe/tasks/cc/vision/hand_landmarker/calculators/__init__.py +0 -0
- mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_association_calculator_pb2.py +31 -0
- mediapipe/tasks/cc/vision/hand_landmarker/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options_pb2.py +36 -0
- mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_roi_refinement_graph_options_pb2.py +26 -0
- mediapipe/tasks/cc/vision/image_classifier/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_classifier/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_classifier/proto/image_classifier_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/vision/image_embedder/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_embedder/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_embedder/proto/image_embedder_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/vision/image_generator/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_generator/diffuser/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_generator/diffuser/stable_diffusion_iterate_calculator_pb2.py +40 -0
- mediapipe/tasks/cc/vision/image_generator/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_generator/proto/conditioned_image_graph_options_pb2.py +39 -0
- mediapipe/tasks/cc/vision/image_generator/proto/control_plugin_graph_options_pb2.py +33 -0
- mediapipe/tasks/cc/vision/image_generator/proto/image_generator_graph_options_pb2.py +29 -0
- mediapipe/tasks/cc/vision/image_segmenter/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_segmenter/calculators/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_segmenter/calculators/tensors_to_segmentation_calculator_pb2.py +34 -0
- mediapipe/tasks/cc/vision/image_segmenter/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_segmenter/proto/image_segmenter_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/vision/image_segmenter/proto/segmenter_options_pb2.py +32 -0
- mediapipe/tasks/cc/vision/interactive_segmenter/__init__.py +0 -0
- mediapipe/tasks/cc/vision/object_detector/__init__.py +0 -0
- mediapipe/tasks/cc/vision/object_detector/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/object_detector/proto/object_detector_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/pose_detector/__init__.py +0 -0
- mediapipe/tasks/cc/vision/pose_detector/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/pose_detector/proto/pose_detector_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/pose_landmarker/__init__.py +0 -0
- mediapipe/tasks/cc/vision/pose_landmarker/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/pose_landmarker/proto/pose_landmarker_graph_options_pb2.py +36 -0
- mediapipe/tasks/cc/vision/pose_landmarker/proto/pose_landmarks_detector_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/utils/__init__.py +0 -0
- mediapipe/tasks/cc/vision/utils/ghum/__init__.py +0 -0
- mediapipe/tasks/metadata/image_segmenter_metadata_schema.fbs +59 -0
- mediapipe/tasks/metadata/image_segmenter_metadata_schema_py_generated.py +108 -0
- mediapipe/tasks/metadata/metadata_schema.fbs +732 -0
- mediapipe/tasks/metadata/metadata_schema_py_generated.py +3251 -0
- mediapipe/tasks/metadata/object_detector_metadata_schema.fbs +98 -0
- mediapipe/tasks/metadata/object_detector_metadata_schema_py_generated.py +674 -0
- mediapipe/tasks/metadata/schema_py_generated.py +14263 -0
- mediapipe/tasks/python/__init__.py +26 -0
- mediapipe/tasks/python/audio/__init__.py +33 -0
- mediapipe/tasks/python/audio/audio_classifier.py +324 -0
- mediapipe/tasks/python/audio/audio_embedder.py +285 -0
- mediapipe/tasks/python/audio/core/__init__.py +16 -0
- mediapipe/tasks/python/audio/core/audio_record.py +125 -0
- mediapipe/tasks/python/audio/core/audio_task_running_mode.py +29 -0
- mediapipe/tasks/python/audio/core/base_audio_task_api.py +181 -0
- mediapipe/tasks/python/components/__init__.py +13 -0
- mediapipe/tasks/python/components/containers/__init__.py +53 -0
- mediapipe/tasks/python/components/containers/audio_data.py +137 -0
- mediapipe/tasks/python/components/containers/bounding_box.py +73 -0
- mediapipe/tasks/python/components/containers/category.py +78 -0
- mediapipe/tasks/python/components/containers/classification_result.py +111 -0
- mediapipe/tasks/python/components/containers/detections.py +181 -0
- mediapipe/tasks/python/components/containers/embedding_result.py +89 -0
- mediapipe/tasks/python/components/containers/keypoint.py +77 -0
- mediapipe/tasks/python/components/containers/landmark.py +122 -0
- mediapipe/tasks/python/components/containers/landmark_detection_result.py +106 -0
- mediapipe/tasks/python/components/containers/rect.py +109 -0
- mediapipe/tasks/python/components/processors/__init__.py +23 -0
- mediapipe/tasks/python/components/processors/classifier_options.py +86 -0
- mediapipe/tasks/python/components/utils/__init__.py +13 -0
- mediapipe/tasks/python/components/utils/cosine_similarity.py +68 -0
- mediapipe/tasks/python/core/__init__.py +13 -0
- mediapipe/tasks/python/core/base_options.py +121 -0
- mediapipe/tasks/python/core/optional_dependencies.py +25 -0
- mediapipe/tasks/python/core/task_info.py +132 -0
- mediapipe/tasks/python/metadata/__init__.py +13 -0
- mediapipe/tasks/python/metadata/flatbuffers_lib/_pywrap_flatbuffers/arm64.cpython-311-darwin.so +0 -0
- mediapipe/tasks/python/metadata/flatbuffers_lib/_pywrap_flatbuffers.cpython-311-darwin.so +0 -0
- mediapipe/tasks/python/metadata/metadata.py +928 -0
- mediapipe/tasks/python/metadata/metadata_displayer_cli.py +34 -0
- mediapipe/tasks/python/metadata/metadata_writers/__init__.py +13 -0
- mediapipe/tasks/python/metadata/metadata_writers/face_stylizer.py +138 -0
- mediapipe/tasks/python/metadata/metadata_writers/image_classifier.py +71 -0
- mediapipe/tasks/python/metadata/metadata_writers/image_segmenter.py +170 -0
- mediapipe/tasks/python/metadata/metadata_writers/metadata_info.py +1166 -0
- mediapipe/tasks/python/metadata/metadata_writers/metadata_writer.py +845 -0
- mediapipe/tasks/python/metadata/metadata_writers/model_asset_bundle_utils.py +71 -0
- mediapipe/tasks/python/metadata/metadata_writers/object_detector.py +331 -0
- mediapipe/tasks/python/metadata/metadata_writers/text_classifier.py +119 -0
- mediapipe/tasks/python/metadata/metadata_writers/writer_utils.py +91 -0
- mediapipe/tasks/python/test/__init__.py +13 -0
- mediapipe/tasks/python/test/audio/__init__.py +13 -0
- mediapipe/tasks/python/test/audio/audio_classifier_test.py +387 -0
- mediapipe/tasks/python/test/audio/audio_embedder_test.py +297 -0
- mediapipe/tasks/python/test/test_utils.py +196 -0
- mediapipe/tasks/python/test/text/__init__.py +13 -0
- mediapipe/tasks/python/test/text/language_detector_test.py +228 -0
- mediapipe/tasks/python/test/text/text_classifier_test.py +231 -0
- mediapipe/tasks/python/test/text/text_embedder_test.py +326 -0
- mediapipe/tasks/python/test/vision/__init__.py +13 -0
- mediapipe/tasks/python/test/vision/face_aligner_test.py +190 -0
- mediapipe/tasks/python/test/vision/face_detector_test.py +523 -0
- mediapipe/tasks/python/test/vision/face_landmarker_test.py +565 -0
- mediapipe/tasks/python/test/vision/face_stylizer_test.py +191 -0
- mediapipe/tasks/python/test/vision/hand_landmarker_test.py +437 -0
- mediapipe/tasks/python/test/vision/image_classifier_test.py +657 -0
- mediapipe/tasks/python/test/vision/image_embedder_test.py +423 -0
- mediapipe/tasks/python/test/vision/image_segmenter_test.py +512 -0
- mediapipe/tasks/python/test/vision/interactive_segmenter_test.py +341 -0
- mediapipe/tasks/python/test/vision/object_detector_test.py +493 -0
- mediapipe/tasks/python/test/vision/pose_landmarker_test.py +518 -0
- mediapipe/tasks/python/text/__init__.py +35 -0
- mediapipe/tasks/python/text/core/__init__.py +16 -0
- mediapipe/tasks/python/text/core/base_text_task_api.py +54 -0
- mediapipe/tasks/python/text/language_detector.py +220 -0
- mediapipe/tasks/python/text/text_classifier.py +187 -0
- mediapipe/tasks/python/text/text_embedder.py +188 -0
- mediapipe/tasks/python/vision/__init__.py +83 -0
- mediapipe/tasks/python/vision/core/__init__.py +14 -0
- mediapipe/tasks/python/vision/core/base_vision_task_api.py +226 -0
- mediapipe/tasks/python/vision/core/image_processing_options.py +39 -0
- mediapipe/tasks/python/vision/core/vision_task_running_mode.py +31 -0
- mediapipe/tasks/python/vision/face_aligner.py +158 -0
- mediapipe/tasks/python/vision/face_detector.py +332 -0
- mediapipe/tasks/python/vision/face_landmarker.py +3244 -0
- mediapipe/tasks/python/vision/face_stylizer.py +158 -0
- mediapipe/tasks/python/vision/gesture_recognizer.py +480 -0
- mediapipe/tasks/python/vision/hand_landmarker.py +504 -0
- mediapipe/tasks/python/vision/image_classifier.py +358 -0
- mediapipe/tasks/python/vision/image_embedder.py +362 -0
- mediapipe/tasks/python/vision/image_segmenter.py +433 -0
- mediapipe/tasks/python/vision/interactive_segmenter.py +285 -0
- mediapipe/tasks/python/vision/object_detector.py +385 -0
- mediapipe/tasks/python/vision/pose_landmarker.py +455 -0
- mediapipe/util/__init__.py +0 -0
- mediapipe/util/analytics/__init__.py +0 -0
- mediapipe/util/analytics/mediapipe_log_extension_pb2.py +41 -0
- mediapipe/util/analytics/mediapipe_logging_enums_pb2.py +36 -0
- mediapipe/util/audio_decoder_pb2.py +33 -0
- mediapipe/util/color_pb2.py +32 -0
- mediapipe/util/label_map_pb2.py +26 -0
- mediapipe/util/render_data_pb2.py +57 -0
- mediapipe/util/sequence/__init__.py +14 -0
- mediapipe/util/sequence/media_sequence.py +716 -0
- mediapipe/util/sequence/media_sequence_test.py +290 -0
- mediapipe/util/sequence/media_sequence_util.py +800 -0
- mediapipe/util/sequence/media_sequence_util_test.py +389 -0
- mediapipe/util/tracking/__init__.py +0 -0
- mediapipe/util/tracking/box_detector_pb2.py +38 -0
- mediapipe/util/tracking/box_tracker_pb2.py +31 -0
- mediapipe/util/tracking/camera_motion_pb2.py +30 -0
- mediapipe/util/tracking/flow_packager_pb2.py +59 -0
- mediapipe/util/tracking/frame_selection_pb2.py +34 -0
- mediapipe/util/tracking/frame_selection_solution_evaluator_pb2.py +27 -0
- mediapipe/util/tracking/motion_analysis_pb2.py +34 -0
- mediapipe/util/tracking/motion_estimation_pb2.py +65 -0
- mediapipe/util/tracking/motion_models_pb2.py +41 -0
- mediapipe/util/tracking/motion_saliency_pb2.py +25 -0
- mediapipe/util/tracking/push_pull_filtering_pb2.py +25 -0
- mediapipe/util/tracking/region_flow_computation_pb2.py +58 -0
- mediapipe/util/tracking/region_flow_pb2.py +48 -0
- mediapipe/util/tracking/tone_estimation_pb2.py +44 -0
- mediapipe/util/tracking/tone_models_pb2.py +31 -0
- mediapipe/util/tracking/tracked_detection_manager_config_pb2.py +25 -0
- mediapipe/util/tracking/tracking_pb2.py +72 -0
- mediapipe_nightly-0.0.0.post20231103.dist-info/LICENSE +218 -0
- mediapipe_nightly-0.0.0.post20231103.dist-info/METADATA +196 -0
- mediapipe_nightly-0.0.0.post20231103.dist-info/RECORD +545 -0
- mediapipe_nightly-0.0.0.post20231103.dist-info/WHEEL +5 -0
- mediapipe_nightly-0.0.0.post20231103.dist-info/top_level.txt +4 -0
@@ -0,0 +1,26 @@
|
|
1
|
+
# Copyright 2022 The MediaPipe Authors.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
"""MediaPipe Tasks API."""
|
16
|
+
|
17
|
+
from . import audio
|
18
|
+
from . import components
|
19
|
+
from . import core
|
20
|
+
from . import text
|
21
|
+
from . import vision
|
22
|
+
|
23
|
+
# Re-export `BaseOptions` at the package level so callers can reference it
# without reaching into the `core` subpackage.
BaseOptions = core.base_options.BaseOptions

# Remove unnecessary modules to avoid duplication in API docs.
# Only the name `core` is unbound from this namespace; the `BaseOptions` alias
# above holds its own reference and keeps working.
del core
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# Copyright 2022 The MediaPipe Authors.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
"""MediaPipe Tasks Audio API."""
|
16
|
+
|
17
|
+
import mediapipe.tasks.python.audio.core
|
18
|
+
import mediapipe.tasks.python.audio.audio_classifier
|
19
|
+
import mediapipe.tasks.python.audio.audio_embedder
|
20
|
+
|
21
|
+
# Public aliases exposed at the package level.
# NOTE(review): `audio_classifier`, `audio_embedder` and `core` are never bound
# by an explicit `from ... import`; they appear to be reachable here because
# importing a submodule binds it as an attribute of its parent package, i.e. in
# this module's namespace. Confirm before restructuring the imports above.
AudioClassifier = audio_classifier.AudioClassifier
AudioClassifierOptions = audio_classifier.AudioClassifierOptions
AudioClassifierResult = audio_classifier.AudioClassifierResult
AudioEmbedder = audio_embedder.AudioEmbedder
AudioEmbedderOptions = audio_embedder.AudioEmbedderOptions
AudioEmbedderResult = audio_embedder.AudioEmbedderResult
RunningMode = core.audio_task_running_mode.AudioTaskRunningMode

# Remove unnecessary modules to avoid duplication in API docs.
# The aliases above keep their own references, so only the module names go.
del audio_classifier
del audio_embedder
del core
# `mediapipe` was bound by the `import mediapipe....` statements above.
del mediapipe
|
@@ -0,0 +1,324 @@
|
|
1
|
+
# Copyright 2022 The MediaPipe Authors.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
"""MediaPipe audio classifier task."""
|
15
|
+
|
16
|
+
import dataclasses
|
17
|
+
from typing import Callable, Mapping, List, Optional
|
18
|
+
|
19
|
+
from mediapipe.python import packet_creator
|
20
|
+
from mediapipe.python import packet_getter
|
21
|
+
from mediapipe.python._framework_bindings import packet
|
22
|
+
from mediapipe.tasks.cc.audio.audio_classifier.proto import audio_classifier_graph_options_pb2
|
23
|
+
from mediapipe.tasks.cc.components.containers.proto import classifications_pb2
|
24
|
+
from mediapipe.tasks.cc.components.processors.proto import classifier_options_pb2
|
25
|
+
from mediapipe.tasks.python.audio.core import audio_task_running_mode as running_mode_module
|
26
|
+
from mediapipe.tasks.python.audio.core import base_audio_task_api
|
27
|
+
from mediapipe.tasks.python.components.containers import audio_data as audio_data_module
|
28
|
+
from mediapipe.tasks.python.components.containers import classification_result as classification_result_module
|
29
|
+
from mediapipe.tasks.python.core import base_options as base_options_module
|
30
|
+
from mediapipe.tasks.python.core import task_info as task_info_module
|
31
|
+
from mediapipe.tasks.python.core.optional_dependencies import doc_controls
|
32
|
+
|
33
|
+
# Public result type: an alias of the shared classification result container.
AudioClassifierResult = classification_result_module.ClassificationResult
# Private shorthands for types imported from other modules.
_AudioClassifierGraphOptionsProto = audio_classifier_graph_options_pb2.AudioClassifierGraphOptions
_AudioData = audio_data_module.AudioData
_BaseOptions = base_options_module.BaseOptions
_ClassifierOptionsProto = classifier_options_pb2.ClassifierOptions
_RunningMode = running_mode_module.AudioTaskRunningMode
_TaskInfo = task_info_module.TaskInfo

# Graph stream names and tags. Each tag/name pair is joined as 'TAG:name' when
# the task graph is configured in `AudioClassifier.create_from_options`.
_AUDIO_IN_STREAM_NAME = 'audio_in'
_AUDIO_TAG = 'AUDIO'
_CLASSIFICATIONS_STREAM_NAME = 'classifications_out'
_CLASSIFICATIONS_TAG = 'CLASSIFICATIONS'
_SAMPLE_RATE_IN_STREAM_NAME = 'sample_rate_in'
_SAMPLE_RATE_TAG = 'SAMPLE_RATE'
# Name of the task graph passed to `_TaskInfo`.
_TASK_GRAPH_NAME = 'mediapipe.tasks.audio.audio_classifier.AudioClassifierGraph'
_TIMESTAMPED_CLASSIFICATIONS_STREAM_NAME = 'timestamped_classifications_out'
_TIMESTAMPED_CLASSIFICATIONS_TAG = 'TIMESTAMPED_CLASSIFICATIONS'
# Conversion factor between the millisecond timestamps used by this API and
# packet timestamp values: multiplied on input, floor-divided on output.
_MICRO_SECONDS_PER_MILLISECOND = 1000
|
51
|
+
|
52
|
+
|
53
|
+
@dataclasses.dataclass
class AudioClassifierOptions:
  """Configuration used to create an `AudioClassifier`.

  Attributes:
    base_options: Base options for the audio classifier task.
    running_mode: The running mode of the task; defaults to the audio clips
      mode. Two modes exist: 1) the audio clips mode, which classifies
      independent audio clips, and 2) the audio stream mode, which classifies
      a continuous audio stream such as microphone input. In the stream mode
      the "result_callback" below must be specified to receive the
      classification results asynchronously.
    display_names_locale: The locale to use for display names specified
      through the TFLite Model Metadata.
    max_results: The maximum number of top-scored classification results to
      return.
    score_threshold: Overrides the ones provided in the model metadata.
      Results below this value are rejected.
    category_allowlist: Allowlist of category names. If non-empty,
      classification results whose category name is not in this set will be
      filtered out. Duplicate or unknown category names are ignored. Mutually
      exclusive with `category_denylist`.
    category_denylist: Denylist of category names. If non-empty,
      classification results whose category name is in this set will be
      filtered out. Duplicate or unknown category names are ignored. Mutually
      exclusive with `category_allowlist`.
    result_callback: The user-defined result callback for processing audio
      stream data. It should only be specified when the running mode is set
      to the audio stream mode.
  """
  base_options: _BaseOptions
  running_mode: _RunningMode = _RunningMode.AUDIO_CLIPS
  display_names_locale: Optional[str] = None
  max_results: Optional[int] = None
  score_threshold: Optional[float] = None
  category_allowlist: Optional[List[str]] = None
  category_denylist: Optional[List[str]] = None
  result_callback: Optional[Callable[[AudioClassifierResult, int], None]] = None

  @doc_controls.do_not_generate_docs
  def to_pb2(self) -> _AudioClassifierGraphOptionsProto:
    """Builds the graph options proto that corresponds to these options."""
    graph_base_options = self.base_options.to_pb2()
    # Every running mode other than AUDIO_CLIPS runs the graph in stream mode.
    graph_base_options.use_stream_mode = (
        self.running_mode != _RunningMode.AUDIO_CLIPS)

    classifier_options = _ClassifierOptionsProto(
        score_threshold=self.score_threshold,
        category_allowlist=self.category_allowlist,
        category_denylist=self.category_denylist,
        display_names_locale=self.display_names_locale,
        max_results=self.max_results)

    return _AudioClassifierGraphOptionsProto(
        base_options=graph_base_options,
        classifier_options=classifier_options)
|
107
|
+
|
108
|
+
|
109
|
+
class AudioClassifier(base_audio_task_api.BaseAudioTaskApi):
  """Runs audio classification on audio clips or on an audio stream.

  This API expects a TFLite model with mandatory TFLite Model Metadata that
  contains the mandatory AudioProperties of the solo input audio tensor and the
  optional (but recommended) category labels as AssociatedFiles with type
  TENSOR_AXIS_LABELS per output classification tensor.

  Input tensor:
    (kTfLiteFloat32)
    - input audio buffer of size `[batch * samples]`.
    - batch inference is not supported (`batch` is required to be 1).
    - for multi-channel models, the channels must be interleaved.
  At least one output tensor with:
    (kTfLiteFloat32)
    - `[1 x N]` array with `N` represents the number of categories.
    - optional (but recommended) category labels as AssociatedFiles with type
      TENSOR_AXIS_LABELS, containing one label per line. The first such
      AssociatedFile (if any) is used to fill the `category_name` field of the
      results. The `display_name` field is filled from the AssociatedFile (if
      any) whose locale matches the `display_names_locale` field of the
      `AudioClassifierOptions` used at creation time ("en" by default, i.e.
      English). If none of these are available, only the `index` field of the
      results will be filled.
  """

  @classmethod
  def create_from_model_path(cls, model_path: str) -> 'AudioClassifier':
    """Creates an `AudioClassifier` from a TensorFlow Lite model file.

    Default `AudioClassifierOptions` are used, so the created instance runs in
    audio clips mode, for classifying independent audio clips.

    Args:
      model_path: Path to the model.

    Returns:
      `AudioClassifier` object that's created from the model file and the
      default `AudioClassifierOptions`.

    Raises:
      ValueError: If failed to create `AudioClassifier` object from the provided
        file such as invalid file path.
      RuntimeError: If other types of error occurred.
    """
    default_options = AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_path=model_path),
        running_mode=_RunningMode.AUDIO_CLIPS)
    return cls.create_from_options(default_options)

  @classmethod
  def create_from_options(cls,
                          options: AudioClassifierOptions) -> 'AudioClassifier':
    """Creates an `AudioClassifier` from the given options.

    Args:
      options: Options for the audio classifier task.

    Returns:
      `AudioClassifier` object that's created from `options`.

    Raises:
      ValueError: If failed to create `AudioClassifier` object from
        `AudioClassifierOptions` such as missing the model.
      RuntimeError: If other types of error occurred.
    """

    def packets_callback(output_packets: Mapping[str, packet.Packet]):
      # Forwards one classification result to the user callback, together
      # with the packet timestamp converted to milliseconds.
      classifications_packet = output_packets[_CLASSIFICATIONS_STREAM_NAME]
      timestamp_ms = (
          classifications_packet.timestamp.value //
          _MICRO_SECONDS_PER_MILLISECOND)
      if classifications_packet.is_empty():
        # An empty packet is reported as a result with no classifications.
        result = AudioClassifierResult(classifications=[])
      else:
        result_proto = classifications_pb2.ClassificationResult()
        result_proto.CopyFrom(packet_getter.get_proto(classifications_packet))
        result = AudioClassifierResult.create_from_pb2(result_proto)
      options.result_callback(result, timestamp_ms)

    task_info = _TaskInfo(
        task_graph=_TASK_GRAPH_NAME,
        input_streams=[
            f'{_AUDIO_TAG}:{_AUDIO_IN_STREAM_NAME}',
            f'{_SAMPLE_RATE_TAG}:{_SAMPLE_RATE_IN_STREAM_NAME}',
        ],
        output_streams=[
            f'{_CLASSIFICATIONS_TAG}:{_CLASSIFICATIONS_STREAM_NAME}',
            f'{_TIMESTAMPED_CLASSIFICATIONS_TAG}:'
            f'{_TIMESTAMPED_CLASSIFICATIONS_STREAM_NAME}',
        ],
        task_options=options)
    # Audio tasks should not drop input audio due to flow limiting, which
    # may cause data inconsistency.
    graph_config = task_info.generate_graph_config(enable_flow_limiting=False)
    running_mode = options.running_mode
    # The packet callback is only installed when the user supplied a callback.
    callback = packets_callback if options.result_callback else None
    return cls(graph_config, running_mode, callback)

  def classify(self, audio_clip: _AudioData) -> List[AudioClassifierResult]:
    """Classifies the provided audio clip.

    The audio clip is represented as a MediaPipe AudioData. Clips of various
    lengths and sample rates are accepted, but the corresponding audio sample
    rate must be provided within the `AudioData` object.

    The input audio clip may be longer than what the model is able to process
    in a single inference. When this occurs, the input audio clip is split into
    multiple chunks starting at different timestamps. For this reason, this
    function returns a list of ClassificationResult objects, each associated
    with a timestamp corresponding to the start (in milliseconds) of the chunk
    data that was classified, e.g:

    ClassificationResult #0 (first chunk of data):
      timestamp_ms: 0 (starts at 0ms)
      classifications #0 (single head model):
        category #0:
          category_name: "Speech"
          score: 0.6
        category #1:
          category_name: "Music"
          score: 0.2
    ClassificationResult #1 (second chunk of data):
      timestamp_ms: 800 (starts at 800ms)
      classifications #0 (single head model):
        category #0:
          category_name: "Speech"
          score: 0.5
        category #1:
          category_name: "Silence"
          score: 0.1

    Args:
      audio_clip: MediaPipe AudioData.

    Returns:
      A list of `AudioClassifierResult` objects, each associated with a
      timestamp corresponding to the start (in milliseconds) of the chunk data
      that was classified.

    Raises:
      ValueError: If any of the input arguments is invalid, such as the sample
        rate is not provided in the `AudioData` object.
      RuntimeError: If audio classification failed to run.
    """
    if not audio_clip.audio_format.sample_rate:
      raise ValueError('Must provide the audio sample rate in audio data.')

    audio_packet = packet_creator.create_matrix(
        audio_clip.buffer, transpose=True)
    sample_rate_packet = packet_creator.create_double(
        audio_clip.audio_format.sample_rate)
    output_packets = self._process_audio_clip({
        _AUDIO_IN_STREAM_NAME: audio_packet,
        _SAMPLE_RATE_IN_STREAM_NAME: sample_rate_packet,
    })

    timestamped_protos = packet_getter.get_proto_list(
        output_packets[_TIMESTAMPED_CLASSIFICATIONS_STREAM_NAME])
    results = []
    for chunk_proto in timestamped_protos:
      # Copy into a concrete ClassificationResult proto before converting.
      result_proto = classifications_pb2.ClassificationResult()
      result_proto.CopyFrom(chunk_proto)
      results.append(AudioClassifierResult.create_from_pb2(result_proto))
    return results

  def classify_async(self, audio_block: _AudioData, timestamp_ms: int) -> None:
    """Sends a block of a continuous audio stream for classification.

    Only use this method when the AudioClassifier is created with the audio
    stream running mode. The input timestamps should be monotonically increasing
    for adjacent calls of this method. This method will return immediately after
    the input audio data is accepted. The results will be available via the
    `result_callback` provided in the `AudioClassifierOptions`. The
    `classify_async` method is designed to process audio stream data such as
    microphone input.

    The input audio data may be longer than what the model is able to process
    in a single inference. When this occurs, the input audio block is split
    into multiple chunks. For this reason, the callback may be called multiple
    times (once per chunk) for each call to this function.

    The `result_callback` provides:
      - An `AudioClassifierResult` object that contains a list of
        classifications.
      - The input timestamp in milliseconds.

    Args:
      audio_block: MediaPipe AudioData.
      timestamp_ms: The timestamp of the input audio data in milliseconds.

    Raises:
      ValueError: If any of the followings:
        1) The sample rate is not provided in the `AudioData` object or the
        provided sample rate is inconsistent with the previously received.
        2) The current input timestamp is smaller than what the audio
        classifier has already processed.
    """
    if not audio_block.audio_format.sample_rate:
      raise ValueError('Must provide the audio sample rate in audio data.')

    if not self._default_sample_rate:
      # First block seen: remember its sample rate and send it to the graph.
      self._default_sample_rate = audio_block.audio_format.sample_rate
      self._set_sample_rate(_SAMPLE_RATE_IN_STREAM_NAME,
                            self._default_sample_rate)
    elif audio_block.audio_format.sample_rate != self._default_sample_rate:
      raise ValueError(
          f'The audio sample rate provided in audio data: '
          f'{audio_block.audio_format.sample_rate} is inconsistent with '
          f'the previously received: {self._default_sample_rate}.')

    # Stamp the packet with the input timestamp scaled from milliseconds.
    audio_packet = packet_creator.create_matrix(
        audio_block.buffer, transpose=True).at(
            timestamp_ms * _MICRO_SECONDS_PER_MILLISECOND)
    self._send_audio_stream_data({_AUDIO_IN_STREAM_NAME: audio_packet})
|
@@ -0,0 +1,285 @@
|
|
1
|
+
# Copyright 2022 The MediaPipe Authors.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
"""MediaPipe audio embedder task."""
|
15
|
+
|
16
|
+
import dataclasses
|
17
|
+
from typing import Callable, Mapping, List, Optional
|
18
|
+
|
19
|
+
from mediapipe.python import packet_creator
|
20
|
+
from mediapipe.python import packet_getter
|
21
|
+
from mediapipe.python._framework_bindings import packet
|
22
|
+
from mediapipe.tasks.cc.audio.audio_embedder.proto import audio_embedder_graph_options_pb2
|
23
|
+
from mediapipe.tasks.cc.components.containers.proto import embeddings_pb2
|
24
|
+
from mediapipe.tasks.cc.components.processors.proto import embedder_options_pb2
|
25
|
+
from mediapipe.tasks.python.audio.core import audio_task_running_mode as running_mode_module
|
26
|
+
from mediapipe.tasks.python.audio.core import base_audio_task_api
|
27
|
+
from mediapipe.tasks.python.components.containers import audio_data as audio_data_module
|
28
|
+
from mediapipe.tasks.python.components.containers import embedding_result as embedding_result_module
|
29
|
+
from mediapipe.tasks.python.core import base_options as base_options_module
|
30
|
+
from mediapipe.tasks.python.core import task_info as task_info_module
|
31
|
+
from mediapipe.tasks.python.core.optional_dependencies import doc_controls
|
32
|
+
|
33
|
+
# Public result type: an alias of the shared embedding result container.
AudioEmbedderResult = embedding_result_module.EmbeddingResult
# Private shorthands for types imported from other modules.
_AudioEmbedderGraphOptionsProto = audio_embedder_graph_options_pb2.AudioEmbedderGraphOptions
_AudioData = audio_data_module.AudioData
_BaseOptions = base_options_module.BaseOptions
_EmbedderOptionsProto = embedder_options_pb2.EmbedderOptions
_RunningMode = running_mode_module.AudioTaskRunningMode
_TaskInfo = task_info_module.TaskInfo

# Graph stream names and tags. Each tag/name pair is joined as 'TAG:name' when
# the task graph is configured in `AudioEmbedder.create_from_options`.
_AUDIO_IN_STREAM_NAME = 'audio_in'
_AUDIO_TAG = 'AUDIO'
_EMBEDDINGS_STREAM_NAME = 'embeddings_out'
_EMBEDDINGS_TAG = 'EMBEDDINGS'
_SAMPLE_RATE_IN_STREAM_NAME = 'sample_rate_in'
_SAMPLE_RATE_TAG = 'SAMPLE_RATE'
# Name of the task graph passed to `_TaskInfo`.
_TASK_GRAPH_NAME = 'mediapipe.tasks.audio.audio_embedder.AudioEmbedderGraph'
# NOTE(review): "TIMESTAMPTED" in the two names below is a misspelling of
# "TIMESTAMPED"; the string values are spelled correctly. Renaming requires
# updating every use of these names in this module.
_TIMESTAMPTED_EMBEDDINGS_STREAM_NAME = 'timestamped_embeddings_out'
_TIMESTAMPTED_EMBEDDINGS_TAG = 'TIMESTAMPED_EMBEDDINGS'
# Packet timestamp values are floor-divided by this factor to obtain the
# millisecond timestamps handed to the result callback.
_MICRO_SECONDS_PER_MILLISECOND = 1000
|
51
|
+
|
52
|
+
|
53
|
+
@dataclasses.dataclass
class AudioEmbedderOptions:
  """Configuration used to create an `AudioEmbedder`.

  Attributes:
    base_options: Base options for the audio embedder task.
    running_mode: The running mode of the task; defaults to the audio clips
      mode. Two modes exist: 1) the audio clips mode, which extracts
      embeddings from independent audio clips, and 2) the audio stream mode,
      which extracts embeddings from a continuous audio stream such as
      microphone input. In the stream mode the "result_callback" below must be
      specified to receive the embedding results asynchronously.
    l2_normalize: Whether to normalize the returned feature vector with L2
      norm. Use this option only if the model does not already contain a
      native L2_NORMALIZATION TF Lite Op. In most cases, this is already the
      case and L2 norm is thus achieved through TF Lite inference.
    quantize: Whether the returned embedding should be quantized to bytes via
      scalar quantization. Embeddings are implicitly assumed to be unit-norm
      and therefore any dimension is guaranteed to have a value in
      [-1.0, 1.0]. Use the l2_normalize option if this is not the case.
    result_callback: The user-defined result callback for processing audio
      stream data. It should only be specified when the running mode is set
      to the audio stream mode.
  """
  base_options: _BaseOptions
  running_mode: _RunningMode = _RunningMode.AUDIO_CLIPS
  l2_normalize: Optional[bool] = None
  quantize: Optional[bool] = None
  result_callback: Optional[Callable[[AudioEmbedderResult, int], None]] = None

  @doc_controls.do_not_generate_docs
  def to_pb2(self) -> _AudioEmbedderGraphOptionsProto:
    """Builds the graph options proto that corresponds to these options."""
    graph_base_options = self.base_options.to_pb2()
    # Every running mode other than AUDIO_CLIPS runs the graph in stream mode.
    graph_base_options.use_stream_mode = (
        self.running_mode != _RunningMode.AUDIO_CLIPS)

    embedder_options = _EmbedderOptionsProto(
        l2_normalize=self.l2_normalize, quantize=self.quantize)

    return _AudioEmbedderGraphOptionsProto(
        base_options=graph_base_options,
        embedder_options=embedder_options)
|
94
|
+
|
95
|
+
|
96
|
+
class AudioEmbedder(base_audio_task_api.BaseAudioTaskApi):
|
97
|
+
"""Class that performs embedding extraction on audio clips or audio stream.
|
98
|
+
|
99
|
+
This API expects a TFLite model with mandatory TFLite Model Metadata that
|
100
|
+
contains the mandatory AudioProperties of the solo input audio tensor and the
|
101
|
+
optional (but recommended) label items as AssociatedFiles with type
|
102
|
+
TENSOR_AXIS_LABELS per output embedding tensor.
|
103
|
+
|
104
|
+
Input tensor:
|
105
|
+
(kTfLiteFloat32)
|
106
|
+
- input audio buffer of size `[batch * samples]`.
|
107
|
+
- batch inference is not supported (`batch` is required to be 1).
|
108
|
+
- for multi-channel models, the channels must be interleaved.
|
109
|
+
At least one output tensor with:
|
110
|
+
(kTfLiteUInt8/kTfLiteFloat32)
|
111
|
+
- `N` components corresponding to the `N` dimensions of the returned
|
112
|
+
feature vector for this output layer.
|
113
|
+
- Either 2 or 4 dimensions, i.e. `[1 x N]` or `[1 x 1 x 1 x N]`.
|
114
|
+
"""
|
115
|
+
|
116
|
+
@classmethod
|
117
|
+
def create_from_model_path(cls, model_path: str) -> 'AudioEmbedder':
|
118
|
+
"""Creates an `AudioEmbedder` object from a TensorFlow Lite model and the default `AudioEmbedderOptions`.
|
119
|
+
|
120
|
+
Note that the created `AudioEmbedder` instance is in audio clips mode, for
|
121
|
+
embedding extraction on the independent audio clips.
|
122
|
+
|
123
|
+
Args:
|
124
|
+
model_path: Path to the model.
|
125
|
+
|
126
|
+
Returns:
|
127
|
+
`AudioEmbedder` object that's created from the model file and the
|
128
|
+
default `AudioEmbedderOptions`.
|
129
|
+
|
130
|
+
Raises:
|
131
|
+
ValueError: If failed to create `AudioEmbedder` object from the provided
|
132
|
+
file such as invalid file path.
|
133
|
+
RuntimeError: If other types of error occurred.
|
134
|
+
"""
|
135
|
+
base_options = _BaseOptions(model_asset_path=model_path)
|
136
|
+
options = AudioEmbedderOptions(
|
137
|
+
base_options=base_options, running_mode=_RunningMode.AUDIO_CLIPS)
|
138
|
+
return cls.create_from_options(options)
|
139
|
+
|
140
|
+
@classmethod
def create_from_options(cls,
                        options: AudioEmbedderOptions) -> 'AudioEmbedder':
  """Builds an `AudioEmbedder` from the given audio embedder options.

  Args:
    options: Options for the audio embedder task.

  Returns:
    `AudioEmbedder` object that's created from `options`.

  Raises:
    ValueError: If the `AudioEmbedder` object cannot be created from
      `AudioEmbedderOptions`, e.g. because the model is missing.
    RuntimeError: If other types of error occurred.
  """

  def packets_callback(output_packets: Mapping[str, packet.Packet]):
    # Forwards one embeddings packet to the user-supplied result callback,
    # together with the packet timestamp converted to milliseconds.
    embeddings_packet = output_packets[_EMBEDDINGS_STREAM_NAME]
    timestamp_ms = (
        embeddings_packet.timestamp.value // _MICRO_SECONDS_PER_MILLISECOND)
    if embeddings_packet.is_empty():
      # An empty packet is reported as a result without embeddings.
      result = AudioEmbedderResult(embeddings=[])
    else:
      embedding_result_proto = embeddings_pb2.EmbeddingResult()
      embedding_result_proto.CopyFrom(
          packet_getter.get_proto(embeddings_packet))
      result = AudioEmbedderResult.create_from_pb2(embedding_result_proto)
    options.result_callback(result, timestamp_ms)

  # Stream specs are written as "<TAG>:<stream name>".
  task_info = _TaskInfo(
      task_graph=_TASK_GRAPH_NAME,
      input_streams=[
          f'{_AUDIO_TAG}:{_AUDIO_IN_STREAM_NAME}',
          f'{_SAMPLE_RATE_TAG}:{_SAMPLE_RATE_IN_STREAM_NAME}',
      ],
      output_streams=[
          f'{_EMBEDDINGS_TAG}:{_EMBEDDINGS_STREAM_NAME}',
          f'{_TIMESTAMPTED_EMBEDDINGS_TAG}:'
          f'{_TIMESTAMPTED_EMBEDDINGS_STREAM_NAME}',
      ],
      task_options=options)
  # Audio tasks should not drop input audio due to flow limiting, which
  # may cause data inconsistency.
  graph_config = task_info.generate_graph_config(enable_flow_limiting=False)
  running_mode = options.running_mode
  # The packets callback is only installed when the user provided a
  # `result_callback` to forward results to.
  callback = packets_callback if options.result_callback else None
  return cls(graph_config, running_mode, callback)
|
190
|
+
|
191
|
+
def embed(self, audio_clip: _AudioData) -> List[AudioEmbedderResult]:
  """Extracts embeddings from the provided audio clip.

  The audio clip is a MediaPipe `AudioData`. Clips of various lengths and
  sample rates are accepted, but the sample rate must be set on the
  `AudioData` object.

  The input audio clip may be longer than what the model is able to process
  in a single inference. When this occurs, the clip is split into multiple
  chunks starting at different timestamps. For this reason one result is
  returned per chunk, each associated with a timestamp corresponding to the
  start (in milliseconds) of the chunk data on which embedding extraction
  was carried out.

  Args:
    audio_clip: MediaPipe AudioData.

  Returns:
    A list of `AudioEmbedderResult` objects, one for each entry of the
    timestamped embeddings output.

  Raises:
    ValueError: If any of the input arguments is invalid, such as the sample
      rate not being provided in the `AudioData` object.
    RuntimeError: If audio embedding extraction failed to run.
  """
  sample_rate = audio_clip.audio_format.sample_rate
  if not sample_rate:
    raise ValueError('Must provide the audio sample rate in audio data.')

  def _to_result(proto) -> AudioEmbedderResult:
    # Each proto is copied into a new `EmbeddingResult` message before it
    # is converted into the result type.
    copied_proto = embeddings_pb2.EmbeddingResult()
    copied_proto.CopyFrom(proto)
    return AudioEmbedderResult.create_from_pb2(copied_proto)

  input_packets = {
      _AUDIO_IN_STREAM_NAME:
          packet_creator.create_matrix(audio_clip.buffer, transpose=True),
      _SAMPLE_RATE_IN_STREAM_NAME:
          packet_creator.create_double(sample_rate),
  }
  output_packets = self._process_audio_clip(input_packets)
  timestamped_protos = packet_getter.get_proto_list(
      output_packets[_TIMESTAMPTED_EMBEDDINGS_STREAM_NAME])
  return [_to_result(proto) for proto in timestamped_protos]
|
236
|
+
|
237
|
+
def embed_async(self, audio_block: _AudioData, timestamp_ms: int) -> None:
  """Sends a block of a continuous audio stream for embedding extraction.

  Only use this method when the AudioEmbedder is created with the audio
  stream running mode. The input timestamps should be monotonically
  increasing for adjacent calls of this method. This method returns
  immediately after the input audio data is accepted; the results are
  delivered via the `result_callback` provided in the
  `AudioEmbedderOptions`. The `embed_async` method is designed to process
  audio stream data such as microphone input.

  The input audio data may be longer than what the model is able to process
  in a single inference. When this occurs, the input audio block is split
  into multiple chunks. For this reason, the callback may be called multiple
  times (once per chunk) for each call to this function.

  The `result_callback` provides:
    - An `AudioEmbedderResult` object that contains a list of embeddings.
    - The input timestamp in milliseconds.

  Args:
    audio_block: MediaPipe AudioData.
    timestamp_ms: The timestamp of the input audio data in milliseconds.

  Raises:
    ValueError: If any of the following:
      1) The sample rate is not provided in the `AudioData` object or the
        provided sample rate is inconsistent with the previously received.
      2) The current input timestamp is smaller than what the audio
        embedder has already processed.
  """
  sample_rate = audio_block.audio_format.sample_rate
  if not sample_rate:
    raise ValueError('Must provide the audio sample rate in audio data.')

  if self._default_sample_rate:
    # A sample rate was already recorded; later blocks must match it.
    if sample_rate != self._default_sample_rate:
      raise ValueError(
          f'The audio sample rate provided in audio data: {sample_rate} '
          f'is inconsistent with the previously received: '
          f'{self._default_sample_rate}.')
  else:
    # No sample rate recorded yet: remember this one and pass it on through
    # the sample rate input stream.
    self._default_sample_rate = sample_rate
    self._set_sample_rate(_SAMPLE_RATE_IN_STREAM_NAME,
                          self._default_sample_rate)

  # The packet timestamp is the millisecond input scaled by
  # `_MICRO_SECONDS_PER_MILLISECOND`.
  audio_packet = packet_creator.create_matrix(
      audio_block.buffer, transpose=True)
  timestamped_packet = audio_packet.at(
      timestamp_ms * _MICRO_SECONDS_PER_MILLISECOND)
  self._send_audio_stream_data({_AUDIO_IN_STREAM_NAME: timestamped_packet})
|
@@ -0,0 +1,16 @@
|
|
1
|
+
"""Copyright 2022 The MediaPipe Authors.
|
2
|
+
|
3
|
+
All Rights Reserved.
|
4
|
+
|
5
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
6
|
+
you may not use this file except in compliance with the License.
|
7
|
+
You may obtain a copy of the License at
|
8
|
+
|
9
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
|
11
|
+
Unless required by applicable law or agreed to in writing, software
|
12
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
|
+
See the License for the specific language governing permissions and
|
15
|
+
limitations under the License.
|
16
|
+
"""
|