mediapipe-nightly 0.0.0.post20231103__cp311-cp311-macosx_11_0_universal2.whl
Sign up to get free protection for your applications and to get access to all the features.
- mediapipe/__init__.py +26 -0
- mediapipe/calculators/__init__.py +0 -0
- mediapipe/calculators/audio/__init__.py +0 -0
- mediapipe/calculators/audio/mfcc_mel_calculators_pb2.py +34 -0
- mediapipe/calculators/audio/rational_factor_resample_calculator_pb2.py +33 -0
- mediapipe/calculators/audio/spectrogram_calculator_pb2.py +35 -0
- mediapipe/calculators/audio/stabilized_log_calculator_pb2.py +31 -0
- mediapipe/calculators/audio/time_series_framer_calculator_pb2.py +33 -0
- mediapipe/calculators/core/__init__.py +0 -0
- mediapipe/calculators/core/bypass_calculator_pb2.py +31 -0
- mediapipe/calculators/core/clip_vector_size_calculator_pb2.py +31 -0
- mediapipe/calculators/core/concatenate_vector_calculator_pb2.py +31 -0
- mediapipe/calculators/core/constant_side_packet_calculator_pb2.py +37 -0
- mediapipe/calculators/core/dequantize_byte_array_calculator_pb2.py +31 -0
- mediapipe/calculators/core/flow_limiter_calculator_pb2.py +32 -0
- mediapipe/calculators/core/gate_calculator_pb2.py +33 -0
- mediapipe/calculators/core/get_vector_item_calculator_pb2.py +31 -0
- mediapipe/calculators/core/graph_profile_calculator_pb2.py +31 -0
- mediapipe/calculators/core/packet_cloner_calculator_pb2.py +31 -0
- mediapipe/calculators/core/packet_resampler_calculator_pb2.py +33 -0
- mediapipe/calculators/core/packet_thinner_calculator_pb2.py +33 -0
- mediapipe/calculators/core/quantize_float_vector_calculator_pb2.py +31 -0
- mediapipe/calculators/core/sequence_shift_calculator_pb2.py +31 -0
- mediapipe/calculators/core/split_vector_calculator_pb2.py +33 -0
- mediapipe/calculators/image/__init__.py +0 -0
- mediapipe/calculators/image/bilateral_filter_calculator_pb2.py +31 -0
- mediapipe/calculators/image/feature_detector_calculator_pb2.py +31 -0
- mediapipe/calculators/image/image_clone_calculator_pb2.py +31 -0
- mediapipe/calculators/image/image_cropping_calculator_pb2.py +33 -0
- mediapipe/calculators/image/image_transformation_calculator_pb2.py +38 -0
- mediapipe/calculators/image/mask_overlay_calculator_pb2.py +33 -0
- mediapipe/calculators/image/opencv_encoded_image_to_image_frame_calculator_pb2.py +31 -0
- mediapipe/calculators/image/opencv_image_encoder_calculator_pb2.py +35 -0
- mediapipe/calculators/image/recolor_calculator_pb2.py +34 -0
- mediapipe/calculators/image/rotation_mode_pb2.py +28 -0
- mediapipe/calculators/image/scale_image_calculator_pb2.py +34 -0
- mediapipe/calculators/image/segmentation_smoothing_calculator_pb2.py +31 -0
- mediapipe/calculators/image/set_alpha_calculator_pb2.py +31 -0
- mediapipe/calculators/image/warp_affine_calculator_pb2.py +36 -0
- mediapipe/calculators/internal/__init__.py +0 -0
- mediapipe/calculators/internal/callback_packet_calculator_pb2.py +33 -0
- mediapipe/calculators/tensor/__init__.py +0 -0
- mediapipe/calculators/tensor/audio_to_tensor_calculator_pb2.py +35 -0
- mediapipe/calculators/tensor/bert_preprocessor_calculator_pb2.py +31 -0
- mediapipe/calculators/tensor/feedback_tensors_calculator_pb2.py +37 -0
- mediapipe/calculators/tensor/image_to_tensor_calculator_pb2.py +40 -0
- mediapipe/calculators/tensor/inference_calculator_pb2.py +53 -0
- mediapipe/calculators/tensor/landmarks_to_tensor_calculator_pb2.py +33 -0
- mediapipe/calculators/tensor/regex_preprocessor_calculator_pb2.py +31 -0
- mediapipe/calculators/tensor/tensor_converter_calculator_pb2.py +34 -0
- mediapipe/calculators/tensor/tensor_to_joints_calculator_pb2.py +31 -0
- mediapipe/calculators/tensor/tensors_readback_calculator_pb2.py +35 -0
- mediapipe/calculators/tensor/tensors_to_audio_calculator_pb2.py +33 -0
- mediapipe/calculators/tensor/tensors_to_classification_calculator_pb2.py +44 -0
- mediapipe/calculators/tensor/tensors_to_detections_calculator_pb2.py +39 -0
- mediapipe/calculators/tensor/tensors_to_floats_calculator_pb2.py +33 -0
- mediapipe/calculators/tensor/tensors_to_landmarks_calculator_pb2.py +33 -0
- mediapipe/calculators/tensor/tensors_to_segmentation_calculator_pb2.py +34 -0
- mediapipe/calculators/tflite/__init__.py +0 -0
- mediapipe/calculators/tflite/ssd_anchors_calculator_pb2.py +32 -0
- mediapipe/calculators/tflite/tflite_converter_calculator_pb2.py +33 -0
- mediapipe/calculators/tflite/tflite_custom_op_resolver_calculator_pb2.py +31 -0
- mediapipe/calculators/tflite/tflite_inference_calculator_pb2.py +49 -0
- mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator_pb2.py +31 -0
- mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator_pb2.py +31 -0
- mediapipe/calculators/tflite/tflite_tensors_to_landmarks_calculator_pb2.py +33 -0
- mediapipe/calculators/tflite/tflite_tensors_to_segmentation_calculator_pb2.py +31 -0
- mediapipe/calculators/util/__init__.py +0 -0
- mediapipe/calculators/util/align_hand_to_pose_in_world_calculator_pb2.py +31 -0
- mediapipe/calculators/util/annotation_overlay_calculator_pb2.py +32 -0
- mediapipe/calculators/util/association_calculator_pb2.py +31 -0
- mediapipe/calculators/util/collection_has_min_size_calculator_pb2.py +31 -0
- mediapipe/calculators/util/combine_joints_calculator_pb2.py +36 -0
- mediapipe/calculators/util/detection_label_id_to_text_calculator_pb2.py +36 -0
- mediapipe/calculators/util/detections_to_rects_calculator_pb2.py +33 -0
- mediapipe/calculators/util/detections_to_render_data_calculator_pb2.py +33 -0
- mediapipe/calculators/util/face_to_rect_calculator_pb2.py +25 -0
- mediapipe/calculators/util/filter_detections_calculator_pb2.py +31 -0
- mediapipe/calculators/util/flat_color_image_calculator_pb2.py +32 -0
- mediapipe/calculators/util/labels_to_render_data_calculator_pb2.py +34 -0
- mediapipe/calculators/util/landmark_projection_calculator_pb2.py +31 -0
- mediapipe/calculators/util/landmarks_refinement_calculator_pb2.py +41 -0
- mediapipe/calculators/util/landmarks_smoothing_calculator_pb2.py +33 -0
- mediapipe/calculators/util/landmarks_to_detection_calculator_pb2.py +31 -0
- mediapipe/calculators/util/landmarks_to_floats_calculator_pb2.py +31 -0
- mediapipe/calculators/util/landmarks_to_render_data_calculator_pb2.py +32 -0
- mediapipe/calculators/util/landmarks_transformation_calculator_pb2.py +37 -0
- mediapipe/calculators/util/latency_pb2.py +25 -0
- mediapipe/calculators/util/local_file_contents_calculator_pb2.py +31 -0
- mediapipe/calculators/util/logic_calculator_pb2.py +34 -0
- mediapipe/calculators/util/non_max_suppression_calculator_pb2.py +35 -0
- mediapipe/calculators/util/packet_frequency_calculator_pb2.py +31 -0
- mediapipe/calculators/util/packet_frequency_pb2.py +25 -0
- mediapipe/calculators/util/packet_latency_calculator_pb2.py +31 -0
- mediapipe/calculators/util/rect_to_render_data_calculator_pb2.py +32 -0
- mediapipe/calculators/util/rect_to_render_scale_calculator_pb2.py +31 -0
- mediapipe/calculators/util/rect_transformation_calculator_pb2.py +31 -0
- mediapipe/calculators/util/refine_landmarks_from_heatmap_calculator_pb2.py +31 -0
- mediapipe/calculators/util/set_joints_visibility_calculator_pb2.py +41 -0
- mediapipe/calculators/util/thresholding_calculator_pb2.py +31 -0
- mediapipe/calculators/util/timed_box_list_id_to_label_calculator_pb2.py +31 -0
- mediapipe/calculators/util/timed_box_list_to_render_data_calculator_pb2.py +32 -0
- mediapipe/calculators/util/top_k_scores_calculator_pb2.py +31 -0
- mediapipe/calculators/util/visibility_copy_calculator_pb2.py +27 -0
- mediapipe/calculators/util/visibility_smoothing_calculator_pb2.py +31 -0
- mediapipe/calculators/video/__init__.py +0 -0
- mediapipe/calculators/video/box_detector_calculator_pb2.py +32 -0
- mediapipe/calculators/video/box_tracker_calculator_pb2.py +32 -0
- mediapipe/calculators/video/flow_packager_calculator_pb2.py +32 -0
- mediapipe/calculators/video/flow_to_image_calculator_pb2.py +31 -0
- mediapipe/calculators/video/motion_analysis_calculator_pb2.py +42 -0
- mediapipe/calculators/video/opencv_video_encoder_calculator_pb2.py +31 -0
- mediapipe/calculators/video/tool/__init__.py +0 -0
- mediapipe/calculators/video/tool/flow_quantizer_model_pb2.py +25 -0
- mediapipe/calculators/video/tracked_detection_manager_calculator_pb2.py +32 -0
- mediapipe/calculators/video/video_pre_stream_calculator_pb2.py +35 -0
- mediapipe/examples/__init__.py +14 -0
- mediapipe/examples/desktop/__init__.py +14 -0
- mediapipe/framework/__init__.py +0 -0
- mediapipe/framework/calculator_options_pb2.py +28 -0
- mediapipe/framework/calculator_pb2.py +56 -0
- mediapipe/framework/calculator_profile_pb2.py +47 -0
- mediapipe/framework/deps/__init__.py +0 -0
- mediapipe/framework/deps/proto_descriptor_pb2.py +28 -0
- mediapipe/framework/formats/__init__.py +0 -0
- mediapipe/framework/formats/affine_transform_data_pb2.py +27 -0
- mediapipe/framework/formats/annotation/__init__.py +0 -0
- mediapipe/framework/formats/annotation/locus_pb2.py +31 -0
- mediapipe/framework/formats/annotation/rasterization_pb2.py +28 -0
- mediapipe/framework/formats/body_rig_pb2.py +27 -0
- mediapipe/framework/formats/classification_pb2.py +30 -0
- mediapipe/framework/formats/detection_pb2.py +35 -0
- mediapipe/framework/formats/image_file_properties_pb2.py +25 -0
- mediapipe/framework/formats/image_format_pb2.py +28 -0
- mediapipe/framework/formats/landmark_pb2.py +36 -0
- mediapipe/framework/formats/location_data_pb2.py +37 -0
- mediapipe/framework/formats/matrix_data_pb2.py +30 -0
- mediapipe/framework/formats/motion/__init__.py +0 -0
- mediapipe/framework/formats/motion/optical_flow_field_data_pb2.py +29 -0
- mediapipe/framework/formats/object_detection/__init__.py +0 -0
- mediapipe/framework/formats/object_detection/anchor_pb2.py +25 -0
- mediapipe/framework/formats/rect_pb2.py +28 -0
- mediapipe/framework/formats/time_series_header_pb2.py +27 -0
- mediapipe/framework/mediapipe_options_pb2.py +26 -0
- mediapipe/framework/packet_factory_pb2.py +30 -0
- mediapipe/framework/packet_generator_pb2.py +32 -0
- mediapipe/framework/status_handler_pb2.py +27 -0
- mediapipe/framework/stream_handler/__init__.py +0 -0
- mediapipe/framework/stream_handler/default_input_stream_handler_pb2.py +27 -0
- mediapipe/framework/stream_handler/fixed_size_input_stream_handler_pb2.py +27 -0
- mediapipe/framework/stream_handler/sync_set_input_stream_handler_pb2.py +29 -0
- mediapipe/framework/stream_handler/timestamp_align_input_stream_handler_pb2.py +27 -0
- mediapipe/framework/stream_handler_pb2.py +29 -0
- mediapipe/framework/test_calculators_pb2.py +31 -0
- mediapipe/framework/thread_pool_executor_pb2.py +29 -0
- mediapipe/framework/tool/__init__.py +0 -0
- mediapipe/framework/tool/calculator_graph_template_pb2.py +44 -0
- mediapipe/framework/tool/field_data_pb2.py +27 -0
- mediapipe/framework/tool/node_chain_subgraph_pb2.py +31 -0
- mediapipe/framework/tool/packet_generator_wrapper_calculator_pb2.py +28 -0
- mediapipe/framework/tool/source_pb2.py +33 -0
- mediapipe/framework/tool/switch_container_pb2.py +32 -0
- mediapipe/gpu/__init__.py +0 -0
- mediapipe/gpu/copy_calculator_pb2.py +33 -0
- mediapipe/gpu/gl_animation_overlay_calculator_pb2.py +31 -0
- mediapipe/gpu/gl_context_options_pb2.py +31 -0
- mediapipe/gpu/gl_scaler_calculator_pb2.py +32 -0
- mediapipe/gpu/gl_surface_sink_calculator_pb2.py +32 -0
- mediapipe/gpu/gpu_origin_pb2.py +28 -0
- mediapipe/gpu/scale_mode_pb2.py +27 -0
- mediapipe/model_maker/__init__.py +27 -0
- mediapipe/model_maker/setup.py +107 -0
- mediapipe/modules/__init__.py +0 -0
- mediapipe/modules/face_detection/__init__.py +0 -0
- mediapipe/modules/face_detection/face_detection_full_range_cpu.binarypb +0 -0
- mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite +0 -0
- mediapipe/modules/face_detection/face_detection_pb2.py +30 -0
- mediapipe/modules/face_detection/face_detection_short_range.tflite +0 -0
- mediapipe/modules/face_detection/face_detection_short_range_cpu.binarypb +0 -0
- mediapipe/modules/face_geometry/__init__.py +0 -0
- mediapipe/modules/face_geometry/data/__init__.py +0 -0
- mediapipe/modules/face_geometry/effect_renderer_calculator_pb2.py +27 -0
- mediapipe/modules/face_geometry/env_generator_calculator_pb2.py +28 -0
- mediapipe/modules/face_geometry/geometry_pipeline_calculator_pb2.py +27 -0
- mediapipe/modules/face_geometry/libs/__init__.py +0 -0
- mediapipe/modules/face_geometry/protos/__init__.py +0 -0
- mediapipe/modules/face_geometry/protos/environment_pb2.py +30 -0
- mediapipe/modules/face_geometry/protos/face_geometry_pb2.py +28 -0
- mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata_pb2.py +31 -0
- mediapipe/modules/face_geometry/protos/mesh_3d_pb2.py +30 -0
- mediapipe/modules/face_landmark/__init__.py +0 -0
- mediapipe/modules/face_landmark/face_landmark.tflite +0 -0
- mediapipe/modules/face_landmark/face_landmark_front_cpu.binarypb +0 -0
- mediapipe/modules/face_landmark/face_landmark_with_attention.tflite +0 -0
- mediapipe/modules/hand_landmark/__init__.py +0 -0
- mediapipe/modules/hand_landmark/calculators/__init__.py +0 -0
- mediapipe/modules/hand_landmark/hand_landmark_full.tflite +0 -0
- mediapipe/modules/hand_landmark/hand_landmark_lite.tflite +0 -0
- mediapipe/modules/hand_landmark/hand_landmark_tracking_cpu.binarypb +0 -0
- mediapipe/modules/hand_landmark/handedness.txt +2 -0
- mediapipe/modules/holistic_landmark/__init__.py +0 -0
- mediapipe/modules/holistic_landmark/calculators/__init__.py +0 -0
- mediapipe/modules/holistic_landmark/calculators/roi_tracking_calculator_pb2.py +37 -0
- mediapipe/modules/holistic_landmark/hand_recrop.tflite +0 -0
- mediapipe/modules/holistic_landmark/holistic_landmark_cpu.binarypb +0 -0
- mediapipe/modules/iris_landmark/__init__.py +0 -0
- mediapipe/modules/iris_landmark/iris_landmark.tflite +0 -0
- mediapipe/modules/objectron/__init__.py +0 -0
- mediapipe/modules/objectron/calculators/__init__.py +0 -0
- mediapipe/modules/objectron/calculators/a_r_capture_metadata_pb2.py +101 -0
- mediapipe/modules/objectron/calculators/annotation_data_pb2.py +37 -0
- mediapipe/modules/objectron/calculators/belief_decoder_config_pb2.py +27 -0
- mediapipe/modules/objectron/calculators/camera_parameters_pb2.py +29 -0
- mediapipe/modules/objectron/calculators/filter_detection_calculator_pb2.py +35 -0
- mediapipe/modules/objectron/calculators/frame_annotation_to_rect_calculator_pb2.py +31 -0
- mediapipe/modules/objectron/calculators/frame_annotation_tracker_calculator_pb2.py +31 -0
- mediapipe/modules/objectron/calculators/lift_2d_frame_annotation_to_3d_calculator_pb2.py +32 -0
- mediapipe/modules/objectron/calculators/object_pb2.py +37 -0
- mediapipe/modules/objectron/calculators/tensors_to_objects_calculator_pb2.py +32 -0
- mediapipe/modules/objectron/calculators/tflite_tensors_to_objects_calculator_pb2.py +32 -0
- mediapipe/modules/objectron/object_detection_oidv4_labelmap.txt +24 -0
- mediapipe/modules/objectron/objectron_cpu.binarypb +0 -0
- mediapipe/modules/palm_detection/__init__.py +0 -0
- mediapipe/modules/palm_detection/palm_detection_full.tflite +0 -0
- mediapipe/modules/palm_detection/palm_detection_lite.tflite +0 -0
- mediapipe/modules/pose_detection/__init__.py +0 -0
- mediapipe/modules/pose_detection/pose_detection.tflite +0 -0
- mediapipe/modules/pose_landmark/__init__.py +0 -0
- mediapipe/modules/pose_landmark/pose_landmark_cpu.binarypb +0 -0
- mediapipe/modules/pose_landmark/pose_landmark_full.tflite +0 -0
- mediapipe/modules/selfie_segmentation/__init__.py +0 -0
- mediapipe/modules/selfie_segmentation/selfie_segmentation.tflite +0 -0
- mediapipe/modules/selfie_segmentation/selfie_segmentation_cpu.binarypb +0 -0
- mediapipe/modules/selfie_segmentation/selfie_segmentation_landscape.tflite +0 -0
- mediapipe/python/__init__.py +28 -0
- mediapipe/python/_framework_bindings/arm64.cpython-311-darwin.so +0 -0
- mediapipe/python/_framework_bindings.cpython-311-darwin.so +0 -0
- mediapipe/python/calculator_graph_test.py +251 -0
- mediapipe/python/image_frame_test.py +194 -0
- mediapipe/python/image_test.py +218 -0
- mediapipe/python/packet_creator.py +275 -0
- mediapipe/python/packet_getter.py +119 -0
- mediapipe/python/packet_test.py +533 -0
- mediapipe/python/solution_base.py +632 -0
- mediapipe/python/solution_base_test.py +396 -0
- mediapipe/python/solutions/__init__.py +27 -0
- mediapipe/python/solutions/download_utils.py +37 -0
- mediapipe/python/solutions/drawing_styles.py +249 -0
- mediapipe/python/solutions/drawing_utils.py +316 -0
- mediapipe/python/solutions/drawing_utils_test.py +258 -0
- mediapipe/python/solutions/face_detection.py +105 -0
- mediapipe/python/solutions/face_detection_test.py +92 -0
- mediapipe/python/solutions/face_mesh.py +125 -0
- mediapipe/python/solutions/face_mesh_connections.py +500 -0
- mediapipe/python/solutions/face_mesh_test.py +170 -0
- mediapipe/python/solutions/hands.py +153 -0
- mediapipe/python/solutions/hands_connections.py +32 -0
- mediapipe/python/solutions/hands_test.py +218 -0
- mediapipe/python/solutions/holistic.py +167 -0
- mediapipe/python/solutions/holistic_test.py +142 -0
- mediapipe/python/solutions/objectron.py +288 -0
- mediapipe/python/solutions/objectron_test.py +81 -0
- mediapipe/python/solutions/pose.py +192 -0
- mediapipe/python/solutions/pose_connections.py +22 -0
- mediapipe/python/solutions/pose_test.py +262 -0
- mediapipe/python/solutions/selfie_segmentation.py +76 -0
- mediapipe/python/solutions/selfie_segmentation_test.py +68 -0
- mediapipe/python/timestamp_test.py +78 -0
- mediapipe/tasks/__init__.py +14 -0
- mediapipe/tasks/cc/__init__.py +0 -0
- mediapipe/tasks/cc/audio/__init__.py +0 -0
- mediapipe/tasks/cc/audio/audio_classifier/__init__.py +0 -0
- mediapipe/tasks/cc/audio/audio_classifier/proto/__init__.py +0 -0
- mediapipe/tasks/cc/audio/audio_classifier/proto/audio_classifier_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/audio/audio_embedder/__init__.py +0 -0
- mediapipe/tasks/cc/audio/audio_embedder/proto/__init__.py +0 -0
- mediapipe/tasks/cc/audio/audio_embedder/proto/audio_embedder_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/audio/core/__init__.py +0 -0
- mediapipe/tasks/cc/audio/utils/__init__.py +0 -0
- mediapipe/tasks/cc/components/__init__.py +0 -0
- mediapipe/tasks/cc/components/calculators/__init__.py +0 -0
- mediapipe/tasks/cc/components/calculators/classification_aggregation_calculator_pb2.py +31 -0
- mediapipe/tasks/cc/components/calculators/score_calibration_calculator_pb2.py +35 -0
- mediapipe/tasks/cc/components/calculators/tensors_to_embeddings_calculator_pb2.py +32 -0
- mediapipe/tasks/cc/components/containers/__init__.py +0 -0
- mediapipe/tasks/cc/components/containers/proto/__init__.py +0 -0
- mediapipe/tasks/cc/components/containers/proto/classifications_pb2.py +29 -0
- mediapipe/tasks/cc/components/containers/proto/embeddings_pb2.py +34 -0
- mediapipe/tasks/cc/components/containers/proto/landmarks_detection_result_pb2.py +31 -0
- mediapipe/tasks/cc/components/processors/__init__.py +0 -0
- mediapipe/tasks/cc/components/processors/proto/__init__.py +0 -0
- mediapipe/tasks/cc/components/processors/proto/classification_postprocessing_graph_options_pb2.py +38 -0
- mediapipe/tasks/cc/components/processors/proto/classifier_options_pb2.py +26 -0
- mediapipe/tasks/cc/components/processors/proto/detection_postprocessing_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/components/processors/proto/detector_options_pb2.py +26 -0
- mediapipe/tasks/cc/components/processors/proto/embedder_options_pb2.py +26 -0
- mediapipe/tasks/cc/components/processors/proto/embedding_postprocessing_graph_options_pb2.py +32 -0
- mediapipe/tasks/cc/components/processors/proto/image_preprocessing_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/components/processors/proto/llm_params_pb2.py +27 -0
- mediapipe/tasks/cc/components/processors/proto/text_model_type_pb2.py +27 -0
- mediapipe/tasks/cc/components/processors/proto/text_preprocessing_graph_options_pb2.py +32 -0
- mediapipe/tasks/cc/components/processors/proto/transformer_params_pb2.py +28 -0
- mediapipe/tasks/cc/components/utils/__init__.py +0 -0
- mediapipe/tasks/cc/core/__init__.py +0 -0
- mediapipe/tasks/cc/core/proto/__init__.py +0 -0
- mediapipe/tasks/cc/core/proto/acceleration_pb2.py +27 -0
- mediapipe/tasks/cc/core/proto/base_options_pb2.py +29 -0
- mediapipe/tasks/cc/core/proto/external_file_pb2.py +30 -0
- mediapipe/tasks/cc/core/proto/inference_subgraph_pb2.py +32 -0
- mediapipe/tasks/cc/core/proto/model_resources_calculator_pb2.py +32 -0
- mediapipe/tasks/cc/metadata/__init__.py +0 -0
- mediapipe/tasks/cc/metadata/python/__init__.py +0 -0
- mediapipe/tasks/cc/metadata/python/_pywrap_metadata_version/arm64.cpython-311-darwin.so +0 -0
- mediapipe/tasks/cc/metadata/python/_pywrap_metadata_version.cpython-311-darwin.so +0 -0
- mediapipe/tasks/cc/metadata/tests/__init__.py +0 -0
- mediapipe/tasks/cc/metadata/utils/__init__.py +0 -0
- mediapipe/tasks/cc/text/__init__.py +0 -0
- mediapipe/tasks/cc/text/custom_ops/__init__.py +0 -0
- mediapipe/tasks/cc/text/custom_ops/ragged/__init__.py +0 -0
- mediapipe/tasks/cc/text/custom_ops/sentencepiece/__init__.py +0 -0
- mediapipe/tasks/cc/text/custom_ops/sentencepiece/testdata/__init__.py +0 -0
- mediapipe/tasks/cc/text/language_detector/__init__.py +0 -0
- mediapipe/tasks/cc/text/language_detector/custom_ops/__init__.py +0 -0
- mediapipe/tasks/cc/text/language_detector/custom_ops/utils/__init__.py +0 -0
- mediapipe/tasks/cc/text/language_detector/custom_ops/utils/hash/__init__.py +0 -0
- mediapipe/tasks/cc/text/language_detector/custom_ops/utils/utf/__init__.py +0 -0
- mediapipe/tasks/cc/text/text_classifier/__init__.py +0 -0
- mediapipe/tasks/cc/text/text_classifier/proto/__init__.py +0 -0
- mediapipe/tasks/cc/text/text_classifier/proto/text_classifier_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/text/text_embedder/__init__.py +0 -0
- mediapipe/tasks/cc/text/text_embedder/proto/__init__.py +0 -0
- mediapipe/tasks/cc/text/text_embedder/proto/text_embedder_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/text/tokenizers/__init__.py +0 -0
- mediapipe/tasks/cc/text/utils/__init__.py +0 -0
- mediapipe/tasks/cc/vision/__init__.py +0 -0
- mediapipe/tasks/cc/vision/core/__init__.py +0 -0
- mediapipe/tasks/cc/vision/custom_ops/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_detector/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_detector/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_detector/proto/face_detector_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/face_geometry/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_geometry/calculators/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_geometry/calculators/env_generator_calculator_pb2.py +28 -0
- mediapipe/tasks/cc/vision/face_geometry/calculators/geometry_pipeline_calculator_pb2.py +29 -0
- mediapipe/tasks/cc/vision/face_geometry/data/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_geometry/libs/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_geometry/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_geometry/proto/environment_pb2.py +30 -0
- mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry_graph_options_pb2.py +29 -0
- mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry_pb2.py +28 -0
- mediapipe/tasks/cc/vision/face_geometry/proto/geometry_pipeline_metadata_pb2.py +31 -0
- mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d_pb2.py +30 -0
- mediapipe/tasks/cc/vision/face_landmarker/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_landmarker/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_landmarker/proto/face_blendshapes_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarker_graph_options_pb2.py +37 -0
- mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarks_detector_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/vision/face_landmarker/proto/tensors_to_face_landmarks_graph_options_pb2.py +32 -0
- mediapipe/tasks/cc/vision/face_stylizer/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_stylizer/calculators/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_stylizer/calculators/tensors_to_image_calculator_pb2.py +36 -0
- mediapipe/tasks/cc/vision/face_stylizer/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_stylizer/proto/face_stylizer_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/__init__.py +0 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/calculators/__init__.py +0 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/calculators/combined_prediction_calculator_pb2.py +33 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator_pb2.py +31 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_classifier_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_embedder_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_recognizer_graph_options_pb2.py +36 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/proto/hand_gesture_recognizer_graph_options_pb2.py +36 -0
- mediapipe/tasks/cc/vision/hand_detector/__init__.py +0 -0
- mediapipe/tasks/cc/vision/hand_detector/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_result_pb2.py +29 -0
- mediapipe/tasks/cc/vision/hand_landmarker/__init__.py +0 -0
- mediapipe/tasks/cc/vision/hand_landmarker/calculators/__init__.py +0 -0
- mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_association_calculator_pb2.py +31 -0
- mediapipe/tasks/cc/vision/hand_landmarker/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options_pb2.py +36 -0
- mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_roi_refinement_graph_options_pb2.py +26 -0
- mediapipe/tasks/cc/vision/image_classifier/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_classifier/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_classifier/proto/image_classifier_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/vision/image_embedder/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_embedder/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_embedder/proto/image_embedder_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/vision/image_generator/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_generator/diffuser/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_generator/diffuser/stable_diffusion_iterate_calculator_pb2.py +40 -0
- mediapipe/tasks/cc/vision/image_generator/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_generator/proto/conditioned_image_graph_options_pb2.py +39 -0
- mediapipe/tasks/cc/vision/image_generator/proto/control_plugin_graph_options_pb2.py +33 -0
- mediapipe/tasks/cc/vision/image_generator/proto/image_generator_graph_options_pb2.py +29 -0
- mediapipe/tasks/cc/vision/image_segmenter/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_segmenter/calculators/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_segmenter/calculators/tensors_to_segmentation_calculator_pb2.py +34 -0
- mediapipe/tasks/cc/vision/image_segmenter/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_segmenter/proto/image_segmenter_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/vision/image_segmenter/proto/segmenter_options_pb2.py +32 -0
- mediapipe/tasks/cc/vision/interactive_segmenter/__init__.py +0 -0
- mediapipe/tasks/cc/vision/object_detector/__init__.py +0 -0
- mediapipe/tasks/cc/vision/object_detector/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/object_detector/proto/object_detector_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/pose_detector/__init__.py +0 -0
- mediapipe/tasks/cc/vision/pose_detector/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/pose_detector/proto/pose_detector_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/pose_landmarker/__init__.py +0 -0
- mediapipe/tasks/cc/vision/pose_landmarker/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/pose_landmarker/proto/pose_landmarker_graph_options_pb2.py +36 -0
- mediapipe/tasks/cc/vision/pose_landmarker/proto/pose_landmarks_detector_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/utils/__init__.py +0 -0
- mediapipe/tasks/cc/vision/utils/ghum/__init__.py +0 -0
- mediapipe/tasks/metadata/image_segmenter_metadata_schema.fbs +59 -0
- mediapipe/tasks/metadata/image_segmenter_metadata_schema_py_generated.py +108 -0
- mediapipe/tasks/metadata/metadata_schema.fbs +732 -0
- mediapipe/tasks/metadata/metadata_schema_py_generated.py +3251 -0
- mediapipe/tasks/metadata/object_detector_metadata_schema.fbs +98 -0
- mediapipe/tasks/metadata/object_detector_metadata_schema_py_generated.py +674 -0
- mediapipe/tasks/metadata/schema_py_generated.py +14263 -0
- mediapipe/tasks/python/__init__.py +26 -0
- mediapipe/tasks/python/audio/__init__.py +33 -0
- mediapipe/tasks/python/audio/audio_classifier.py +324 -0
- mediapipe/tasks/python/audio/audio_embedder.py +285 -0
- mediapipe/tasks/python/audio/core/__init__.py +16 -0
- mediapipe/tasks/python/audio/core/audio_record.py +125 -0
- mediapipe/tasks/python/audio/core/audio_task_running_mode.py +29 -0
- mediapipe/tasks/python/audio/core/base_audio_task_api.py +181 -0
- mediapipe/tasks/python/components/__init__.py +13 -0
- mediapipe/tasks/python/components/containers/__init__.py +53 -0
- mediapipe/tasks/python/components/containers/audio_data.py +137 -0
- mediapipe/tasks/python/components/containers/bounding_box.py +73 -0
- mediapipe/tasks/python/components/containers/category.py +78 -0
- mediapipe/tasks/python/components/containers/classification_result.py +111 -0
- mediapipe/tasks/python/components/containers/detections.py +181 -0
- mediapipe/tasks/python/components/containers/embedding_result.py +89 -0
- mediapipe/tasks/python/components/containers/keypoint.py +77 -0
- mediapipe/tasks/python/components/containers/landmark.py +122 -0
- mediapipe/tasks/python/components/containers/landmark_detection_result.py +106 -0
- mediapipe/tasks/python/components/containers/rect.py +109 -0
- mediapipe/tasks/python/components/processors/__init__.py +23 -0
- mediapipe/tasks/python/components/processors/classifier_options.py +86 -0
- mediapipe/tasks/python/components/utils/__init__.py +13 -0
- mediapipe/tasks/python/components/utils/cosine_similarity.py +68 -0
- mediapipe/tasks/python/core/__init__.py +13 -0
- mediapipe/tasks/python/core/base_options.py +121 -0
- mediapipe/tasks/python/core/optional_dependencies.py +25 -0
- mediapipe/tasks/python/core/task_info.py +132 -0
- mediapipe/tasks/python/metadata/__init__.py +13 -0
- mediapipe/tasks/python/metadata/flatbuffers_lib/_pywrap_flatbuffers/arm64.cpython-311-darwin.so +0 -0
- mediapipe/tasks/python/metadata/flatbuffers_lib/_pywrap_flatbuffers.cpython-311-darwin.so +0 -0
- mediapipe/tasks/python/metadata/metadata.py +928 -0
- mediapipe/tasks/python/metadata/metadata_displayer_cli.py +34 -0
- mediapipe/tasks/python/metadata/metadata_writers/__init__.py +13 -0
- mediapipe/tasks/python/metadata/metadata_writers/face_stylizer.py +138 -0
- mediapipe/tasks/python/metadata/metadata_writers/image_classifier.py +71 -0
- mediapipe/tasks/python/metadata/metadata_writers/image_segmenter.py +170 -0
- mediapipe/tasks/python/metadata/metadata_writers/metadata_info.py +1166 -0
- mediapipe/tasks/python/metadata/metadata_writers/metadata_writer.py +845 -0
- mediapipe/tasks/python/metadata/metadata_writers/model_asset_bundle_utils.py +71 -0
- mediapipe/tasks/python/metadata/metadata_writers/object_detector.py +331 -0
- mediapipe/tasks/python/metadata/metadata_writers/text_classifier.py +119 -0
- mediapipe/tasks/python/metadata/metadata_writers/writer_utils.py +91 -0
- mediapipe/tasks/python/test/__init__.py +13 -0
- mediapipe/tasks/python/test/audio/__init__.py +13 -0
- mediapipe/tasks/python/test/audio/audio_classifier_test.py +387 -0
- mediapipe/tasks/python/test/audio/audio_embedder_test.py +297 -0
- mediapipe/tasks/python/test/test_utils.py +196 -0
- mediapipe/tasks/python/test/text/__init__.py +13 -0
- mediapipe/tasks/python/test/text/language_detector_test.py +228 -0
- mediapipe/tasks/python/test/text/text_classifier_test.py +231 -0
- mediapipe/tasks/python/test/text/text_embedder_test.py +326 -0
- mediapipe/tasks/python/test/vision/__init__.py +13 -0
- mediapipe/tasks/python/test/vision/face_aligner_test.py +190 -0
- mediapipe/tasks/python/test/vision/face_detector_test.py +523 -0
- mediapipe/tasks/python/test/vision/face_landmarker_test.py +565 -0
- mediapipe/tasks/python/test/vision/face_stylizer_test.py +191 -0
- mediapipe/tasks/python/test/vision/hand_landmarker_test.py +437 -0
- mediapipe/tasks/python/test/vision/image_classifier_test.py +657 -0
- mediapipe/tasks/python/test/vision/image_embedder_test.py +423 -0
- mediapipe/tasks/python/test/vision/image_segmenter_test.py +512 -0
- mediapipe/tasks/python/test/vision/interactive_segmenter_test.py +341 -0
- mediapipe/tasks/python/test/vision/object_detector_test.py +493 -0
- mediapipe/tasks/python/test/vision/pose_landmarker_test.py +518 -0
- mediapipe/tasks/python/text/__init__.py +35 -0
- mediapipe/tasks/python/text/core/__init__.py +16 -0
- mediapipe/tasks/python/text/core/base_text_task_api.py +54 -0
- mediapipe/tasks/python/text/language_detector.py +220 -0
- mediapipe/tasks/python/text/text_classifier.py +187 -0
- mediapipe/tasks/python/text/text_embedder.py +188 -0
- mediapipe/tasks/python/vision/__init__.py +83 -0
- mediapipe/tasks/python/vision/core/__init__.py +14 -0
- mediapipe/tasks/python/vision/core/base_vision_task_api.py +226 -0
- mediapipe/tasks/python/vision/core/image_processing_options.py +39 -0
- mediapipe/tasks/python/vision/core/vision_task_running_mode.py +31 -0
- mediapipe/tasks/python/vision/face_aligner.py +158 -0
- mediapipe/tasks/python/vision/face_detector.py +332 -0
- mediapipe/tasks/python/vision/face_landmarker.py +3244 -0
- mediapipe/tasks/python/vision/face_stylizer.py +158 -0
- mediapipe/tasks/python/vision/gesture_recognizer.py +480 -0
- mediapipe/tasks/python/vision/hand_landmarker.py +504 -0
- mediapipe/tasks/python/vision/image_classifier.py +358 -0
- mediapipe/tasks/python/vision/image_embedder.py +362 -0
- mediapipe/tasks/python/vision/image_segmenter.py +433 -0
- mediapipe/tasks/python/vision/interactive_segmenter.py +285 -0
- mediapipe/tasks/python/vision/object_detector.py +385 -0
- mediapipe/tasks/python/vision/pose_landmarker.py +455 -0
- mediapipe/util/__init__.py +0 -0
- mediapipe/util/analytics/__init__.py +0 -0
- mediapipe/util/analytics/mediapipe_log_extension_pb2.py +41 -0
- mediapipe/util/analytics/mediapipe_logging_enums_pb2.py +36 -0
- mediapipe/util/audio_decoder_pb2.py +33 -0
- mediapipe/util/color_pb2.py +32 -0
- mediapipe/util/label_map_pb2.py +26 -0
- mediapipe/util/render_data_pb2.py +57 -0
- mediapipe/util/sequence/__init__.py +14 -0
- mediapipe/util/sequence/media_sequence.py +716 -0
- mediapipe/util/sequence/media_sequence_test.py +290 -0
- mediapipe/util/sequence/media_sequence_util.py +800 -0
- mediapipe/util/sequence/media_sequence_util_test.py +389 -0
- mediapipe/util/tracking/__init__.py +0 -0
- mediapipe/util/tracking/box_detector_pb2.py +38 -0
- mediapipe/util/tracking/box_tracker_pb2.py +31 -0
- mediapipe/util/tracking/camera_motion_pb2.py +30 -0
- mediapipe/util/tracking/flow_packager_pb2.py +59 -0
- mediapipe/util/tracking/frame_selection_pb2.py +34 -0
- mediapipe/util/tracking/frame_selection_solution_evaluator_pb2.py +27 -0
- mediapipe/util/tracking/motion_analysis_pb2.py +34 -0
- mediapipe/util/tracking/motion_estimation_pb2.py +65 -0
- mediapipe/util/tracking/motion_models_pb2.py +41 -0
- mediapipe/util/tracking/motion_saliency_pb2.py +25 -0
- mediapipe/util/tracking/push_pull_filtering_pb2.py +25 -0
- mediapipe/util/tracking/region_flow_computation_pb2.py +58 -0
- mediapipe/util/tracking/region_flow_pb2.py +48 -0
- mediapipe/util/tracking/tone_estimation_pb2.py +44 -0
- mediapipe/util/tracking/tone_models_pb2.py +31 -0
- mediapipe/util/tracking/tracked_detection_manager_config_pb2.py +25 -0
- mediapipe/util/tracking/tracking_pb2.py +72 -0
- mediapipe_nightly-0.0.0.post20231103.dist-info/LICENSE +218 -0
- mediapipe_nightly-0.0.0.post20231103.dist-info/METADATA +196 -0
- mediapipe_nightly-0.0.0.post20231103.dist-info/RECORD +545 -0
- mediapipe_nightly-0.0.0.post20231103.dist-info/WHEEL +5 -0
- mediapipe_nightly-0.0.0.post20231103.dist-info/top_level.txt +4 -0
@@ -0,0 +1,387 @@
|
|
1
|
+
# Copyright 2022 The MediaPipe Authors.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
"""Tests for audio classifier."""
|
15
|
+
|
16
|
+
import os
|
17
|
+
from typing import List, Tuple
|
18
|
+
from unittest import mock
|
19
|
+
|
20
|
+
from absl.testing import absltest
|
21
|
+
from absl.testing import parameterized
|
22
|
+
import numpy as np
|
23
|
+
from scipy.io import wavfile
|
24
|
+
|
25
|
+
from mediapipe.tasks.python.audio import audio_classifier
|
26
|
+
from mediapipe.tasks.python.audio.core import audio_record
|
27
|
+
from mediapipe.tasks.python.audio.core import audio_task_running_mode
|
28
|
+
from mediapipe.tasks.python.components.containers import audio_data as audio_data_module
|
29
|
+
from mediapipe.tasks.python.components.containers import classification_result as classification_result_module
|
30
|
+
from mediapipe.tasks.python.core import base_options as base_options_module
|
31
|
+
from mediapipe.tasks.python.test import test_utils
|
32
|
+
|
33
|
+
# Short aliases for the task API types used throughout the tests.
_BaseOptions = base_options_module.BaseOptions
_AudioData = audio_data_module.AudioData
_AudioRecord = audio_record.AudioRecord
_AudioClassifier = audio_classifier.AudioClassifier
_AudioClassifierOptions = audio_classifier.AudioClassifierOptions
_AudioClassifierResult = classification_result_module.ClassificationResult
_RUNNING_MODE = audio_task_running_mode.AudioTaskRunningMode

# Test assets; the file names are joined onto `_TEST_DATA_DIR` by the tests.
_TEST_DATA_DIR = 'mediapipe/tasks/testdata/audio'
_YAMNET_MODEL_FILE = 'yamnet_audio_classifier_with_metadata.tflite'
_TWO_HEADS_MODEL_FILE = 'two_heads.tflite'
_SPEECH_WAV_16K_MONO = 'speech_16000_hz_mono.wav'
_SPEECH_WAV_48K_MONO = 'speech_48000_hz_mono.wav'
_TWO_HEADS_WAV_16K_MONO = 'two_heads_16000_hz_mono.wav'
_TWO_HEADS_WAV_44K_MONO = 'two_heads_44100_hz_mono.wav'

# Constants used to cut an audio file into chunks for the stream mode tests:
# a chunk covers `_YAMNET_NUM_OF_SAMPLES` samples at
# `_YAMNET_MODEL_SAMPLE_RATE`, rescaled to the sample rate of the file.
_YAMNET_MODEL_SAMPLE_RATE = 16000
_YAMNET_NUM_OF_SAMPLES = 15600
_MILLISECONDS_PER_SECOND = 1000
|
51
|
+
|
52
|
+
|
53
|
+
class AudioClassifierTest(parameterized.TestCase):
  """Tests for the `AudioClassifier` Python task API.

  Exercises classifier creation (model path, options, in-memory model buffer),
  classification in the audio clips and audio stream running modes, the result
  filtering options, and the errors raised for invalid options or API misuse.
  """

  def setUp(self):
    """Resolves the test-data paths of the YAMNet and two-heads models."""
    super().setUp()
    self.yamnet_model_path = test_utils.get_test_data_path(
        os.path.join(_TEST_DATA_DIR, _YAMNET_MODEL_FILE))
    self.two_heads_model_path = test_utils.get_test_data_path(
        os.path.join(_TEST_DATA_DIR, _TWO_HEADS_MODEL_FILE))

  def _read_wav_file(self, file_name) -> _AudioData:
    """Reads a test WAV file into a single `AudioData` clip.

    Args:
      file_name: Name of a WAV file under `_TEST_DATA_DIR`.

    Returns:
      An `AudioData` holding the samples converted to float and divided by the
      int16 maximum, together with the sample rate of the file.
    """
    sample_rate, buffer = wavfile.read(
        test_utils.get_test_data_path(os.path.join(_TEST_DATA_DIR, file_name)))
    return _AudioData.create_from_array(
        buffer.astype(float) / np.iinfo(np.int16).max, sample_rate)

  def _read_wav_file_as_stream(self, file_name) -> List[Tuple[_AudioData, int]]:
    """Splits a test WAV file into consecutive chunks with start timestamps.

    A chunk covers `_YAMNET_NUM_OF_SAMPLES` samples at the model sample rate,
    rescaled to the sample rate of the file; the last chunk may be shorter.

    Args:
      file_name: Name of a WAV file under `_TEST_DATA_DIR`.

    Returns:
      A list of `(audio_data, timestamp_ms)` tuples in file order, where
      `timestamp_ms` is the start of the chunk in milliseconds.
    """
    sample_rate, buffer = wavfile.read(
        test_utils.get_test_data_path(os.path.join(_TEST_DATA_DIR, file_name)))
    int16_max = np.iinfo(np.int16).max
    chunk_size = (
        _YAMNET_NUM_OF_SAMPLES * sample_rate // _YAMNET_MODEL_SAMPLE_RATE)
    audio_data_list = []
    # `range` rejects a zero step, so a degenerate chunk size fails fast
    # instead of looping forever.
    for start in range(0, len(buffer), chunk_size):
      chunk = buffer[start:start + chunk_size]
      # Integer arithmetic keeps the timestamp exact (no float truncation).
      timestamp_ms = start * _MILLISECONDS_PER_SECOND // sample_rate
      audio_data_list.append(
          (_AudioData.create_from_array(chunk.astype(float) / int16_max,
                                        sample_rate), timestamp_ms))
    return audio_data_list

  def _check_classification_head(self, classification, head_index, head_name,
                                 num_categories, top_index, top_name,
                                 min_score):
    """Checks one classification head and its first category.

    Args:
      classification: The classifications entry of a single head.
      head_index: Expected `head_index`.
      head_name: Expected `head_name`.
      num_categories: Expected number of categories.
      top_index: Expected `index` of the first category.
      top_name: Expected `category_name` of the first category.
      min_score: Value the score of the first category must exceed.
    """
    self.assertEqual(classification.head_index, head_index)
    self.assertEqual(classification.head_name, head_name)
    self.assertLen(classification.categories, num_categories)
    top_category = classification.categories[0]
    self.assertEqual(top_category.index, top_index)
    self.assertEqual(top_category.category_name, top_name)
    self.assertGreater(top_category.score, min_score)

  # TODO: Compare the exact score values to capture unexpected changes in
  # the inference pipeline.
  def _check_yamnet_result(
      self,
      classification_result_list: List[_AudioClassifierResult],
      expected_num_categories=521):
    """Checks the YAMNet results obtained for the speech test clips.

    Args:
      classification_result_list: Results returned for one speech clip.
      expected_num_categories: Expected number of categories per result.
    """
    self.assertLen(classification_result_list, 5)
    # Only the first four results are inspected in detail; the fifth one is
    # covered by the length check alone.
    # NOTE(review): presumably the last window is partial - confirm.
    expected_timestamps_ms = [0, 975, 1950, 2925]
    for classification_result, timestamp in zip(classification_result_list,
                                                expected_timestamps_ms):
      self.assertEqual(classification_result.timestamp_ms, timestamp)
      self.assertLen(classification_result.classifications, 1)
      self._check_classification_head(
          classification_result.classifications[0],
          head_index=0,
          head_name='scores',
          num_categories=expected_num_categories,
          top_index=0,
          top_name='Speech',
          min_score=0.9)

  # TODO: Compare the exact score values to capture unexpected changes in
  # the inference pipeline.
  def _check_two_heads_result(
      self,
      classification_result_list: List[_AudioClassifierResult],
      first_head_expected_num_categories=521,
      second_head_expected_num_categories=5):
    """Checks the results of the two-heads model for the two-heads clips.

    One or two results are accepted; the second one is checked only when it
    is present.

    Args:
      classification_result_list: Results returned for one two-heads clip.
      first_head_expected_num_categories: Expected number of categories of the
        'yamnet_classification' head.
      second_head_expected_num_categories: Expected number of categories of
        the 'bird_classification' head.
    """
    self.assertGreaterEqual(len(classification_result_list), 1)
    self.assertLessEqual(len(classification_result_list), 2)
    # Checks the first result.
    classification_result = classification_result_list[0]
    self.assertEqual(classification_result.timestamp_ms, 0)
    self.assertLen(classification_result.classifications, 2)
    # Checks the first head.
    self._check_classification_head(
        classification_result.classifications[0],
        head_index=0,
        head_name='yamnet_classification',
        num_categories=first_head_expected_num_categories,
        top_index=508,
        top_name='Environmental noise',
        min_score=0.5)
    # Checks the second head.
    self._check_classification_head(
        classification_result.classifications[1],
        head_index=1,
        head_name='bird_classification',
        num_categories=second_head_expected_num_categories,
        top_index=4,
        top_name='Chestnut-crowned Antpitta',
        min_score=0.93)
    # Checks the second result, if present.
    if len(classification_result_list) == 2:
      classification_result = classification_result_list[1]
      self.assertEqual(classification_result.timestamp_ms, 975)
      self.assertLen(classification_result.classifications, 2)
      # Checks the first head.
      self._check_classification_head(
          classification_result.classifications[0],
          head_index=0,
          head_name='yamnet_classification',
          num_categories=first_head_expected_num_categories,
          top_index=494,
          top_name='Silence',
          min_score=0.9)
      # Checks the second head.
      self._check_classification_head(
          classification_result.classifications[1],
          head_index=1,
          head_name='bird_classification',
          num_categories=second_head_expected_num_categories,
          top_index=1,
          top_name='White-breasted Wood-Wren',
          min_score=0.99)

  def test_create_from_file_succeeds_with_valid_model_path(self):
    # Creates with default option and valid model file successfully.
    with _AudioClassifier.create_from_model_path(
        self.yamnet_model_path) as classifier:
      self.assertIsInstance(classifier, _AudioClassifier)

  def test_create_from_options_succeeds_with_valid_model_path(self):
    # Creates with options containing model file successfully.
    with _AudioClassifier.create_from_options(
        _AudioClassifierOptions(
            base_options=_BaseOptions(
                model_asset_path=self.yamnet_model_path))) as classifier:
      self.assertIsInstance(classifier, _AudioClassifier)

  def test_create_from_options_fails_with_invalid_model_path(self):
    # Fails with a model path that does not exist.
    with self.assertRaisesRegex(
        RuntimeError, 'Unable to open file at /path/to/invalid/model.tflite'):
      base_options = _BaseOptions(
          model_asset_path='/path/to/invalid/model.tflite')
      options = _AudioClassifierOptions(base_options=base_options)
      _AudioClassifier.create_from_options(options)

  def test_create_from_options_succeeds_with_valid_model_content(self):
    # Creates with options containing model content successfully.
    with open(self.yamnet_model_path, 'rb') as f:
      model_content = f.read()
    options = _AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_buffer=model_content))
    # Use the context manager so the classifier is released when the test
    # ends.
    with _AudioClassifier.create_from_options(options) as classifier:
      self.assertIsInstance(classifier, _AudioClassifier)

  @parameterized.parameters(_SPEECH_WAV_16K_MONO, _SPEECH_WAV_48K_MONO)
  def test_classify_with_yamnet_model(self, audio_file):
    # Classifies a speech clip with a freshly created classifier.
    with _AudioClassifier.create_from_model_path(
        self.yamnet_model_path) as classifier:
      classification_result_list = classifier.classify(
          self._read_wav_file(audio_file))
      self._check_yamnet_result(classification_result_list)

  def test_classify_with_yamnet_model_and_inputs_at_different_sample_rates(
      self):
    # Reuses one classifier for clips recorded at different sample rates.
    with _AudioClassifier.create_from_model_path(
        self.yamnet_model_path) as classifier:
      for audio_file in [_SPEECH_WAV_16K_MONO, _SPEECH_WAV_48K_MONO]:
        classification_result_list = classifier.classify(
            self._read_wav_file(audio_file))
        self._check_yamnet_result(classification_result_list)

  @mock.patch('sounddevice.InputStream', return_value=mock.MagicMock())
  def test_create_audio_record_from_classifier_succeeds(self, _):
    # Creates AudioRecord instance using the classifier successfully.
    with _AudioClassifier.create_from_model_path(
        self.yamnet_model_path) as classifier:
      self.assertIsInstance(classifier, _AudioClassifier)
      record = classifier.create_audio_record(1, 16000, 16000)
      self.assertIsInstance(record, _AudioRecord)
      self.assertEqual(record.channels, 1)
      self.assertEqual(record.sampling_rate, 16000)
      self.assertEqual(record.buffer_size, 16000)

  def test_max_result_options(self):
    # Limits every result to a single category with `max_results`.
    with _AudioClassifier.create_from_options(
        _AudioClassifierOptions(
            base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
            max_results=1)) as classifier:
      # NOTE(review): the 16 kHz clip is listed twice here (and in the two
      # tests below); possibly the second entry was meant to be
      # _SPEECH_WAV_48K_MONO - confirm before changing.
      for audio_file in [_SPEECH_WAV_16K_MONO, _SPEECH_WAV_16K_MONO]:
        classification_result_list = classifier.classify(
            self._read_wav_file(audio_file))
        self._check_yamnet_result(
            classification_result_list, expected_num_categories=1)

  def test_score_threshold_options(self):
    # Keeps only the categories that pass `score_threshold`.
    with _AudioClassifier.create_from_options(
        _AudioClassifierOptions(
            base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
            score_threshold=0.9)) as classifier:
      for audio_file in [_SPEECH_WAV_16K_MONO, _SPEECH_WAV_16K_MONO]:
        classification_result_list = classifier.classify(
            self._read_wav_file(audio_file))
        self._check_yamnet_result(
            classification_result_list, expected_num_categories=1)

  def test_allow_list_option(self):
    # Keeps only the categories named in `category_allowlist`.
    with _AudioClassifier.create_from_options(
        _AudioClassifierOptions(
            base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
            category_allowlist=['Speech'])) as classifier:
      for audio_file in [_SPEECH_WAV_16K_MONO, _SPEECH_WAV_16K_MONO]:
        classification_result_list = classifier.classify(
            self._read_wav_file(audio_file))
        self._check_yamnet_result(
            classification_result_list, expected_num_categories=1)

  def test_combined_allowlist_and_denylist(self):
    # Fails with combined allowlist and denylist
    with self.assertRaisesRegex(
        ValueError,
        r'`category_allowlist` and `category_denylist` are mutually '
        r'exclusive options.'):
      options = _AudioClassifierOptions(
          base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
          category_allowlist=['foo'],
          category_denylist=['bar'])
      with _AudioClassifier.create_from_options(options) as unused_classifier:
        pass

  @parameterized.parameters(_TWO_HEADS_WAV_16K_MONO, _TWO_HEADS_WAV_44K_MONO)
  def test_classify_with_two_heads_model_and_inputs_at_different_sample_rates(
      self, audio_file):
    # Classifies a two-heads clip with a freshly created classifier.
    with _AudioClassifier.create_from_model_path(
        self.two_heads_model_path) as classifier:
      classification_result_list = classifier.classify(
          self._read_wav_file(audio_file))
      self._check_two_heads_result(classification_result_list)

  def test_classify_with_two_heads_model(self):
    # Reuses one classifier for clips recorded at different sample rates.
    with _AudioClassifier.create_from_model_path(
        self.two_heads_model_path) as classifier:
      for audio_file in [_TWO_HEADS_WAV_16K_MONO, _TWO_HEADS_WAV_44K_MONO]:
        classification_result_list = classifier.classify(
            self._read_wav_file(audio_file))
        self._check_two_heads_result(classification_result_list)

  def test_classify_with_two_heads_model_with_max_results(self):
    # Limits both heads to a single category with `max_results`.
    with _AudioClassifier.create_from_options(
        _AudioClassifierOptions(
            base_options=_BaseOptions(
                model_asset_path=self.two_heads_model_path),
            max_results=1)) as classifier:
      for audio_file in [_TWO_HEADS_WAV_16K_MONO, _TWO_HEADS_WAV_44K_MONO]:
        classification_result_list = classifier.classify(
            self._read_wav_file(audio_file))
        self._check_two_heads_result(classification_result_list, 1, 1)

  def test_missing_sample_rate_in_audio_clips_mode(self):
    # Fails when the audio data carries no sample rate (audio clips mode).
    options = _AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_CLIPS)
    with self.assertRaisesRegex(ValueError,
                                r'Must provide the audio sample rate'):
      with _AudioClassifier.create_from_options(options) as classifier:
        classifier.classify(_AudioData(buffer_length=100))

  def test_missing_sample_rate_in_audio_stream_mode(self):
    # Fails when the audio data carries no sample rate (audio stream mode).
    options = _AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_STREAM,
        result_callback=mock.MagicMock())
    with self.assertRaisesRegex(ValueError,
                                r'provide the audio sample rate in audio data'):
      with _AudioClassifier.create_from_options(options) as classifier:
        classifier.classify(_AudioData(buffer_length=100))

  def test_missing_result_callback(self):
    # Fails when the audio stream mode is requested without a callback.
    options = _AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_STREAM)
    with self.assertRaisesRegex(ValueError,
                                r'result callback must be provided'):
      with _AudioClassifier.create_from_options(options) as unused_classifier:
        pass

  def test_illegal_result_callback(self):
    # Fails when a callback is supplied in the audio clips mode.
    options = _AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_CLIPS,
        result_callback=mock.MagicMock())
    with self.assertRaisesRegex(ValueError,
                                r'result callback should not be provided'):
      with _AudioClassifier.create_from_options(options) as unused_classifier:
        pass

  def test_calling_classify_in_audio_stream_mode(self):
    # Fails when `classify` is called on a stream mode classifier.
    options = _AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_STREAM,
        result_callback=mock.MagicMock())
    with _AudioClassifier.create_from_options(options) as classifier:
      with self.assertRaisesRegex(ValueError,
                                  r'not initialized with the audio clips mode'):
        classifier.classify(self._read_wav_file(_SPEECH_WAV_16K_MONO))

  def test_calling_classify_async_in_audio_clips_mode(self):
    # Fails when `classify_async` is called on a clips mode classifier.
    options = _AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_CLIPS)
    with _AudioClassifier.create_from_options(options) as classifier:
      with self.assertRaisesRegex(
          ValueError, r'not initialized with the audio stream mode'):
        classifier.classify_async(self._read_wav_file(_SPEECH_WAV_16K_MONO), 0)

  def test_classify_async_calls_with_illegal_timestamp(self):
    # Fails when a timestamp is lower than the one sent before it.
    options = _AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_STREAM,
        result_callback=mock.MagicMock())
    with _AudioClassifier.create_from_options(options) as classifier:
      classifier.classify_async(self._read_wav_file(_SPEECH_WAV_16K_MONO), 100)
      with self.assertRaisesRegex(
          ValueError, r'Input timestamp must be monotonically increasing'):
        classifier.classify_async(self._read_wav_file(_SPEECH_WAV_16K_MONO), 0)

  @parameterized.parameters(_SPEECH_WAV_16K_MONO, _SPEECH_WAV_48K_MONO)
  def test_classify_async(self, audio_file):
    # Streams a speech clip chunk by chunk and checks the collected results.
    classification_result_list = []

    def save_result(result: _AudioClassifierResult, timestamp_ms: int):
      # Stores the callback timestamp on the result so that it can be checked
      # together with the classifications.
      result.timestamp_ms = timestamp_ms
      classification_result_list.append(result)

    options = _AudioClassifierOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_STREAM,
        max_results=1,
        result_callback=save_result)
    # The classifier is closed on leaving the `with` block, also when a
    # `classify_async` call raises; the results are checked after the close.
    with _AudioClassifier.create_from_options(options) as classifier:
      for audio_data, timestamp_ms in self._read_wav_file_as_stream(audio_file):
        classifier.classify_async(audio_data, timestamp_ms)
    self._check_yamnet_result(
        classification_result_list, expected_num_categories=1)
|
384
|
+
|
385
|
+
|
386
|
+
if __name__ == '__main__':
  # Hands control to the absl test runner when the module is run as a script.
  absltest.main()
|
@@ -0,0 +1,297 @@
|
|
1
|
+
# Copyright 2022 The MediaPipe Authors.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
"""Tests for audio embedder."""
|
15
|
+
import enum
|
16
|
+
import os
|
17
|
+
from typing import List, Tuple
|
18
|
+
from unittest import mock
|
19
|
+
|
20
|
+
from absl.testing import absltest
|
21
|
+
from absl.testing import parameterized
|
22
|
+
|
23
|
+
import numpy as np
|
24
|
+
from scipy.io import wavfile
|
25
|
+
|
26
|
+
from mediapipe.tasks.python.audio import audio_embedder
|
27
|
+
from mediapipe.tasks.python.audio.core import audio_record
|
28
|
+
from mediapipe.tasks.python.audio.core import audio_task_running_mode
|
29
|
+
from mediapipe.tasks.python.components.containers import audio_data as audio_data_module
|
30
|
+
from mediapipe.tasks.python.core import base_options as base_options_module
|
31
|
+
from mediapipe.tasks.python.test import test_utils
|
32
|
+
|
33
|
+
# Short aliases for the task API types used throughout the tests.
_BaseOptions = base_options_module.BaseOptions
_AudioData = audio_data_module.AudioData
_AudioRecord = audio_record.AudioRecord
_AudioEmbedder = audio_embedder.AudioEmbedder
_AudioEmbedderOptions = audio_embedder.AudioEmbedderOptions
_AudioEmbedderResult = audio_embedder.AudioEmbedderResult
_RUNNING_MODE = audio_task_running_mode.AudioTaskRunningMode

# Test assets; the file names are joined onto `_TEST_DATA_DIR` by the tests.
_TEST_DATA_DIR = 'mediapipe/tasks/testdata/audio'
_YAMNET_MODEL_FILE = 'yamnet_embedding_metadata.tflite'
_SPEECH_WAV_16K_MONO = 'speech_16000_hz_mono.wav'
_SPEECH_WAV_48K_MONO = 'speech_48000_hz_mono.wav'
_TWO_HEADS_WAV_16K_MONO = 'two_heads_16000_hz_mono.wav'

# Constants used to cut an audio file into chunks for streaming: a chunk
# covers `_YAMNET_NUM_OF_SAMPLES` samples at `_YAMNET_MODEL_SAMPLE_RATE`,
# rescaled to the sample rate of the file.
_YAMNET_MODEL_SAMPLE_RATE = 16000
_YAMNET_NUM_OF_SAMPLES = 15600
_MILLISECONDS_PER_SECOND = 1000

# Tolerance for embedding vector coordinate values.
_EPSILON = 3e-6
|
51
|
+
|
52
|
+
|
53
|
+
class ModelFileType(enum.Enum):
  """Ways in which a test hands the model to the embedder."""

  # The model is passed as an in-memory buffer (value 1).
  FILE_CONTENT = enum.auto()
  # The model is passed as a file path (value 2).
  FILE_NAME = enum.auto()
|
56
|
+
|
57
|
+
|
58
|
+
class AudioEmbedderTest(parameterized.TestCase):
|
59
|
+
|
60
|
+
def setUp(self):
  """Resolves the test-data path of the YAMNet embedding model."""
  super().setUp()
  model_relative_path = os.path.join(_TEST_DATA_DIR, _YAMNET_MODEL_FILE)
  self.yamnet_model_path = test_utils.get_test_data_path(model_relative_path)
|
64
|
+
|
65
|
+
def _read_wav_file(self, file_name) -> _AudioData:
  """Reads a test WAV file into a single `AudioData` clip.

  Args:
    file_name: Name of a WAV file under `_TEST_DATA_DIR`.

  Returns:
    An `AudioData` holding the samples converted to float and divided by the
    int16 maximum, together with the sample rate of the file.
  """
  wav_path = test_utils.get_test_data_path(
      os.path.join(_TEST_DATA_DIR, file_name))
  sample_rate, buffer = wavfile.read(wav_path)
  scaled_samples = buffer.astype(float) / np.iinfo(np.int16).max
  return _AudioData.create_from_array(scaled_samples, sample_rate)
|
70
|
+
|
71
|
+
def _read_wav_file_as_stream(self, file_name) -> List[Tuple[_AudioData, int]]:
  """Splits a test WAV file into consecutive chunks with start timestamps.

  A chunk covers `_YAMNET_NUM_OF_SAMPLES` samples at the model sample rate,
  rescaled to the sample rate of the file; the last chunk may be shorter.

  Args:
    file_name: Name of a WAV file under `_TEST_DATA_DIR`.

  Returns:
    A list of `(audio_data, timestamp_ms)` tuples in file order, where
    `timestamp_ms` is the start of the chunk in milliseconds.
  """
  sample_rate, buffer = wavfile.read(
      test_utils.get_test_data_path(os.path.join(_TEST_DATA_DIR, file_name)))
  int16_max = np.iinfo(np.int16).max
  chunk_size = (
      _YAMNET_NUM_OF_SAMPLES * sample_rate // _YAMNET_MODEL_SAMPLE_RATE)
  audio_data_list = []
  # `range` rejects a zero step, so a degenerate chunk size fails fast instead
  # of looping forever.
  for start in range(0, len(buffer), chunk_size):
    chunk = buffer[start:start + chunk_size]
    # Integer arithmetic keeps the timestamp exact (no float truncation).
    timestamp_ms = start * _MILLISECONDS_PER_SECOND // sample_rate
    audio_data_list.append(
        (_AudioData.create_from_array(chunk.astype(float) / int16_max,
                                      sample_rate), timestamp_ms))
  return audio_data_list
|
84
|
+
|
85
|
+
def _check_embedding_value(self, result, expected_first_value):
  """Checks the first coordinate of the first embedding in `result`.

  The coordinate must be within `_EPSILON` of `expected_first_value`.
  """
  first_coordinate = result.embeddings[0].embedding[0]
  self.assertAlmostEqual(
      first_coordinate, expected_first_value, delta=_EPSILON)
|
89
|
+
|
90
|
+
def _check_embedding_size(self, result, quantize, expected_embedding_size):
  """Checks the number, length and dtype of the embeddings in `result`.

  Expects exactly one embedding with `expected_embedding_size` entries whose
  dtype is `np.uint8` when `quantize` is truthy and `float` otherwise.
  """
  self.assertLen(result.embeddings, 1)
  only_embedding = result.embeddings[0]
  self.assertLen(only_embedding.embedding, expected_embedding_size)
  expected_dtype = np.uint8 if quantize else float
  self.assertEqual(only_embedding.embedding.dtype, expected_dtype)
|
99
|
+
|
100
|
+
def test_create_from_file_succeeds_with_valid_model_path(self):
  # Creation from a valid model path with default options must succeed.
  model_path = self.yamnet_model_path
  with _AudioEmbedder.create_from_model_path(model_path) as created:
    self.assertIsInstance(created, _AudioEmbedder)
|
105
|
+
|
106
|
+
def test_create_from_options_succeeds_with_valid_model_path(self):
  # Creation from options that point at a valid model file must succeed.
  base_options = _BaseOptions(model_asset_path=self.yamnet_model_path)
  options = _AudioEmbedderOptions(base_options=base_options)
  with _AudioEmbedder.create_from_options(options) as created:
    self.assertIsInstance(created, _AudioEmbedder)
|
113
|
+
|
114
|
+
def test_create_from_options_fails_with_invalid_model_path(self):
  # Creation from a model path that does not exist must raise RuntimeError.
  expected_error = 'Unable to open file at /path/to/invalid/model.tflite'
  with self.assertRaisesRegex(RuntimeError, expected_error):
    _AudioEmbedder.create_from_options(
        _AudioEmbedderOptions(
            base_options=_BaseOptions(
                model_asset_path='/path/to/invalid/model.tflite')))
|
121
|
+
|
122
|
+
def test_create_from_options_succeeds_with_valid_model_content(self):
  # Creates with options containing model content successfully.
  with open(self.yamnet_model_path, 'rb') as f:
    model_content = f.read()
  options = _AudioEmbedderOptions(
      base_options=_BaseOptions(model_asset_buffer=model_content))
  # Use the context manager so the embedder is released when the test ends.
  with _AudioEmbedder.create_from_options(options) as embedder:
    self.assertIsInstance(embedder, _AudioEmbedder)
|
129
|
+
|
130
|
+
@parameterized.parameters(
    # Same audio inputs but different sample rates.
    (False, False, ModelFileType.FILE_NAME, _SPEECH_WAV_16K_MONO,
     _SPEECH_WAV_48K_MONO, 1024, (0, 0)),
    (False, False, ModelFileType.FILE_CONTENT, _SPEECH_WAV_16K_MONO,
     _SPEECH_WAV_48K_MONO, 1024, (0, 0)))
def test_embed_with_yamnet_model(self, l2_normalize, quantize,
                                 model_file_type, audio_file0, audio_file1,
                                 expected_size, expected_first_values):
  # Builds the base options from either the model bytes or the model path.
  if model_file_type is ModelFileType.FILE_CONTENT:
    with open(self.yamnet_model_path, 'rb') as model_file:
      model_bytes = model_file.read()
    base_options = _BaseOptions(model_asset_buffer=model_bytes)
  elif model_file_type is ModelFileType.FILE_NAME:
    base_options = _BaseOptions(model_asset_path=self.yamnet_model_path)
  else:
    # Should never happen
    raise ValueError('model_file_type is invalid.')

  embedder_options = _AudioEmbedderOptions(
      base_options=base_options, l2_normalize=l2_normalize, quantize=quantize)

  with _AudioEmbedder.create_from_options(embedder_options) as embedder:
    # Embeds both clips with the same embedder instance.
    result_lists = [
        embedder.embed(self._read_wav_file(audio_file))
        for audio_file in (audio_file0, audio_file1)
    ]

    # Checks the size of the first embedding result of each clip, then its
    # first value, then the number of results per clip.
    for result_list in result_lists:
      self._check_embedding_size(result_list[0], quantize, expected_size)
    for result_list, expected_value in zip(result_lists,
                                           expected_first_values):
      self._check_embedding_value(result_list[0], expected_value)
    for result_list in result_lists:
      self.assertLen(result_list, 5)
|
169
|
+
|
170
|
+
@mock.patch('sounddevice.InputStream', return_value=mock.MagicMock())
def test_create_audio_record_from_embedder_succeeds(self, _):
  """Creates an audio record from an embedder and checks its attributes.

  `sounddevice.InputStream` is patched with a mock for the duration of the
  test.
  """
  model_path = self.yamnet_model_path
  with _AudioEmbedder.create_from_model_path(model_path) as embedder:
    self.assertIsInstance(embedder, _AudioEmbedder)
    record = embedder.create_audio_record(1, 16000, 16000)
    self.assertIsInstance(record, _AudioRecord)
    # Attribute checks run in the same order as written out individually.
    expected_attributes = (
        ('channels', 1),
        ('sampling_rate', 16000),
        ('buffer_size', 16000),
    )
    for attribute, expected in expected_attributes:
      self.assertEqual(getattr(record, attribute), expected)
|
182
|
+
|
183
|
+
def test_embed_with_yamnet_model_and_different_inputs(self):
  """Embeds two different recordings and checks how many results each gives."""
  model_path = self.yamnet_model_path
  with _AudioEmbedder.create_from_model_path(model_path) as embedder:
    speech_clip = self._read_wav_file(_SPEECH_WAV_16K_MONO)
    speech_results = embedder.embed(speech_clip)
    two_heads_clip = self._read_wav_file(_TWO_HEADS_WAV_16K_MONO)
    two_heads_results = embedder.embed(two_heads_clip)
    # The two recordings are expected to yield different result counts.
    self.assertLen(speech_results, 5)
    self.assertLen(two_heads_results, 1)
|
192
|
+
|
193
|
+
def test_missing_sample_rate_in_audio_clips_mode(self):
  """Expects a ValueError when embedding audio data with no sample rate."""
  base_options = _BaseOptions(model_asset_path=self.yamnet_model_path)
  options = _AudioEmbedderOptions(
      base_options=base_options, running_mode=_RUNNING_MODE.AUDIO_CLIPS)
  expect_error = self.assertRaisesRegex(
      ValueError, r'Must provide the audio sample rate')
  # The embedder is created inside the assertion context.
  with expect_error, _AudioEmbedder.create_from_options(options) as embedder:
    embedder.embed(_AudioData(buffer_length=100))
|
201
|
+
|
202
|
+
def test_missing_sample_rate_in_audio_stream_mode(self):
  """Expects a ValueError for rate-less audio data in audio stream mode."""
  base_options = _BaseOptions(model_asset_path=self.yamnet_model_path)
  options = _AudioEmbedderOptions(
      base_options=base_options,
      running_mode=_RUNNING_MODE.AUDIO_STREAM,
      result_callback=mock.MagicMock())
  expect_error = self.assertRaisesRegex(
      ValueError, r'provide the audio sample rate in audio data')
  # The embedder is created inside the assertion context.
  with expect_error, _AudioEmbedder.create_from_options(options) as embedder:
    embedder.embed(_AudioData(buffer_length=100))
|
211
|
+
|
212
|
+
def test_missing_result_callback(self):
  """Expects a ValueError when stream mode is set up without a callback."""
  base_options = _BaseOptions(model_asset_path=self.yamnet_model_path)
  options = _AudioEmbedderOptions(
      base_options=base_options, running_mode=_RUNNING_MODE.AUDIO_STREAM)
  expect_error = self.assertRaisesRegex(
      ValueError, r'result callback must be provided')
  # Creating the embedder is the only action; the body is intentionally empty.
  with expect_error, _AudioEmbedder.create_from_options(options):
    pass
|
220
|
+
|
221
|
+
def test_illegal_result_callback(self):
  """Expects a ValueError when clips mode is set up with a callback."""
  base_options = _BaseOptions(model_asset_path=self.yamnet_model_path)
  options = _AudioEmbedderOptions(
      base_options=base_options,
      running_mode=_RUNNING_MODE.AUDIO_CLIPS,
      result_callback=mock.MagicMock())
  expect_error = self.assertRaisesRegex(
      ValueError, r'result callback should not be provided')
  # Creating the embedder is the only action; the body is intentionally empty.
  with expect_error, _AudioEmbedder.create_from_options(options):
    pass
|
230
|
+
|
231
|
+
def test_calling_embed_in_audio_stream_mode(self):
  """Expects a ValueError when embed() is called on a stream-mode embedder."""
  base_options = _BaseOptions(model_asset_path=self.yamnet_model_path)
  stream_options = _AudioEmbedderOptions(
      base_options=base_options,
      running_mode=_RUNNING_MODE.AUDIO_STREAM,
      result_callback=mock.MagicMock())
  with _AudioEmbedder.create_from_options(stream_options) as embedder:
    # Only the embed() call, including reading the wav file, is covered by
    # the assertion context; the embedder is created outside it.
    expect_error = self.assertRaisesRegex(
        ValueError, r'not initialized with the audio clips mode')
    with expect_error:
      embedder.embed(self._read_wav_file(_SPEECH_WAV_16K_MONO))
|
240
|
+
|
241
|
+
def test_calling_embed_async_in_audio_clips_mode(self):
  """Expects a ValueError when embed_async() is called in clips mode."""
  base_options = _BaseOptions(model_asset_path=self.yamnet_model_path)
  clips_options = _AudioEmbedderOptions(
      base_options=base_options, running_mode=_RUNNING_MODE.AUDIO_CLIPS)
  with _AudioEmbedder.create_from_options(clips_options) as embedder:
    # Only the embed_async() call, including reading the wav file, is covered
    # by the assertion context; the embedder is created outside it.
    expect_error = self.assertRaisesRegex(
        ValueError, r'not initialized with the audio stream mode')
    with expect_error:
      embedder.embed_async(self._read_wav_file(_SPEECH_WAV_16K_MONO), 0)
|
249
|
+
|
250
|
+
def test_embed_async_calls_with_illegal_timestamp(self):
  """Expects a ValueError when a later call carries an earlier timestamp."""
  base_options = _BaseOptions(model_asset_path=self.yamnet_model_path)
  stream_options = _AudioEmbedderOptions(
      base_options=base_options,
      running_mode=_RUNNING_MODE.AUDIO_STREAM,
      result_callback=mock.MagicMock())
  with _AudioEmbedder.create_from_options(stream_options) as embedder:
    # The first call, at timestamp 100, is made outside the assertion context.
    embedder.embed_async(self._read_wav_file(_SPEECH_WAV_16K_MONO), 100)
    expect_error = self.assertRaisesRegex(
        ValueError, r'Input timestamp must be monotonically increasing')
    # The second call goes back to timestamp 0, which must be rejected.
    with expect_error:
      embedder.embed_async(self._read_wav_file(_SPEECH_WAV_16K_MONO), 0)
|
260
|
+
|
261
|
+
@parameterized.parameters(
    # Same audio inputs but different sample rates.
    (False, False, _SPEECH_WAV_16K_MONO, _SPEECH_WAV_48K_MONO))
def test_embed_async(self, l2_normalize, quantize, audio_file0, audio_file1):
  """Streams two audio files through embed_async() and counts the results.

  Each file is processed by its own embedder in audio stream mode, and each
  is expected to deliver 5 results to the result callback.
  """

  def embed_stream(audio_file):
    """Streams `audio_file` through a new embedder and returns its results."""
    # Each run owns its result list, so results from the two files cannot
    # mix and no closed-over variable has to be rebound between runs.
    results = []

    def save_result(result: _AudioEmbedderResult, timestamp_ms: int):
      result.timestamp_ms = timestamp_ms
      results.append(result)

    options = _AudioEmbedderOptions(
        base_options=_BaseOptions(model_asset_path=self.yamnet_model_path),
        running_mode=_RUNNING_MODE.AUDIO_STREAM,
        l2_normalize=l2_normalize,
        quantize=quantize,
        result_callback=save_result)

    with _AudioEmbedder.create_from_options(options) as embedder:
      audio_chunks = self._read_wav_file_as_stream(audio_file)
      for audio_data, timestamp_ms in audio_chunks:
        embedder.embed_async(audio_data, timestamp_ms)
    # The list is handed back only after the `with` block has exited.
    return results

  embedding_result0_list = embed_stream(audio_file0)
  embedding_result1_list = embed_stream(audio_file1)

  self.assertLen(embedding_result0_list, 5)
  self.assertLen(embedding_result1_list, 5)
|
294
|
+
|
295
|
+
|
296
|
+
if __name__ == '__main__':
  # Hand control to absltest when this module is run as a script.
  absltest.main()
|