mediapipe-nightly 0.0.0.post20231103__cp39-cp39-macosx_11_0_universal2.whl
Sign up to get free protection for your applications and to get access to all the features.
- mediapipe/__init__.py +26 -0
- mediapipe/calculators/__init__.py +0 -0
- mediapipe/calculators/audio/__init__.py +0 -0
- mediapipe/calculators/audio/mfcc_mel_calculators_pb2.py +34 -0
- mediapipe/calculators/audio/rational_factor_resample_calculator_pb2.py +33 -0
- mediapipe/calculators/audio/spectrogram_calculator_pb2.py +35 -0
- mediapipe/calculators/audio/stabilized_log_calculator_pb2.py +31 -0
- mediapipe/calculators/audio/time_series_framer_calculator_pb2.py +33 -0
- mediapipe/calculators/core/__init__.py +0 -0
- mediapipe/calculators/core/bypass_calculator_pb2.py +31 -0
- mediapipe/calculators/core/clip_vector_size_calculator_pb2.py +31 -0
- mediapipe/calculators/core/concatenate_vector_calculator_pb2.py +31 -0
- mediapipe/calculators/core/constant_side_packet_calculator_pb2.py +37 -0
- mediapipe/calculators/core/dequantize_byte_array_calculator_pb2.py +31 -0
- mediapipe/calculators/core/flow_limiter_calculator_pb2.py +32 -0
- mediapipe/calculators/core/gate_calculator_pb2.py +33 -0
- mediapipe/calculators/core/get_vector_item_calculator_pb2.py +31 -0
- mediapipe/calculators/core/graph_profile_calculator_pb2.py +31 -0
- mediapipe/calculators/core/packet_cloner_calculator_pb2.py +31 -0
- mediapipe/calculators/core/packet_resampler_calculator_pb2.py +33 -0
- mediapipe/calculators/core/packet_thinner_calculator_pb2.py +33 -0
- mediapipe/calculators/core/quantize_float_vector_calculator_pb2.py +31 -0
- mediapipe/calculators/core/sequence_shift_calculator_pb2.py +31 -0
- mediapipe/calculators/core/split_vector_calculator_pb2.py +33 -0
- mediapipe/calculators/image/__init__.py +0 -0
- mediapipe/calculators/image/bilateral_filter_calculator_pb2.py +31 -0
- mediapipe/calculators/image/feature_detector_calculator_pb2.py +31 -0
- mediapipe/calculators/image/image_clone_calculator_pb2.py +31 -0
- mediapipe/calculators/image/image_cropping_calculator_pb2.py +33 -0
- mediapipe/calculators/image/image_transformation_calculator_pb2.py +38 -0
- mediapipe/calculators/image/mask_overlay_calculator_pb2.py +33 -0
- mediapipe/calculators/image/opencv_encoded_image_to_image_frame_calculator_pb2.py +31 -0
- mediapipe/calculators/image/opencv_image_encoder_calculator_pb2.py +35 -0
- mediapipe/calculators/image/recolor_calculator_pb2.py +34 -0
- mediapipe/calculators/image/rotation_mode_pb2.py +28 -0
- mediapipe/calculators/image/scale_image_calculator_pb2.py +34 -0
- mediapipe/calculators/image/segmentation_smoothing_calculator_pb2.py +31 -0
- mediapipe/calculators/image/set_alpha_calculator_pb2.py +31 -0
- mediapipe/calculators/image/warp_affine_calculator_pb2.py +36 -0
- mediapipe/calculators/internal/__init__.py +0 -0
- mediapipe/calculators/internal/callback_packet_calculator_pb2.py +33 -0
- mediapipe/calculators/tensor/__init__.py +0 -0
- mediapipe/calculators/tensor/audio_to_tensor_calculator_pb2.py +35 -0
- mediapipe/calculators/tensor/bert_preprocessor_calculator_pb2.py +31 -0
- mediapipe/calculators/tensor/feedback_tensors_calculator_pb2.py +37 -0
- mediapipe/calculators/tensor/image_to_tensor_calculator_pb2.py +40 -0
- mediapipe/calculators/tensor/inference_calculator_pb2.py +53 -0
- mediapipe/calculators/tensor/landmarks_to_tensor_calculator_pb2.py +33 -0
- mediapipe/calculators/tensor/regex_preprocessor_calculator_pb2.py +31 -0
- mediapipe/calculators/tensor/tensor_converter_calculator_pb2.py +34 -0
- mediapipe/calculators/tensor/tensor_to_joints_calculator_pb2.py +31 -0
- mediapipe/calculators/tensor/tensors_readback_calculator_pb2.py +35 -0
- mediapipe/calculators/tensor/tensors_to_audio_calculator_pb2.py +33 -0
- mediapipe/calculators/tensor/tensors_to_classification_calculator_pb2.py +44 -0
- mediapipe/calculators/tensor/tensors_to_detections_calculator_pb2.py +39 -0
- mediapipe/calculators/tensor/tensors_to_floats_calculator_pb2.py +33 -0
- mediapipe/calculators/tensor/tensors_to_landmarks_calculator_pb2.py +33 -0
- mediapipe/calculators/tensor/tensors_to_segmentation_calculator_pb2.py +34 -0
- mediapipe/calculators/tflite/__init__.py +0 -0
- mediapipe/calculators/tflite/ssd_anchors_calculator_pb2.py +32 -0
- mediapipe/calculators/tflite/tflite_converter_calculator_pb2.py +33 -0
- mediapipe/calculators/tflite/tflite_custom_op_resolver_calculator_pb2.py +31 -0
- mediapipe/calculators/tflite/tflite_inference_calculator_pb2.py +49 -0
- mediapipe/calculators/tflite/tflite_tensors_to_classification_calculator_pb2.py +31 -0
- mediapipe/calculators/tflite/tflite_tensors_to_detections_calculator_pb2.py +31 -0
- mediapipe/calculators/tflite/tflite_tensors_to_landmarks_calculator_pb2.py +33 -0
- mediapipe/calculators/tflite/tflite_tensors_to_segmentation_calculator_pb2.py +31 -0
- mediapipe/calculators/util/__init__.py +0 -0
- mediapipe/calculators/util/align_hand_to_pose_in_world_calculator_pb2.py +31 -0
- mediapipe/calculators/util/annotation_overlay_calculator_pb2.py +32 -0
- mediapipe/calculators/util/association_calculator_pb2.py +31 -0
- mediapipe/calculators/util/collection_has_min_size_calculator_pb2.py +31 -0
- mediapipe/calculators/util/combine_joints_calculator_pb2.py +36 -0
- mediapipe/calculators/util/detection_label_id_to_text_calculator_pb2.py +36 -0
- mediapipe/calculators/util/detections_to_rects_calculator_pb2.py +33 -0
- mediapipe/calculators/util/detections_to_render_data_calculator_pb2.py +33 -0
- mediapipe/calculators/util/face_to_rect_calculator_pb2.py +25 -0
- mediapipe/calculators/util/filter_detections_calculator_pb2.py +31 -0
- mediapipe/calculators/util/flat_color_image_calculator_pb2.py +32 -0
- mediapipe/calculators/util/labels_to_render_data_calculator_pb2.py +34 -0
- mediapipe/calculators/util/landmark_projection_calculator_pb2.py +31 -0
- mediapipe/calculators/util/landmarks_refinement_calculator_pb2.py +41 -0
- mediapipe/calculators/util/landmarks_smoothing_calculator_pb2.py +33 -0
- mediapipe/calculators/util/landmarks_to_detection_calculator_pb2.py +31 -0
- mediapipe/calculators/util/landmarks_to_floats_calculator_pb2.py +31 -0
- mediapipe/calculators/util/landmarks_to_render_data_calculator_pb2.py +32 -0
- mediapipe/calculators/util/landmarks_transformation_calculator_pb2.py +37 -0
- mediapipe/calculators/util/latency_pb2.py +25 -0
- mediapipe/calculators/util/local_file_contents_calculator_pb2.py +31 -0
- mediapipe/calculators/util/logic_calculator_pb2.py +34 -0
- mediapipe/calculators/util/non_max_suppression_calculator_pb2.py +35 -0
- mediapipe/calculators/util/packet_frequency_calculator_pb2.py +31 -0
- mediapipe/calculators/util/packet_frequency_pb2.py +25 -0
- mediapipe/calculators/util/packet_latency_calculator_pb2.py +31 -0
- mediapipe/calculators/util/rect_to_render_data_calculator_pb2.py +32 -0
- mediapipe/calculators/util/rect_to_render_scale_calculator_pb2.py +31 -0
- mediapipe/calculators/util/rect_transformation_calculator_pb2.py +31 -0
- mediapipe/calculators/util/refine_landmarks_from_heatmap_calculator_pb2.py +31 -0
- mediapipe/calculators/util/set_joints_visibility_calculator_pb2.py +41 -0
- mediapipe/calculators/util/thresholding_calculator_pb2.py +31 -0
- mediapipe/calculators/util/timed_box_list_id_to_label_calculator_pb2.py +31 -0
- mediapipe/calculators/util/timed_box_list_to_render_data_calculator_pb2.py +32 -0
- mediapipe/calculators/util/top_k_scores_calculator_pb2.py +31 -0
- mediapipe/calculators/util/visibility_copy_calculator_pb2.py +27 -0
- mediapipe/calculators/util/visibility_smoothing_calculator_pb2.py +31 -0
- mediapipe/calculators/video/__init__.py +0 -0
- mediapipe/calculators/video/box_detector_calculator_pb2.py +32 -0
- mediapipe/calculators/video/box_tracker_calculator_pb2.py +32 -0
- mediapipe/calculators/video/flow_packager_calculator_pb2.py +32 -0
- mediapipe/calculators/video/flow_to_image_calculator_pb2.py +31 -0
- mediapipe/calculators/video/motion_analysis_calculator_pb2.py +42 -0
- mediapipe/calculators/video/opencv_video_encoder_calculator_pb2.py +31 -0
- mediapipe/calculators/video/tool/__init__.py +0 -0
- mediapipe/calculators/video/tool/flow_quantizer_model_pb2.py +25 -0
- mediapipe/calculators/video/tracked_detection_manager_calculator_pb2.py +32 -0
- mediapipe/calculators/video/video_pre_stream_calculator_pb2.py +35 -0
- mediapipe/examples/__init__.py +14 -0
- mediapipe/examples/desktop/__init__.py +14 -0
- mediapipe/framework/__init__.py +0 -0
- mediapipe/framework/calculator_options_pb2.py +28 -0
- mediapipe/framework/calculator_pb2.py +56 -0
- mediapipe/framework/calculator_profile_pb2.py +47 -0
- mediapipe/framework/deps/__init__.py +0 -0
- mediapipe/framework/deps/proto_descriptor_pb2.py +28 -0
- mediapipe/framework/formats/__init__.py +0 -0
- mediapipe/framework/formats/affine_transform_data_pb2.py +27 -0
- mediapipe/framework/formats/annotation/__init__.py +0 -0
- mediapipe/framework/formats/annotation/locus_pb2.py +31 -0
- mediapipe/framework/formats/annotation/rasterization_pb2.py +28 -0
- mediapipe/framework/formats/body_rig_pb2.py +27 -0
- mediapipe/framework/formats/classification_pb2.py +30 -0
- mediapipe/framework/formats/detection_pb2.py +35 -0
- mediapipe/framework/formats/image_file_properties_pb2.py +25 -0
- mediapipe/framework/formats/image_format_pb2.py +28 -0
- mediapipe/framework/formats/landmark_pb2.py +36 -0
- mediapipe/framework/formats/location_data_pb2.py +37 -0
- mediapipe/framework/formats/matrix_data_pb2.py +30 -0
- mediapipe/framework/formats/motion/__init__.py +0 -0
- mediapipe/framework/formats/motion/optical_flow_field_data_pb2.py +29 -0
- mediapipe/framework/formats/object_detection/__init__.py +0 -0
- mediapipe/framework/formats/object_detection/anchor_pb2.py +25 -0
- mediapipe/framework/formats/rect_pb2.py +28 -0
- mediapipe/framework/formats/time_series_header_pb2.py +27 -0
- mediapipe/framework/mediapipe_options_pb2.py +26 -0
- mediapipe/framework/packet_factory_pb2.py +30 -0
- mediapipe/framework/packet_generator_pb2.py +32 -0
- mediapipe/framework/status_handler_pb2.py +27 -0
- mediapipe/framework/stream_handler/__init__.py +0 -0
- mediapipe/framework/stream_handler/default_input_stream_handler_pb2.py +27 -0
- mediapipe/framework/stream_handler/fixed_size_input_stream_handler_pb2.py +27 -0
- mediapipe/framework/stream_handler/sync_set_input_stream_handler_pb2.py +29 -0
- mediapipe/framework/stream_handler/timestamp_align_input_stream_handler_pb2.py +27 -0
- mediapipe/framework/stream_handler_pb2.py +29 -0
- mediapipe/framework/test_calculators_pb2.py +31 -0
- mediapipe/framework/thread_pool_executor_pb2.py +29 -0
- mediapipe/framework/tool/__init__.py +0 -0
- mediapipe/framework/tool/calculator_graph_template_pb2.py +44 -0
- mediapipe/framework/tool/field_data_pb2.py +27 -0
- mediapipe/framework/tool/node_chain_subgraph_pb2.py +31 -0
- mediapipe/framework/tool/packet_generator_wrapper_calculator_pb2.py +28 -0
- mediapipe/framework/tool/source_pb2.py +33 -0
- mediapipe/framework/tool/switch_container_pb2.py +32 -0
- mediapipe/gpu/__init__.py +0 -0
- mediapipe/gpu/copy_calculator_pb2.py +33 -0
- mediapipe/gpu/gl_animation_overlay_calculator_pb2.py +31 -0
- mediapipe/gpu/gl_context_options_pb2.py +31 -0
- mediapipe/gpu/gl_scaler_calculator_pb2.py +32 -0
- mediapipe/gpu/gl_surface_sink_calculator_pb2.py +32 -0
- mediapipe/gpu/gpu_origin_pb2.py +28 -0
- mediapipe/gpu/scale_mode_pb2.py +27 -0
- mediapipe/model_maker/__init__.py +27 -0
- mediapipe/model_maker/setup.py +107 -0
- mediapipe/modules/__init__.py +0 -0
- mediapipe/modules/face_detection/__init__.py +0 -0
- mediapipe/modules/face_detection/face_detection_full_range_cpu.binarypb +0 -0
- mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite +0 -0
- mediapipe/modules/face_detection/face_detection_pb2.py +30 -0
- mediapipe/modules/face_detection/face_detection_short_range.tflite +0 -0
- mediapipe/modules/face_detection/face_detection_short_range_cpu.binarypb +0 -0
- mediapipe/modules/face_geometry/__init__.py +0 -0
- mediapipe/modules/face_geometry/data/__init__.py +0 -0
- mediapipe/modules/face_geometry/effect_renderer_calculator_pb2.py +27 -0
- mediapipe/modules/face_geometry/env_generator_calculator_pb2.py +28 -0
- mediapipe/modules/face_geometry/geometry_pipeline_calculator_pb2.py +27 -0
- mediapipe/modules/face_geometry/libs/__init__.py +0 -0
- mediapipe/modules/face_geometry/protos/__init__.py +0 -0
- mediapipe/modules/face_geometry/protos/environment_pb2.py +30 -0
- mediapipe/modules/face_geometry/protos/face_geometry_pb2.py +28 -0
- mediapipe/modules/face_geometry/protos/geometry_pipeline_metadata_pb2.py +31 -0
- mediapipe/modules/face_geometry/protos/mesh_3d_pb2.py +30 -0
- mediapipe/modules/face_landmark/__init__.py +0 -0
- mediapipe/modules/face_landmark/face_landmark.tflite +0 -0
- mediapipe/modules/face_landmark/face_landmark_front_cpu.binarypb +0 -0
- mediapipe/modules/face_landmark/face_landmark_with_attention.tflite +0 -0
- mediapipe/modules/hand_landmark/__init__.py +0 -0
- mediapipe/modules/hand_landmark/calculators/__init__.py +0 -0
- mediapipe/modules/hand_landmark/hand_landmark_full.tflite +0 -0
- mediapipe/modules/hand_landmark/hand_landmark_lite.tflite +0 -0
- mediapipe/modules/hand_landmark/hand_landmark_tracking_cpu.binarypb +0 -0
- mediapipe/modules/hand_landmark/handedness.txt +2 -0
- mediapipe/modules/holistic_landmark/__init__.py +0 -0
- mediapipe/modules/holistic_landmark/calculators/__init__.py +0 -0
- mediapipe/modules/holistic_landmark/calculators/roi_tracking_calculator_pb2.py +37 -0
- mediapipe/modules/holistic_landmark/hand_recrop.tflite +0 -0
- mediapipe/modules/holistic_landmark/holistic_landmark_cpu.binarypb +0 -0
- mediapipe/modules/iris_landmark/__init__.py +0 -0
- mediapipe/modules/iris_landmark/iris_landmark.tflite +0 -0
- mediapipe/modules/objectron/__init__.py +0 -0
- mediapipe/modules/objectron/calculators/__init__.py +0 -0
- mediapipe/modules/objectron/calculators/a_r_capture_metadata_pb2.py +101 -0
- mediapipe/modules/objectron/calculators/annotation_data_pb2.py +37 -0
- mediapipe/modules/objectron/calculators/belief_decoder_config_pb2.py +27 -0
- mediapipe/modules/objectron/calculators/camera_parameters_pb2.py +29 -0
- mediapipe/modules/objectron/calculators/filter_detection_calculator_pb2.py +35 -0
- mediapipe/modules/objectron/calculators/frame_annotation_to_rect_calculator_pb2.py +31 -0
- mediapipe/modules/objectron/calculators/frame_annotation_tracker_calculator_pb2.py +31 -0
- mediapipe/modules/objectron/calculators/lift_2d_frame_annotation_to_3d_calculator_pb2.py +32 -0
- mediapipe/modules/objectron/calculators/object_pb2.py +37 -0
- mediapipe/modules/objectron/calculators/tensors_to_objects_calculator_pb2.py +32 -0
- mediapipe/modules/objectron/calculators/tflite_tensors_to_objects_calculator_pb2.py +32 -0
- mediapipe/modules/objectron/object_detection_oidv4_labelmap.txt +24 -0
- mediapipe/modules/objectron/objectron_cpu.binarypb +0 -0
- mediapipe/modules/palm_detection/__init__.py +0 -0
- mediapipe/modules/palm_detection/palm_detection_full.tflite +0 -0
- mediapipe/modules/palm_detection/palm_detection_lite.tflite +0 -0
- mediapipe/modules/pose_detection/__init__.py +0 -0
- mediapipe/modules/pose_detection/pose_detection.tflite +0 -0
- mediapipe/modules/pose_landmark/__init__.py +0 -0
- mediapipe/modules/pose_landmark/pose_landmark_cpu.binarypb +0 -0
- mediapipe/modules/pose_landmark/pose_landmark_full.tflite +0 -0
- mediapipe/modules/selfie_segmentation/__init__.py +0 -0
- mediapipe/modules/selfie_segmentation/selfie_segmentation.tflite +0 -0
- mediapipe/modules/selfie_segmentation/selfie_segmentation_cpu.binarypb +0 -0
- mediapipe/modules/selfie_segmentation/selfie_segmentation_landscape.tflite +0 -0
- mediapipe/python/__init__.py +28 -0
- mediapipe/python/_framework_bindings/arm64.cpython-39-darwin.so +0 -0
- mediapipe/python/_framework_bindings.cpython-39-darwin.so +0 -0
- mediapipe/python/calculator_graph_test.py +251 -0
- mediapipe/python/image_frame_test.py +194 -0
- mediapipe/python/image_test.py +218 -0
- mediapipe/python/packet_creator.py +275 -0
- mediapipe/python/packet_getter.py +119 -0
- mediapipe/python/packet_test.py +533 -0
- mediapipe/python/solution_base.py +632 -0
- mediapipe/python/solution_base_test.py +396 -0
- mediapipe/python/solutions/__init__.py +27 -0
- mediapipe/python/solutions/download_utils.py +37 -0
- mediapipe/python/solutions/drawing_styles.py +249 -0
- mediapipe/python/solutions/drawing_utils.py +316 -0
- mediapipe/python/solutions/drawing_utils_test.py +258 -0
- mediapipe/python/solutions/face_detection.py +105 -0
- mediapipe/python/solutions/face_detection_test.py +92 -0
- mediapipe/python/solutions/face_mesh.py +125 -0
- mediapipe/python/solutions/face_mesh_connections.py +500 -0
- mediapipe/python/solutions/face_mesh_test.py +170 -0
- mediapipe/python/solutions/hands.py +153 -0
- mediapipe/python/solutions/hands_connections.py +32 -0
- mediapipe/python/solutions/hands_test.py +218 -0
- mediapipe/python/solutions/holistic.py +167 -0
- mediapipe/python/solutions/holistic_test.py +142 -0
- mediapipe/python/solutions/objectron.py +288 -0
- mediapipe/python/solutions/objectron_test.py +81 -0
- mediapipe/python/solutions/pose.py +192 -0
- mediapipe/python/solutions/pose_connections.py +22 -0
- mediapipe/python/solutions/pose_test.py +262 -0
- mediapipe/python/solutions/selfie_segmentation.py +76 -0
- mediapipe/python/solutions/selfie_segmentation_test.py +68 -0
- mediapipe/python/timestamp_test.py +78 -0
- mediapipe/tasks/__init__.py +14 -0
- mediapipe/tasks/cc/__init__.py +0 -0
- mediapipe/tasks/cc/audio/__init__.py +0 -0
- mediapipe/tasks/cc/audio/audio_classifier/__init__.py +0 -0
- mediapipe/tasks/cc/audio/audio_classifier/proto/__init__.py +0 -0
- mediapipe/tasks/cc/audio/audio_classifier/proto/audio_classifier_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/audio/audio_embedder/__init__.py +0 -0
- mediapipe/tasks/cc/audio/audio_embedder/proto/__init__.py +0 -0
- mediapipe/tasks/cc/audio/audio_embedder/proto/audio_embedder_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/audio/core/__init__.py +0 -0
- mediapipe/tasks/cc/audio/utils/__init__.py +0 -0
- mediapipe/tasks/cc/components/__init__.py +0 -0
- mediapipe/tasks/cc/components/calculators/__init__.py +0 -0
- mediapipe/tasks/cc/components/calculators/classification_aggregation_calculator_pb2.py +31 -0
- mediapipe/tasks/cc/components/calculators/score_calibration_calculator_pb2.py +35 -0
- mediapipe/tasks/cc/components/calculators/tensors_to_embeddings_calculator_pb2.py +32 -0
- mediapipe/tasks/cc/components/containers/__init__.py +0 -0
- mediapipe/tasks/cc/components/containers/proto/__init__.py +0 -0
- mediapipe/tasks/cc/components/containers/proto/classifications_pb2.py +29 -0
- mediapipe/tasks/cc/components/containers/proto/embeddings_pb2.py +34 -0
- mediapipe/tasks/cc/components/containers/proto/landmarks_detection_result_pb2.py +31 -0
- mediapipe/tasks/cc/components/processors/__init__.py +0 -0
- mediapipe/tasks/cc/components/processors/proto/__init__.py +0 -0
- mediapipe/tasks/cc/components/processors/proto/classification_postprocessing_graph_options_pb2.py +38 -0
- mediapipe/tasks/cc/components/processors/proto/classifier_options_pb2.py +26 -0
- mediapipe/tasks/cc/components/processors/proto/detection_postprocessing_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/components/processors/proto/detector_options_pb2.py +26 -0
- mediapipe/tasks/cc/components/processors/proto/embedder_options_pb2.py +26 -0
- mediapipe/tasks/cc/components/processors/proto/embedding_postprocessing_graph_options_pb2.py +32 -0
- mediapipe/tasks/cc/components/processors/proto/image_preprocessing_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/components/processors/proto/llm_params_pb2.py +27 -0
- mediapipe/tasks/cc/components/processors/proto/text_model_type_pb2.py +27 -0
- mediapipe/tasks/cc/components/processors/proto/text_preprocessing_graph_options_pb2.py +32 -0
- mediapipe/tasks/cc/components/processors/proto/transformer_params_pb2.py +28 -0
- mediapipe/tasks/cc/components/utils/__init__.py +0 -0
- mediapipe/tasks/cc/core/__init__.py +0 -0
- mediapipe/tasks/cc/core/proto/__init__.py +0 -0
- mediapipe/tasks/cc/core/proto/acceleration_pb2.py +27 -0
- mediapipe/tasks/cc/core/proto/base_options_pb2.py +29 -0
- mediapipe/tasks/cc/core/proto/external_file_pb2.py +30 -0
- mediapipe/tasks/cc/core/proto/inference_subgraph_pb2.py +32 -0
- mediapipe/tasks/cc/core/proto/model_resources_calculator_pb2.py +32 -0
- mediapipe/tasks/cc/metadata/__init__.py +0 -0
- mediapipe/tasks/cc/metadata/python/__init__.py +0 -0
- mediapipe/tasks/cc/metadata/python/_pywrap_metadata_version/arm64.cpython-39-darwin.so +0 -0
- mediapipe/tasks/cc/metadata/python/_pywrap_metadata_version.cpython-39-darwin.so +0 -0
- mediapipe/tasks/cc/metadata/tests/__init__.py +0 -0
- mediapipe/tasks/cc/metadata/utils/__init__.py +0 -0
- mediapipe/tasks/cc/text/__init__.py +0 -0
- mediapipe/tasks/cc/text/custom_ops/__init__.py +0 -0
- mediapipe/tasks/cc/text/custom_ops/ragged/__init__.py +0 -0
- mediapipe/tasks/cc/text/custom_ops/sentencepiece/__init__.py +0 -0
- mediapipe/tasks/cc/text/custom_ops/sentencepiece/testdata/__init__.py +0 -0
- mediapipe/tasks/cc/text/language_detector/__init__.py +0 -0
- mediapipe/tasks/cc/text/language_detector/custom_ops/__init__.py +0 -0
- mediapipe/tasks/cc/text/language_detector/custom_ops/utils/__init__.py +0 -0
- mediapipe/tasks/cc/text/language_detector/custom_ops/utils/hash/__init__.py +0 -0
- mediapipe/tasks/cc/text/language_detector/custom_ops/utils/utf/__init__.py +0 -0
- mediapipe/tasks/cc/text/text_classifier/__init__.py +0 -0
- mediapipe/tasks/cc/text/text_classifier/proto/__init__.py +0 -0
- mediapipe/tasks/cc/text/text_classifier/proto/text_classifier_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/text/text_embedder/__init__.py +0 -0
- mediapipe/tasks/cc/text/text_embedder/proto/__init__.py +0 -0
- mediapipe/tasks/cc/text/text_embedder/proto/text_embedder_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/text/tokenizers/__init__.py +0 -0
- mediapipe/tasks/cc/text/utils/__init__.py +0 -0
- mediapipe/tasks/cc/vision/__init__.py +0 -0
- mediapipe/tasks/cc/vision/core/__init__.py +0 -0
- mediapipe/tasks/cc/vision/custom_ops/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_detector/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_detector/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_detector/proto/face_detector_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/face_geometry/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_geometry/calculators/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_geometry/calculators/env_generator_calculator_pb2.py +28 -0
- mediapipe/tasks/cc/vision/face_geometry/calculators/geometry_pipeline_calculator_pb2.py +29 -0
- mediapipe/tasks/cc/vision/face_geometry/data/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_geometry/libs/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_geometry/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_geometry/proto/environment_pb2.py +30 -0
- mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry_graph_options_pb2.py +29 -0
- mediapipe/tasks/cc/vision/face_geometry/proto/face_geometry_pb2.py +28 -0
- mediapipe/tasks/cc/vision/face_geometry/proto/geometry_pipeline_metadata_pb2.py +31 -0
- mediapipe/tasks/cc/vision/face_geometry/proto/mesh_3d_pb2.py +30 -0
- mediapipe/tasks/cc/vision/face_landmarker/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_landmarker/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_landmarker/proto/face_blendshapes_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarker_graph_options_pb2.py +37 -0
- mediapipe/tasks/cc/vision/face_landmarker/proto/face_landmarks_detector_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/vision/face_landmarker/proto/tensors_to_face_landmarks_graph_options_pb2.py +32 -0
- mediapipe/tasks/cc/vision/face_stylizer/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_stylizer/calculators/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_stylizer/calculators/tensors_to_image_calculator_pb2.py +36 -0
- mediapipe/tasks/cc/vision/face_stylizer/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/face_stylizer/proto/face_stylizer_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/__init__.py +0 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/calculators/__init__.py +0 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/calculators/combined_prediction_calculator_pb2.py +33 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/calculators/landmarks_to_matrix_calculator_pb2.py +31 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_classifier_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_embedder_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/proto/gesture_recognizer_graph_options_pb2.py +36 -0
- mediapipe/tasks/cc/vision/gesture_recognizer/proto/hand_gesture_recognizer_graph_options_pb2.py +36 -0
- mediapipe/tasks/cc/vision/hand_detector/__init__.py +0 -0
- mediapipe/tasks/cc/vision/hand_detector/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/hand_detector/proto/hand_detector_result_pb2.py +29 -0
- mediapipe/tasks/cc/vision/hand_landmarker/__init__.py +0 -0
- mediapipe/tasks/cc/vision/hand_landmarker/calculators/__init__.py +0 -0
- mediapipe/tasks/cc/vision/hand_landmarker/calculators/hand_association_calculator_pb2.py +31 -0
- mediapipe/tasks/cc/vision/hand_landmarker/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarker_graph_options_pb2.py +36 -0
- mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_landmarks_detector_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/hand_landmarker/proto/hand_roi_refinement_graph_options_pb2.py +26 -0
- mediapipe/tasks/cc/vision/image_classifier/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_classifier/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_classifier/proto/image_classifier_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/vision/image_embedder/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_embedder/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_embedder/proto/image_embedder_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/vision/image_generator/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_generator/diffuser/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_generator/diffuser/stable_diffusion_iterate_calculator_pb2.py +40 -0
- mediapipe/tasks/cc/vision/image_generator/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_generator/proto/conditioned_image_graph_options_pb2.py +39 -0
- mediapipe/tasks/cc/vision/image_generator/proto/control_plugin_graph_options_pb2.py +33 -0
- mediapipe/tasks/cc/vision/image_generator/proto/image_generator_graph_options_pb2.py +29 -0
- mediapipe/tasks/cc/vision/image_segmenter/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_segmenter/calculators/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_segmenter/calculators/tensors_to_segmentation_calculator_pb2.py +34 -0
- mediapipe/tasks/cc/vision/image_segmenter/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/image_segmenter/proto/image_segmenter_graph_options_pb2.py +35 -0
- mediapipe/tasks/cc/vision/image_segmenter/proto/segmenter_options_pb2.py +32 -0
- mediapipe/tasks/cc/vision/interactive_segmenter/__init__.py +0 -0
- mediapipe/tasks/cc/vision/object_detector/__init__.py +0 -0
- mediapipe/tasks/cc/vision/object_detector/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/object_detector/proto/object_detector_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/pose_detector/__init__.py +0 -0
- mediapipe/tasks/cc/vision/pose_detector/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/pose_detector/proto/pose_detector_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/pose_landmarker/__init__.py +0 -0
- mediapipe/tasks/cc/vision/pose_landmarker/proto/__init__.py +0 -0
- mediapipe/tasks/cc/vision/pose_landmarker/proto/pose_landmarker_graph_options_pb2.py +36 -0
- mediapipe/tasks/cc/vision/pose_landmarker/proto/pose_landmarks_detector_graph_options_pb2.py +34 -0
- mediapipe/tasks/cc/vision/utils/__init__.py +0 -0
- mediapipe/tasks/cc/vision/utils/ghum/__init__.py +0 -0
- mediapipe/tasks/metadata/image_segmenter_metadata_schema.fbs +59 -0
- mediapipe/tasks/metadata/image_segmenter_metadata_schema_py_generated.py +108 -0
- mediapipe/tasks/metadata/metadata_schema.fbs +732 -0
- mediapipe/tasks/metadata/metadata_schema_py_generated.py +3251 -0
- mediapipe/tasks/metadata/object_detector_metadata_schema.fbs +98 -0
- mediapipe/tasks/metadata/object_detector_metadata_schema_py_generated.py +674 -0
- mediapipe/tasks/metadata/schema_py_generated.py +14263 -0
- mediapipe/tasks/python/__init__.py +26 -0
- mediapipe/tasks/python/audio/__init__.py +33 -0
- mediapipe/tasks/python/audio/audio_classifier.py +324 -0
- mediapipe/tasks/python/audio/audio_embedder.py +285 -0
- mediapipe/tasks/python/audio/core/__init__.py +16 -0
- mediapipe/tasks/python/audio/core/audio_record.py +125 -0
- mediapipe/tasks/python/audio/core/audio_task_running_mode.py +29 -0
- mediapipe/tasks/python/audio/core/base_audio_task_api.py +181 -0
- mediapipe/tasks/python/components/__init__.py +13 -0
- mediapipe/tasks/python/components/containers/__init__.py +53 -0
- mediapipe/tasks/python/components/containers/audio_data.py +137 -0
- mediapipe/tasks/python/components/containers/bounding_box.py +73 -0
- mediapipe/tasks/python/components/containers/category.py +78 -0
- mediapipe/tasks/python/components/containers/classification_result.py +111 -0
- mediapipe/tasks/python/components/containers/detections.py +181 -0
- mediapipe/tasks/python/components/containers/embedding_result.py +89 -0
- mediapipe/tasks/python/components/containers/keypoint.py +77 -0
- mediapipe/tasks/python/components/containers/landmark.py +122 -0
- mediapipe/tasks/python/components/containers/landmark_detection_result.py +106 -0
- mediapipe/tasks/python/components/containers/rect.py +109 -0
- mediapipe/tasks/python/components/processors/__init__.py +23 -0
- mediapipe/tasks/python/components/processors/classifier_options.py +86 -0
- mediapipe/tasks/python/components/utils/__init__.py +13 -0
- mediapipe/tasks/python/components/utils/cosine_similarity.py +68 -0
- mediapipe/tasks/python/core/__init__.py +13 -0
- mediapipe/tasks/python/core/base_options.py +121 -0
- mediapipe/tasks/python/core/optional_dependencies.py +25 -0
- mediapipe/tasks/python/core/task_info.py +132 -0
- mediapipe/tasks/python/metadata/__init__.py +13 -0
- mediapipe/tasks/python/metadata/flatbuffers_lib/_pywrap_flatbuffers/arm64.cpython-39-darwin.so +0 -0
- mediapipe/tasks/python/metadata/flatbuffers_lib/_pywrap_flatbuffers.cpython-39-darwin.so +0 -0
- mediapipe/tasks/python/metadata/metadata.py +928 -0
- mediapipe/tasks/python/metadata/metadata_displayer_cli.py +34 -0
- mediapipe/tasks/python/metadata/metadata_writers/__init__.py +13 -0
- mediapipe/tasks/python/metadata/metadata_writers/face_stylizer.py +138 -0
- mediapipe/tasks/python/metadata/metadata_writers/image_classifier.py +71 -0
- mediapipe/tasks/python/metadata/metadata_writers/image_segmenter.py +170 -0
- mediapipe/tasks/python/metadata/metadata_writers/metadata_info.py +1166 -0
- mediapipe/tasks/python/metadata/metadata_writers/metadata_writer.py +845 -0
- mediapipe/tasks/python/metadata/metadata_writers/model_asset_bundle_utils.py +71 -0
- mediapipe/tasks/python/metadata/metadata_writers/object_detector.py +331 -0
- mediapipe/tasks/python/metadata/metadata_writers/text_classifier.py +119 -0
- mediapipe/tasks/python/metadata/metadata_writers/writer_utils.py +91 -0
- mediapipe/tasks/python/test/__init__.py +13 -0
- mediapipe/tasks/python/test/audio/__init__.py +13 -0
- mediapipe/tasks/python/test/audio/audio_classifier_test.py +387 -0
- mediapipe/tasks/python/test/audio/audio_embedder_test.py +297 -0
- mediapipe/tasks/python/test/test_utils.py +196 -0
- mediapipe/tasks/python/test/text/__init__.py +13 -0
- mediapipe/tasks/python/test/text/language_detector_test.py +228 -0
- mediapipe/tasks/python/test/text/text_classifier_test.py +231 -0
- mediapipe/tasks/python/test/text/text_embedder_test.py +326 -0
- mediapipe/tasks/python/test/vision/__init__.py +13 -0
- mediapipe/tasks/python/test/vision/face_aligner_test.py +190 -0
- mediapipe/tasks/python/test/vision/face_detector_test.py +523 -0
- mediapipe/tasks/python/test/vision/face_landmarker_test.py +565 -0
- mediapipe/tasks/python/test/vision/face_stylizer_test.py +191 -0
- mediapipe/tasks/python/test/vision/hand_landmarker_test.py +437 -0
- mediapipe/tasks/python/test/vision/image_classifier_test.py +657 -0
- mediapipe/tasks/python/test/vision/image_embedder_test.py +423 -0
- mediapipe/tasks/python/test/vision/image_segmenter_test.py +512 -0
- mediapipe/tasks/python/test/vision/interactive_segmenter_test.py +341 -0
- mediapipe/tasks/python/test/vision/object_detector_test.py +493 -0
- mediapipe/tasks/python/test/vision/pose_landmarker_test.py +518 -0
- mediapipe/tasks/python/text/__init__.py +35 -0
- mediapipe/tasks/python/text/core/__init__.py +16 -0
- mediapipe/tasks/python/text/core/base_text_task_api.py +54 -0
- mediapipe/tasks/python/text/language_detector.py +220 -0
- mediapipe/tasks/python/text/text_classifier.py +187 -0
- mediapipe/tasks/python/text/text_embedder.py +188 -0
- mediapipe/tasks/python/vision/__init__.py +83 -0
- mediapipe/tasks/python/vision/core/__init__.py +14 -0
- mediapipe/tasks/python/vision/core/base_vision_task_api.py +226 -0
- mediapipe/tasks/python/vision/core/image_processing_options.py +39 -0
- mediapipe/tasks/python/vision/core/vision_task_running_mode.py +31 -0
- mediapipe/tasks/python/vision/face_aligner.py +158 -0
- mediapipe/tasks/python/vision/face_detector.py +332 -0
- mediapipe/tasks/python/vision/face_landmarker.py +3244 -0
- mediapipe/tasks/python/vision/face_stylizer.py +158 -0
- mediapipe/tasks/python/vision/gesture_recognizer.py +480 -0
- mediapipe/tasks/python/vision/hand_landmarker.py +504 -0
- mediapipe/tasks/python/vision/image_classifier.py +358 -0
- mediapipe/tasks/python/vision/image_embedder.py +362 -0
- mediapipe/tasks/python/vision/image_segmenter.py +433 -0
- mediapipe/tasks/python/vision/interactive_segmenter.py +285 -0
- mediapipe/tasks/python/vision/object_detector.py +385 -0
- mediapipe/tasks/python/vision/pose_landmarker.py +455 -0
- mediapipe/util/__init__.py +0 -0
- mediapipe/util/analytics/__init__.py +0 -0
- mediapipe/util/analytics/mediapipe_log_extension_pb2.py +41 -0
- mediapipe/util/analytics/mediapipe_logging_enums_pb2.py +36 -0
- mediapipe/util/audio_decoder_pb2.py +33 -0
- mediapipe/util/color_pb2.py +32 -0
- mediapipe/util/label_map_pb2.py +26 -0
- mediapipe/util/render_data_pb2.py +57 -0
- mediapipe/util/sequence/__init__.py +14 -0
- mediapipe/util/sequence/media_sequence.py +716 -0
- mediapipe/util/sequence/media_sequence_test.py +290 -0
- mediapipe/util/sequence/media_sequence_util.py +800 -0
- mediapipe/util/sequence/media_sequence_util_test.py +389 -0
- mediapipe/util/tracking/__init__.py +0 -0
- mediapipe/util/tracking/box_detector_pb2.py +38 -0
- mediapipe/util/tracking/box_tracker_pb2.py +31 -0
- mediapipe/util/tracking/camera_motion_pb2.py +30 -0
- mediapipe/util/tracking/flow_packager_pb2.py +59 -0
- mediapipe/util/tracking/frame_selection_pb2.py +34 -0
- mediapipe/util/tracking/frame_selection_solution_evaluator_pb2.py +27 -0
- mediapipe/util/tracking/motion_analysis_pb2.py +34 -0
- mediapipe/util/tracking/motion_estimation_pb2.py +65 -0
- mediapipe/util/tracking/motion_models_pb2.py +41 -0
- mediapipe/util/tracking/motion_saliency_pb2.py +25 -0
- mediapipe/util/tracking/push_pull_filtering_pb2.py +25 -0
- mediapipe/util/tracking/region_flow_computation_pb2.py +58 -0
- mediapipe/util/tracking/region_flow_pb2.py +48 -0
- mediapipe/util/tracking/tone_estimation_pb2.py +44 -0
- mediapipe/util/tracking/tone_models_pb2.py +31 -0
- mediapipe/util/tracking/tracked_detection_manager_config_pb2.py +25 -0
- mediapipe/util/tracking/tracking_pb2.py +72 -0
- mediapipe_nightly-0.0.0.post20231103.dist-info/LICENSE +218 -0
- mediapipe_nightly-0.0.0.post20231103.dist-info/METADATA +196 -0
- mediapipe_nightly-0.0.0.post20231103.dist-info/RECORD +545 -0
- mediapipe_nightly-0.0.0.post20231103.dist-info/WHEEL +5 -0
- mediapipe_nightly-0.0.0.post20231103.dist-info/top_level.txt +4 -0
@@ -0,0 +1,716 @@
|
|
1
|
+
"""Copyright 2019 The MediaPipe Authors.
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
14
|
+
|
15
|
+
|
16
|
+
This script defines a large number of getters and setters for storing
|
17
|
+
multimedia, such as video or audio, and related machine learning data in
|
18
|
+
tf.train.SequenceExamples. These getters and setters simplify sharing
|
19
|
+
data by enforcing common patterns for storing data in SequenceExample
|
20
|
+
key-value pairs.
|
21
|
+
|
22
|
+
The constants, macros, and functions are organized into 7 groups: clip
|
23
|
+
metadata, clip label related, segment related, bounding-box related, image
|
24
|
+
related, feature list related, and keyframe related. The following examples
|
25
|
+
will walk through common task structures, but the relevant data to store can
|
26
|
+
vary by task.
|
27
|
+
|
28
|
+
The clip metadata group is generally data about the media and stored in the
|
29
|
+
SequenceExample.context. Specifying the metadata enables media pipelines,
|
30
|
+
such as MediaPipe, to retrieve that data. Typically, set_clip_data_path,
|
31
|
+
set_clip_start_timestamp, and set_clip_end_timestamp define which data to use
|
32
|
+
without storing the data itself. Example:
|
33
|
+
tensorflow.train.SequenceExample sequence
|
34
|
+
set_clip_data_path("/relative/path/to/data.mp4", sequence)
|
35
|
+
set_clip_start_timestamp(0, sequence)
|
36
|
+
set_clip_end_timestamp(10000000, sequence) # 10 seconds in microseconds.
|
37
|
+
|
38
|
+
The clip label group adds labels that apply to the entire media clip. To
|
39
|
+
annotate that a video clip has a particular label, set the clip metadata
|
40
|
+
above and also set the set_clip_label_index and set_clip_label_string. Most
|
41
|
+
training pipelines will only use the label index or string, but we recommend
|
42
|
+
storing both to improve readability while maintaining ease of use.
|
43
|
+
Example:
|
44
|
+
set_clip_label_string(("run", "jump"), sequence)
|
45
|
+
set_clip_label_index((35, 47), sequence)
|
46
|
+
|
47
|
+
The segment group is generally data about time spans within the media clip
|
48
|
+
and stored in the SequenceExample.context. In this code, continuous lengths
|
49
|
+
of media are called clips, and each clip may have subregions of interest that
|
50
|
+
are called segments. To annotate that a video clip has time spans with labels
|
51
|
+
set the clip metadata above and use the functions set_segment_start_timestamp,
|
52
|
+
set_segment_end_timestamp, set_segment_label_index, and
|
53
|
+
set_segment_label_string. Most training pipelines will only use the label index
|
54
|
+
or string, but we recommend storing both to improve readability while
|
55
|
+
maintaining ease of use. By listing segments as times, the frame rate or other
|
56
|
+
properties can change without affecting the labels.
|
57
|
+
Example:
|
58
|
+
set_segment_start_timestamp((500000, 1000000), sequence) # in microseconds
|
59
|
+
set_segment_end_timestamp((2000000, 6000000), sequence)
|
60
|
+
set_segment_label_index((35, 47), sequence)
|
61
|
+
set_segment_label_string(("run", "jump"), sequence)
|
62
|
+
|
63
|
+
The bounding box group is useful for identifying spatio-temporal annotations
|
64
|
+
for detection, tracking, or action recognition. The exact keys that are
|
65
|
+
needed can vary by task, but to annotate a video clip for detection set the
|
66
|
+
clip metadata above and repeatedly call add_bbox, add_bbox_timestamp,
|
67
|
+
add_bbox_label_index, and add_bbox_label_string. Most training pipelines will
|
68
|
+
only use the label index or string, but we recommend storing both to improve
|
69
|
+
readability while maintaining ease of use. Because bounding boxes are
|
70
|
+
assigned to timepoints in a video, changing the image frame rate can
|
71
|
+
change the alignment. The media_sequence.h's ReconcileMetadata function can
|
72
|
+
align bounding boxes to the nearest image.
|
73
|
+
|
74
|
+
The image group is useful for storing data as sequential 2D arrays, typically
|
75
|
+
encoded as bytes. Images can be RGB images stored as JPEG, discrete masks
|
76
|
+
stored as PNG, or some other format. Parameters that are static over time are
|
77
|
+
set in the context using set_image_width, set_image_height, set_image_format,
|
78
|
+
etc. The series of frames and timestamps are then added with add_image_encoded
|
79
|
+
and
|
80
|
+
add_image_timestamp. For discrete masks, the class or instance indices can be
|
81
|
+
mapped to labels or classes using
|
82
|
+
set_class_segmentation_class_label_{index,string} and
|
83
|
+
set_instance_segmentation_object_class_index.
|
84
|
+
|
85
|
+
The feature list group is useful for storing audio and extracted features,
|
86
|
+
such as per-frame embeddings. SequenceExamples only store lists of floats per
|
87
|
+
timestep, so the dimensions are stored in the context to enable reshaping.
|
88
|
+
For example, set_feature_dimensions and repeatedly calling add_feature_floats
|
89
|
+
and add_feature_timestamp adds per-frame embeddings. The feature methods also
|
90
|
+
support audio features.
|
91
|
+
|
92
|
+
Macros for common patterns are created in media_sequence_util.py and are used
|
93
|
+
here extensively. Because these macros are formulaic, I will only include a
|
94
|
+
usage example here in the code rather than repeating documentation for every
|
95
|
+
instance. This module defines additional functions to simplify working with
|
96
|
+
MediaPipe types.
|
97
|
+
|
98
|
+
Each msu.create_{TYPE}_context_feature takes a NAME and a KEY. It provides
|
99
|
+
setters and getters for SequenceExamples and stores a single value under KEY
|
100
|
+
in the context field. The provided functions are has_${NAME}, get_${NAME},
|
101
|
+
set_${NAME}, and clear_${NAME}.
|
102
|
+
Eg.
|
103
|
+
tf.train.SequenceExample example
|
104
|
+
set_data_path("data_path", example)
|
105
|
+
if has_data_path(example):
|
106
|
+
data_path = get_data_path(example)
|
107
|
+
clear_data_path(example)
|
108
|
+
|
109
|
+
Each msu.create_{TYPE}_list_context_feature takes a NAME and a KEY. It provides
|
110
|
+
setters and getters for SequenceExamples and stores a sequence of values
|
111
|
+
under KEY in the context field. The provided functions are has_${NAME},
|
112
|
+
get_${NAME}, set_${NAME}, clear_${NAME}, get_${NAME}_at, and add_${NAME}.
|
113
|
+
Eg.
|
114
|
+
tf.train.SequenceExample example
|
115
|
+
set_clip_label_string(("run", "jump"), example)
|
116
|
+
if has_clip_label_string(example):
|
117
|
+
values = get_clip_label_string(example)
|
118
|
+
clear_clip_label_string(example)
|
119
|
+
|
120
|
+
Each msu.create_{TYPE}_feature_list takes a NAME and a KEY. It provides setters
|
121
|
+
and getters for SequenceExamples and stores a single value in each feature field
|
122
|
+
under KEY of the feature_lists field. The provided functions are has_${NAME},
|
123
|
+
get_${NAME}, clear_${NAME}, get_${NAME}_size, get_${NAME}_at, and add_${NAME}.
|
124
|
+
tf.train.SequenceExample example
|
125
|
+
add_image_timestamp(1000000, example)
|
126
|
+
add_image_timestamp(2000000, example)
|
127
|
+
if has_image_timestamp(example):
|
128
|
+
for i in range(get_image_timestamp_size(example)):
|
129
|
+
timestamp = get_image_timestamp_at(i, example)
|
130
|
+
clear_image_timestamp(example)
|
131
|
+
|
132
|
+
Each msu.create_{TYPE}_list_feature_list takes a NAME and a KEY. It provides setters
|
133
|
+
and getters for SequenceExamples and stores a sequence of values in each
|
134
|
+
feature field under KEY of the feature_lists field. The provided functions
|
135
|
+
are has_${NAME}, get_${NAME}, clear_${NAME}, get_${NAME}_size, get_${NAME}_at, and
|
136
|
+
add_${NAME}.
|
137
|
+
tf.train.SequenceExample example
|
138
|
+
add_bbox_label_string(("run", "jump"), example)
|
139
|
+
add_bbox_label_string(("run", "fall"), example)
|
140
|
+
if has_bbox_label_string(example):
|
141
|
+
for i in range(get_bbox_label_string_size(example)):
|
142
|
+
labels = get_bbox_label_string_at(i, example)
|
143
|
+
clear_bbox_label_string(example)
|
144
|
+
|
145
|
+
As described in media_sequence_util.h, each of these functions can take an
|
146
|
+
additional string prefix argument as their first argument. The prefix can
|
147
|
+
be fixed with a new NAME by using functools.partial. Prefixes are used to
|
148
|
+
identify common storage patterns (e.g. storing an image along with the height
|
149
|
+
and width) under different names (e.g. storing a left and right image in a
|
150
|
+
stereo pair.) An example creating functions such as
|
151
|
+
add_left_image_encoded that adds a string under the key "LEFT/image/encoded"
|
152
|
+
add_left_image_encoded = msu.function_with_default(add_image_encoded, "LEFT")
|
153
|
+
"""
|
154
|
+
|
155
|
+
from __future__ import absolute_import
|
156
|
+
from __future__ import division
|
157
|
+
from __future__ import print_function
|
158
|
+
import numpy as np
|
159
|
+
from mediapipe.util.sequence import media_sequence_util
|
160
|
+
msu = media_sequence_util
|
161
|
+
|
162
|
+
_HAS_DYNAMIC_ATTRIBUTES = True
|
163
|
+
|
164
|
+
################################## METADATA #################################
|
165
|
+
# A unique identifier for each example.
|
166
|
+
EXAMPLE_ID_KEY = "example/id"
|
167
|
+
# The name of the data set, including the version.
|
168
|
+
EXAMPLE_DATASET_NAME_KEY = "example/dataset_name"
|
169
|
+
# String flags or attributes for this example within a data set.
|
170
|
+
EXAMPLE_DATASET_FLAG_STRING_KEY = "example/dataset/flag/string"
|
171
|
+
# The relative path to the data on disk from some root directory.
|
172
|
+
CLIP_DATA_PATH_KEY = "clip/data_path"
|
173
|
+
# Any identifier for the media beyond the data path.
|
174
|
+
CLIP_MEDIA_ID_KEY = "clip/media_id"
|
175
|
+
# Yet another alternative identifier.
|
176
|
+
ALTERNATIVE_CLIP_MEDIA_ID_KEY = "clip/alternative_media_id"
|
177
|
+
# The encoded bytes for storing media directly in the SequenceExample.
|
178
|
+
CLIP_ENCODED_MEDIA_BYTES_KEY = "clip/encoded_media_bytes"
|
179
|
+
# The start time for the encoded media if not preserved during encoding.
|
180
|
+
CLIP_ENCODED_MEDIA_START_TIMESTAMP_KEY = "clip/encoded_media_start_timestamp"
|
181
|
+
# The start time, in microseconds, for the start of the clip in the media.
|
182
|
+
CLIP_START_TIMESTAMP_KEY = "clip/start/timestamp"
|
183
|
+
# The end time, in microseconds, for the end of the clip in the media.
|
184
|
+
CLIP_END_TIMESTAMP_KEY = "clip/end/timestamp"
|
185
|
+
# A list of label indices for this clip.
|
186
|
+
CLIP_LABEL_INDEX_KEY = "clip/label/index"
|
187
|
+
# A list of label strings for this clip.
|
188
|
+
CLIP_LABEL_STRING_KEY = "clip/label/string"
|
189
|
+
# A list of label confidences for this clip.
|
190
|
+
CLIP_LABEL_CONFIDENCE_KEY = "clip/label/confidence"
|
191
|
+
# A list of label start timestamps for this clip.
|
192
|
+
CLIP_LABEL_START_TIMESTAMP_KEY = "clip/label/start/timestamp"
|
193
|
+
# A list of label end timestamps for this clip.
|
194
|
+
CLIP_LABEL_END_TIMESTAMP_KEY = "clip/label/end/timestamp"
|
195
|
+
msu.create_bytes_context_feature(
|
196
|
+
"example_id", EXAMPLE_ID_KEY, module_dict=globals())
|
197
|
+
msu.create_bytes_context_feature(
|
198
|
+
"example_dataset_name", EXAMPLE_DATASET_NAME_KEY, module_dict=globals())
|
199
|
+
msu.create_bytes_list_context_feature(
|
200
|
+
"example_dataset_flag_string", EXAMPLE_DATASET_FLAG_STRING_KEY,
|
201
|
+
module_dict=globals())
|
202
|
+
msu.create_bytes_context_feature(
|
203
|
+
"clip_media_id", CLIP_MEDIA_ID_KEY, module_dict=globals())
|
204
|
+
msu.create_bytes_context_feature(
|
205
|
+
"clip_alternative_media_id", ALTERNATIVE_CLIP_MEDIA_ID_KEY,
|
206
|
+
module_dict=globals())
|
207
|
+
msu.create_bytes_context_feature(
|
208
|
+
"clip_encoded_media_bytes", CLIP_ENCODED_MEDIA_BYTES_KEY,
|
209
|
+
module_dict=globals())
|
210
|
+
msu.create_bytes_context_feature(
|
211
|
+
"clip_data_path", CLIP_DATA_PATH_KEY, module_dict=globals())
|
212
|
+
msu.create_int_context_feature(
|
213
|
+
"clip_encoded_media_start_timestamp",
|
214
|
+
CLIP_ENCODED_MEDIA_START_TIMESTAMP_KEY, module_dict=globals())
|
215
|
+
msu.create_int_context_feature(
|
216
|
+
"clip_start_timestamp", CLIP_START_TIMESTAMP_KEY, module_dict=globals())
|
217
|
+
msu.create_int_context_feature(
|
218
|
+
"clip_end_timestamp", CLIP_END_TIMESTAMP_KEY, module_dict=globals())
|
219
|
+
msu.create_bytes_list_context_feature(
|
220
|
+
"clip_label_string", CLIP_LABEL_STRING_KEY, module_dict=globals())
|
221
|
+
msu.create_int_list_context_feature(
|
222
|
+
"clip_label_index", CLIP_LABEL_INDEX_KEY, module_dict=globals())
|
223
|
+
msu.create_float_list_context_feature(
|
224
|
+
"clip_label_confidence", CLIP_LABEL_CONFIDENCE_KEY, module_dict=globals())
|
225
|
+
msu.create_int_list_context_feature(
|
226
|
+
"clip_label_start_timestamp",
|
227
|
+
CLIP_LABEL_START_TIMESTAMP_KEY,
|
228
|
+
module_dict=globals())
|
229
|
+
msu.create_int_list_context_feature(
|
230
|
+
"clip_label_end_timestamp",
|
231
|
+
CLIP_LABEL_END_TIMESTAMP_KEY,
|
232
|
+
module_dict=globals())
|
233
|
+
|
234
|
+
################################## SEGMENTS #################################
|
235
|
+
# A list of segment start times in microseconds.
|
236
|
+
SEGMENT_START_TIMESTAMP_KEY = "segment/start/timestamp"
|
237
|
+
# A list of indices marking the first frame index >= the start timestamp.
|
238
|
+
SEGMENT_START_INDEX_KEY = "segment/start/index"
|
239
|
+
# A list of segment end times in microseconds.
|
240
|
+
SEGMENT_END_TIMESTAMP_KEY = "segment/end/timestamp"
|
241
|
+
# A list of indices marking the last frame index <= the end timestamp.
|
242
|
+
SEGMENT_END_INDEX_KEY = "segment/end/index"
|
243
|
+
# A list with the label index for each segment.
|
244
|
+
# Multiple labels for the same segment are encoded as repeated segments.
|
245
|
+
SEGMENT_LABEL_INDEX_KEY = "segment/label/index"
|
246
|
+
# A list with the label string for each segment.
|
247
|
+
# Multiple labels for the same segment are encoded as repeated segments.
|
248
|
+
SEGMENT_LABEL_STRING_KEY = "segment/label/string"
|
249
|
+
# A list with the label confidence for each segment.
|
250
|
+
# Multiple labels for the same segment are encoded as repeated segments.
|
251
|
+
SEGMENT_LABEL_CONFIDENCE_KEY = "segment/label/confidence"
|
252
|
+
msu.create_bytes_list_context_feature(
|
253
|
+
"segment_label_string", SEGMENT_LABEL_STRING_KEY, module_dict=globals())
|
254
|
+
msu.create_int_list_context_feature(
|
255
|
+
"segment_start_timestamp",
|
256
|
+
SEGMENT_START_TIMESTAMP_KEY, module_dict=globals())
|
257
|
+
msu.create_int_list_context_feature(
|
258
|
+
"segment_start_index", SEGMENT_START_INDEX_KEY, module_dict=globals())
|
259
|
+
msu.create_int_list_context_feature(
|
260
|
+
"segment_end_timestamp", SEGMENT_END_TIMESTAMP_KEY, module_dict=globals())
|
261
|
+
msu.create_int_list_context_feature(
|
262
|
+
"segment_end_index", SEGMENT_END_INDEX_KEY, module_dict=globals())
|
263
|
+
msu.create_int_list_context_feature(
|
264
|
+
"segment_label_index", SEGMENT_LABEL_INDEX_KEY, module_dict=globals())
|
265
|
+
msu.create_float_list_context_feature(
|
266
|
+
"segment_label_confidence",
|
267
|
+
SEGMENT_LABEL_CONFIDENCE_KEY, module_dict=globals())
|
268
|
+
|
269
|
+
########################## REGIONS / BOUNDING BOXES #########################
|
270
|
+
|
271
|
+
# Normalized coordinates of bounding boxes are provided in four lists to avoid
|
272
|
+
# order ambiguity. We provide additional accessors for complete bounding boxes
|
273
|
+
# below.
|
274
|
+
REGION_BBOX_YMIN_KEY = "region/bbox/ymin"
|
275
|
+
REGION_BBOX_XMIN_KEY = "region/bbox/xmin"
|
276
|
+
REGION_BBOX_YMAX_KEY = "region/bbox/ymax"
|
277
|
+
REGION_BBOX_XMAX_KEY = "region/bbox/xmax"
|
278
|
+
# The point and radius can denote keypoints.
|
279
|
+
REGION_POINT_X_KEY = "region/point/x"
|
280
|
+
REGION_POINT_Y_KEY = "region/point/y"
|
281
|
+
REGION_RADIUS_KEY = "region/radius"
|
282
|
+
# The 3D point can denote keypoints.
|
283
|
+
REGION_3D_POINT_X_KEY = "region/3d_point/x"
|
284
|
+
REGION_3D_POINT_Y_KEY = "region/3d_point/y"
|
285
|
+
REGION_3D_POINT_Z_KEY = "region/3d_point/z"
|
286
|
+
# The number of regions at that timestep.
|
287
|
+
REGION_NUM_REGIONS_KEY = "region/num_regions"
|
288
|
+
# Whether that timestep is annotated for regions.
|
289
|
+
# (Disambiguates between multiple meanings of num_regions = 0.)
|
290
|
+
REGION_IS_ANNOTATED_KEY = "region/is_annotated"
|
291
|
+
# A list indicating if each region is generated (1) or manually annotated (0)
|
292
|
+
REGION_IS_GENERATED_KEY = "region/is_generated"
|
293
|
+
# A list indicating if each region is occluded (1) or visible (0)
|
294
|
+
REGION_IS_OCCLUDED_KEY = "region/is_occluded"
|
295
|
+
# Lists with a label for each region.
|
296
|
+
# Multiple labels for the same region require duplicating the region.
|
297
|
+
REGION_LABEL_INDEX_KEY = "region/label/index"
|
298
|
+
REGION_LABEL_STRING_KEY = "region/label/string"
|
299
|
+
REGION_LABEL_CONFIDENCE_KEY = "region/label/confidence"
|
300
|
+
# Lists with a track identifier for each region.
|
301
|
+
# Multiple track identifiers for the same region require duplicating the region.
|
302
|
+
REGION_TRACK_INDEX_KEY = "region/track/index"
|
303
|
+
REGION_TRACK_STRING_KEY = "region/track/string"
|
304
|
+
REGION_TRACK_CONFIDENCE_KEY = "region/track/confidence"
|
305
|
+
# Lists with a class for each region. In general, prefer to use the label
|
306
|
+
# fields. These class fields exist to distinguish tracks when different classes
|
307
|
+
# have overlapping track ids.
|
308
|
+
REGION_CLASS_INDEX_KEY = "region/class/index"
|
309
|
+
REGION_CLASS_STRING_KEY = "region/class/string"
|
310
|
+
REGION_CLASS_CONFIDENCE_KEY = "region/class/confidence"
|
311
|
+
# The timestamp of the region annotation in microseconds.
|
312
|
+
REGION_TIMESTAMP_KEY = "region/timestamp"
|
313
|
+
# The original timestamp in microseconds for region annotations.
|
314
|
+
# If regions are aligned to image frames, this field preserves the original
|
315
|
+
# timestamps.
|
316
|
+
REGION_UNMODIFIED_TIMESTAMP_KEY = "region/unmodified_timestamp"
|
317
|
+
# The list of region parts expected in this example.
|
318
|
+
REGION_PARTS_KEY = "region/parts"
|
319
|
+
# The dimensions of each embedding per region / bounding box.
|
320
|
+
REGION_EMBEDDING_DIMENSIONS_PER_REGION_KEY = (
|
321
|
+
"region/embedding/dimensions_per_region")
|
322
|
+
# The format encoding embeddings as strings.
|
323
|
+
REGION_EMBEDDING_FORMAT_KEY = "region/embedding/format"
|
324
|
+
# An embedding for each region. The length of each list must be the product of
|
325
|
+
# the number of regions and the product of the embedding dimensions.
|
326
|
+
REGION_EMBEDDING_FLOAT_KEY = "region/embedding/float"
|
327
|
+
# A string-encoded embedding for each region.
|
328
|
+
REGION_EMBEDDING_ENCODED_KEY = "region/embedding/encoded"
|
329
|
+
# The confidence of the embedding.
|
330
|
+
REGION_EMBEDDING_CONFIDENCE_KEY = "region/embedding/confidence"
|
331
|
+
|
332
|
+
|
333
|
+
def _create_region_with_prefix(name, prefix):
|
334
|
+
"""Create multiple accessors for region based data."""
|
335
|
+
msu.create_int_feature_list(name + "_num_regions", REGION_NUM_REGIONS_KEY,
|
336
|
+
prefix=prefix, module_dict=globals())
|
337
|
+
msu.create_int_feature_list(name + "_is_annotated", REGION_IS_ANNOTATED_KEY,
|
338
|
+
prefix=prefix, module_dict=globals())
|
339
|
+
msu.create_int_list_feature_list(
|
340
|
+
name + "_is_occluded", REGION_IS_OCCLUDED_KEY,
|
341
|
+
prefix=prefix, module_dict=globals())
|
342
|
+
msu.create_int_list_feature_list(
|
343
|
+
name + "_is_generated", REGION_IS_GENERATED_KEY,
|
344
|
+
prefix=prefix, module_dict=globals())
|
345
|
+
msu.create_int_feature_list(name + "_timestamp", REGION_TIMESTAMP_KEY,
|
346
|
+
prefix=prefix, module_dict=globals())
|
347
|
+
msu.create_int_feature_list(
|
348
|
+
name + "_unmodified_timestamp", REGION_UNMODIFIED_TIMESTAMP_KEY,
|
349
|
+
prefix=prefix, module_dict=globals())
|
350
|
+
msu.create_bytes_list_feature_list(
|
351
|
+
name + "_label_string", REGION_LABEL_STRING_KEY,
|
352
|
+
prefix=prefix, module_dict=globals())
|
353
|
+
msu.create_int_list_feature_list(
|
354
|
+
name + "_label_index", REGION_LABEL_INDEX_KEY,
|
355
|
+
prefix=prefix, module_dict=globals())
|
356
|
+
msu.create_float_list_feature_list(
|
357
|
+
name + "_label_confidence", REGION_LABEL_CONFIDENCE_KEY,
|
358
|
+
prefix=prefix, module_dict=globals())
|
359
|
+
msu.create_bytes_list_feature_list(
|
360
|
+
name + "_class_string", REGION_CLASS_STRING_KEY,
|
361
|
+
prefix=prefix, module_dict=globals())
|
362
|
+
msu.create_int_list_feature_list(
|
363
|
+
name + "_class_index", REGION_CLASS_INDEX_KEY,
|
364
|
+
prefix=prefix, module_dict=globals())
|
365
|
+
msu.create_float_list_feature_list(
|
366
|
+
name + "_class_confidence", REGION_CLASS_CONFIDENCE_KEY,
|
367
|
+
prefix=prefix, module_dict=globals())
|
368
|
+
msu.create_bytes_list_feature_list(
|
369
|
+
name + "_track_string", REGION_TRACK_STRING_KEY,
|
370
|
+
prefix=prefix, module_dict=globals())
|
371
|
+
msu.create_int_list_feature_list(
|
372
|
+
name + "_track_index", REGION_TRACK_INDEX_KEY,
|
373
|
+
prefix=prefix, module_dict=globals())
|
374
|
+
msu.create_float_list_feature_list(
|
375
|
+
name + "_track_confidence", REGION_TRACK_CONFIDENCE_KEY,
|
376
|
+
prefix=prefix, module_dict=globals())
|
377
|
+
msu.create_float_list_feature_list(name + "_ymin", REGION_BBOX_YMIN_KEY,
|
378
|
+
prefix=prefix, module_dict=globals())
|
379
|
+
msu.create_float_list_feature_list(name + "_xmin", REGION_BBOX_XMIN_KEY,
|
380
|
+
prefix=prefix, module_dict=globals())
|
381
|
+
msu.create_float_list_feature_list(name + "_ymax", REGION_BBOX_YMAX_KEY,
|
382
|
+
prefix=prefix, module_dict=globals())
|
383
|
+
msu.create_float_list_feature_list(name + "_xmax", REGION_BBOX_XMAX_KEY,
|
384
|
+
prefix=prefix, module_dict=globals())
|
385
|
+
msu.create_float_list_feature_list(name + "_point_x", REGION_POINT_X_KEY,
|
386
|
+
prefix=prefix, module_dict=globals())
|
387
|
+
msu.create_float_list_feature_list(name + "_point_y", REGION_POINT_Y_KEY,
|
388
|
+
prefix=prefix, module_dict=globals())
|
389
|
+
msu.create_float_list_feature_list(
|
390
|
+
name + "_3d_point_x", REGION_3D_POINT_X_KEY,
|
391
|
+
prefix=prefix, module_dict=globals())
|
392
|
+
msu.create_float_list_feature_list(
|
393
|
+
name + "_3d_point_y", REGION_3D_POINT_Y_KEY,
|
394
|
+
prefix=prefix, module_dict=globals())
|
395
|
+
msu.create_float_list_feature_list(
|
396
|
+
name + "_3d_point_z", REGION_3D_POINT_Z_KEY,
|
397
|
+
prefix=prefix, module_dict=globals())
|
398
|
+
msu.create_bytes_list_context_feature(name + "_parts",
|
399
|
+
REGION_PARTS_KEY,
|
400
|
+
prefix=prefix, module_dict=globals())
|
401
|
+
msu.create_float_list_context_feature(
|
402
|
+
name + "_embedding_dimensions_per_region",
|
403
|
+
REGION_EMBEDDING_DIMENSIONS_PER_REGION_KEY,
|
404
|
+
prefix=prefix, module_dict=globals())
|
405
|
+
msu.create_bytes_context_feature(name + "_embedding_format",
|
406
|
+
REGION_EMBEDDING_FORMAT_KEY,
|
407
|
+
prefix=prefix, module_dict=globals())
|
408
|
+
msu.create_float_list_feature_list(name + "_embedding_floats",
|
409
|
+
REGION_EMBEDDING_FLOAT_KEY,
|
410
|
+
prefix=prefix, module_dict=globals())
|
411
|
+
msu.create_bytes_list_feature_list(name + "_embedding_encoded",
|
412
|
+
REGION_EMBEDDING_ENCODED_KEY,
|
413
|
+
prefix=prefix, module_dict=globals())
|
414
|
+
msu.create_float_list_feature_list(name + "_embedding_confidence",
|
415
|
+
REGION_EMBEDDING_CONFIDENCE_KEY,
|
416
|
+
prefix=prefix, module_dict=globals())
|
417
|
+
# pylint: disable=undefined-variable
|
418
|
+
def get_prefixed_bbox_at(index, sequence_example, prefix):
|
419
|
+
return np.stack((
|
420
|
+
get_bbox_ymin_at(index, sequence_example, prefix=prefix),
|
421
|
+
get_bbox_xmin_at(index, sequence_example, prefix=prefix),
|
422
|
+
get_bbox_ymax_at(index, sequence_example, prefix=prefix),
|
423
|
+
get_bbox_xmax_at(index, sequence_example, prefix=prefix)),
|
424
|
+
1)
|
425
|
+
def add_prefixed_bbox(values, sequence_example, prefix):
|
426
|
+
values = np.array(values)
|
427
|
+
if values.size == 0:
|
428
|
+
add_bbox_ymin([], sequence_example, prefix=prefix)
|
429
|
+
add_bbox_xmin([], sequence_example, prefix=prefix)
|
430
|
+
add_bbox_ymax([], sequence_example, prefix=prefix)
|
431
|
+
add_bbox_xmax([], sequence_example, prefix=prefix)
|
432
|
+
else:
|
433
|
+
add_bbox_ymin(values[:, 0], sequence_example, prefix=prefix)
|
434
|
+
add_bbox_xmin(values[:, 1], sequence_example, prefix=prefix)
|
435
|
+
add_bbox_ymax(values[:, 2], sequence_example, prefix=prefix)
|
436
|
+
add_bbox_xmax(values[:, 3], sequence_example, prefix=prefix)
|
437
|
+
def get_prefixed_bbox_size(sequence_example, prefix):
|
438
|
+
return get_bbox_ymin_size(sequence_example, prefix=prefix)
|
439
|
+
def has_prefixed_bbox(sequence_example, prefix):
|
440
|
+
return has_bbox_ymin(sequence_example, prefix=prefix)
|
441
|
+
def clear_prefixed_bbox(sequence_example, prefix):
|
442
|
+
clear_bbox_ymin(sequence_example, prefix=prefix)
|
443
|
+
clear_bbox_xmin(sequence_example, prefix=prefix)
|
444
|
+
clear_bbox_ymax(sequence_example, prefix=prefix)
|
445
|
+
clear_bbox_xmax(sequence_example, prefix=prefix)
|
446
|
+
def get_prefixed_point_at(index, sequence_example, prefix):
|
447
|
+
return np.stack((
|
448
|
+
get_bbox_point_y_at(index, sequence_example, prefix=prefix),
|
449
|
+
get_bbox_point_x_at(index, sequence_example, prefix=prefix)),
|
450
|
+
1)
|
451
|
+
def add_prefixed_point(values, sequence_example, prefix):
|
452
|
+
add_bbox_point_y(values[:, 0], sequence_example, prefix=prefix)
|
453
|
+
add_bbox_point_x(values[:, 1], sequence_example, prefix=prefix)
|
454
|
+
def get_prefixed_point_size(sequence_example, prefix):
|
455
|
+
return get_bbox_point_y_size(sequence_example, prefix=prefix)
|
456
|
+
def has_prefixed_point(sequence_example, prefix):
|
457
|
+
return has_bbox_point_y(sequence_example, prefix=prefix)
|
458
|
+
def clear_prefixed_point(sequence_example, prefix):
|
459
|
+
clear_bbox_point_y(sequence_example, prefix=prefix)
|
460
|
+
clear_bbox_point_x(sequence_example, prefix=prefix)
|
461
|
+
def get_prefixed_3d_point_at(index, sequence_example, prefix):
|
462
|
+
return np.stack((
|
463
|
+
get_bbox_3d_point_x_at(index, sequence_example, prefix=prefix),
|
464
|
+
get_bbox_3d_point_y_at(index, sequence_example, prefix=prefix),
|
465
|
+
get_bbox_3d_point_z_at(index, sequence_example, prefix=prefix)),
|
466
|
+
1)
|
467
|
+
def add_prefixed_3d_point(values, sequence_example, prefix):
|
468
|
+
add_bbox_3d_point_x(values[:, 0], sequence_example, prefix=prefix)
|
469
|
+
add_bbox_3d_point_y(values[:, 1], sequence_example, prefix=prefix)
|
470
|
+
add_bbox_3d_point_z(values[:, 2], sequence_example, prefix=prefix)
|
471
|
+
def get_prefixed_3d_point_size(sequence_example, prefix):
  """Return the size of the 3d point data, as reported by the x feature."""
  point_count = get_bbox_3d_point_x_size(sequence_example, prefix=prefix)
  return point_count
|
473
|
+
def has_prefixed_3d_point(sequence_example, prefix):
  """Report whether 3d point data is present under `prefix`.

  Only the x feature is consulted; the result of `has_bbox_3d_point_x` is
  returned unchanged.
  """
  point_present = has_bbox_3d_point_x(sequence_example, prefix=prefix)
  return point_present
|
475
|
+
def clear_prefixed_3d_point(sequence_example, prefix):
  """Clear the x, y and z 3d point features stored under `prefix`."""
  # Coordinates are cleared in the order x, y, z.
  coordinate_clearers = (
      clear_bbox_3d_point_x,
      clear_bbox_3d_point_y,
      clear_bbox_3d_point_z,
  )
  for clear_coordinate in coordinate_clearers:
    clear_coordinate(sequence_example, prefix=prefix)
|
479
|
+
# pylint: enable=undefined-variable
|
480
|
+
msu.add_functions_to_module({
|
481
|
+
"get_" + name + "_at":
|
482
|
+
msu.function_with_default(get_prefixed_bbox_at, prefix),
|
483
|
+
"add_" + name:
|
484
|
+
msu.function_with_default(add_prefixed_bbox, prefix),
|
485
|
+
"get_" + name + "_size":
|
486
|
+
msu.function_with_default(get_prefixed_bbox_size, prefix),
|
487
|
+
"has_" + name:
|
488
|
+
msu.function_with_default(has_prefixed_bbox, prefix),
|
489
|
+
"clear_" + name:
|
490
|
+
msu.function_with_default(clear_prefixed_bbox, prefix),
|
491
|
+
}, module_dict=globals())
|
492
|
+
msu.add_functions_to_module({
|
493
|
+
"get_" + name + "_point_at":
|
494
|
+
msu.function_with_default(get_prefixed_point_at, prefix),
|
495
|
+
"add_" + name + "_point":
|
496
|
+
msu.function_with_default(add_prefixed_point, prefix),
|
497
|
+
"get_" + name + "_point_size":
|
498
|
+
msu.function_with_default(get_prefixed_point_size, prefix),
|
499
|
+
"has_" + name + "_point":
|
500
|
+
msu.function_with_default(has_prefixed_point, prefix),
|
501
|
+
"clear_" + name + "_point":
|
502
|
+
msu.function_with_default(clear_prefixed_point, prefix),
|
503
|
+
}, module_dict=globals())
|
504
|
+
msu.add_functions_to_module({
|
505
|
+
"get_" + name + "_3d_point_at":
|
506
|
+
msu.function_with_default(get_prefixed_3d_point_at, prefix),
|
507
|
+
"add_" + name + "_3d_point":
|
508
|
+
msu.function_with_default(add_prefixed_3d_point, prefix),
|
509
|
+
"get_" + name + "_3d_point_size":
|
510
|
+
msu.function_with_default(get_prefixed_3d_point_size, prefix),
|
511
|
+
"has_" + name + "_3d_point":
|
512
|
+
msu.function_with_default(has_prefixed_3d_point, prefix),
|
513
|
+
"clear_" + name + "_3d_point":
|
514
|
+
msu.function_with_default(clear_prefixed_3d_point, prefix),
|
515
|
+
}, module_dict=globals())
|
516
|
+
|
517
|
+
|
518
|
+
PREDICTED_PREFIX = "PREDICTED"
# Register the region accessors once without a prefix and once for predictions.
for _region_name, _region_prefix in (
    ("bbox", ""),
    ("predicted_bbox", PREDICTED_PREFIX),
):
  _create_region_with_prefix(_region_name, _region_prefix)
# Drop the loop temporaries so the module namespace is unchanged.
del _region_name, _region_prefix
|
521
|
+
|
522
|
+
|
523
|
+
################################### IMAGES #################################
|
524
|
+
# The format the images are encoded as (e.g. "JPEG", "PNG")
|
525
|
+
IMAGE_FORMAT_KEY = "image/format"
|
526
|
+
# The number of channels in the image.
|
527
|
+
IMAGE_CHANNELS_KEY = "image/channels"
|
528
|
+
# The colorspace of the image.
|
529
|
+
IMAGE_COLORSPACE_KEY = "image/colorspace"
|
530
|
+
# The height of the image in pixels.
|
531
|
+
IMAGE_HEIGHT_KEY = "image/height"
|
532
|
+
# The width of the image in pixels.
|
533
|
+
IMAGE_WIDTH_KEY = "image/width"
|
534
|
+
# The frame rate in images/second of media.
|
535
|
+
IMAGE_FRAME_RATE_KEY = "image/frame_rate"
|
536
|
+
# The maximum values if the images were saturated and normalized for encoding.
|
537
|
+
IMAGE_SATURATION_KEY = "image/saturation"
|
538
|
+
# The listing from discrete image values (as indices) to class indices.
|
539
|
+
IMAGE_CLASS_LABEL_INDEX_KEY = "image/class/label/index"
|
540
|
+
# The listing from discrete image values (as indices) to class strings.
|
541
|
+
IMAGE_CLASS_LABEL_STRING_KEY = "image/class/label/string"
|
542
|
+
# The listing from discrete instance indices to class indices they embody.
|
543
|
+
IMAGE_OBJECT_CLASS_INDEX_KEY = "image/object/class/index"
|
544
|
+
# The encoded image frame.
|
545
|
+
IMAGE_ENCODED_KEY = "image/encoded"
|
546
|
+
# Multiple images from the same timestep (e.g. multiview video).
|
547
|
+
IMAGE_MULTI_ENCODED_KEY = "image/multi_encoded"
|
548
|
+
# The timestamp of the frame in microseconds.
|
549
|
+
IMAGE_TIMESTAMP_KEY = "image/timestamp"
|
550
|
+
# A per image label if specific frames have labels.
|
551
|
+
# If time spans have labels, segments are preferred to allow changing rates.
|
552
|
+
IMAGE_LABEL_INDEX_KEY = "image/label/index"
|
553
|
+
IMAGE_LABEL_STRING_KEY = "image/label/string"
|
554
|
+
IMAGE_LABEL_CONFIDENCE_KEY = "image/label/confidence"
|
555
|
+
# The path of the image file if it did not come from a media clip.
|
556
|
+
IMAGE_DATA_PATH_KEY = "image/data_path"
|
557
|
+
|
558
|
+
|
559
|
+
def _create_image_with_prefix(name, prefix):
  """Create multiple accessors for image based data.

  Each entry below is registered into this module's globals under
  `name + suffix`, bound to the given feature key and `prefix`.

  Args:
    name: base name prepended to every accessor suffix.
    prefix: prefix forwarded unchanged to every accessor factory.
  """
  # (factory, accessor-name suffix, feature key), in registration order.
  accessor_specs = (
      (msu.create_bytes_context_feature, "_format", IMAGE_FORMAT_KEY),
      (msu.create_bytes_context_feature, "_colorspace",
       IMAGE_COLORSPACE_KEY),
      (msu.create_int_context_feature, "_channels", IMAGE_CHANNELS_KEY),
      (msu.create_int_context_feature, "_height", IMAGE_HEIGHT_KEY),
      (msu.create_int_context_feature, "_width", IMAGE_WIDTH_KEY),
      (msu.create_bytes_feature_list, "_encoded", IMAGE_ENCODED_KEY),
      (msu.create_float_context_feature, "_frame_rate",
       IMAGE_FRAME_RATE_KEY),
      (msu.create_bytes_list_context_feature, "_class_label_string",
       IMAGE_CLASS_LABEL_STRING_KEY),
      (msu.create_int_list_context_feature, "_class_label_index",
       IMAGE_CLASS_LABEL_INDEX_KEY),
      (msu.create_int_list_context_feature, "_object_class_index",
       IMAGE_OBJECT_CLASS_INDEX_KEY),
      (msu.create_bytes_context_feature, "_data_path", IMAGE_DATA_PATH_KEY),
      (msu.create_int_feature_list, "_timestamp", IMAGE_TIMESTAMP_KEY),
      (msu.create_bytes_list_feature_list, "_multi_encoded",
       IMAGE_MULTI_ENCODED_KEY),
  )
  module_namespace = globals()
  for make_accessors, suffix, feature_key in accessor_specs:
    make_accessors(name + suffix, feature_key, prefix=prefix,
                   module_dict=module_namespace)
|
591
|
+
FORWARD_FLOW_PREFIX = "FORWARD_FLOW"
CLASS_SEGMENTATION_PREFIX = "CLASS_SEGMENTATION"
INSTANCE_SEGMENTATION_PREFIX = "INSTANCE_SEGMENTATION"
# Register the image accessors for the unprefixed images and each variant.
for _image_name, _image_prefix in (
    ("image", ""),
    ("forward_flow", FORWARD_FLOW_PREFIX),
    ("class_segmentation", CLASS_SEGMENTATION_PREFIX),
    ("instance_segmentation", INSTANCE_SEGMENTATION_PREFIX),
):
  _create_image_with_prefix(_image_name, _image_prefix)
# Drop the loop temporaries so the module namespace is unchanged.
del _image_name, _image_prefix
|
598
|
+
|
599
|
+
################################## TEXT #################################
|
600
|
+
# Which language text tokens are likely to be in.
|
601
|
+
TEXT_LANGUAGE_KEY = "text/language"
|
602
|
+
# A large block of text that applies to the media.
|
603
|
+
TEXT_CONTEXT_CONTENT_KEY = "text/context/content"
|
604
|
+
# A large block of text that applies to the media as token ids.
|
605
|
+
TEXT_CONTEXT_TOKEN_ID_KEY = "text/context/token_id"
|
606
|
+
# A large block of text that applies to the media as embeddings.
|
607
|
+
TEXT_CONTEXT_EMBEDDING_KEY = "text/context/embedding"
|
608
|
+
|
609
|
+
# The text contents for a given time.
|
610
|
+
TEXT_CONTENT_KEY = "text/content"
|
611
|
+
# The start time for the text becoming relevant.
|
612
|
+
TEXT_TIMESTAMP_KEY = "text/timestamp"
|
613
|
+
# The duration where the text is relevant.
|
614
|
+
TEXT_DURATION_KEY = "text/duration"
|
615
|
+
# The confidence that this is the correct text.
|
616
|
+
TEXT_CONFIDENCE_KEY = "text/confidence"
|
617
|
+
# A floating point embedding corresponding to the text.
|
618
|
+
TEXT_EMBEDDING_KEY = "text/embedding"
|
619
|
+
# An integer id corresponding to the text.
|
620
|
+
TEXT_TOKEN_ID_KEY = "text/token/id"
|
621
|
+
|
622
|
+
# Register the text accessors: (factory, accessor name, feature key), in
# registration order.
for _make_text_accessors, _text_accessor_name, _text_feature_key in (
    (msu.create_bytes_context_feature, "text_language", TEXT_LANGUAGE_KEY),
    (msu.create_bytes_context_feature, "text_context_content",
     TEXT_CONTEXT_CONTENT_KEY),
    (msu.create_int_list_context_feature, "text_context_token_id",
     TEXT_CONTEXT_TOKEN_ID_KEY),
    (msu.create_float_list_context_feature, "text_context_embedding",
     TEXT_CONTEXT_EMBEDDING_KEY),
    (msu.create_bytes_feature_list, "text_content", TEXT_CONTENT_KEY),
    (msu.create_int_feature_list, "text_timestamp", TEXT_TIMESTAMP_KEY),
    (msu.create_int_feature_list, "text_duration", TEXT_DURATION_KEY),
    (msu.create_float_feature_list, "text_confidence", TEXT_CONFIDENCE_KEY),
    (msu.create_float_list_feature_list, "text_embedding",
     TEXT_EMBEDDING_KEY),
    (msu.create_int_feature_list, "text_token_id", TEXT_TOKEN_ID_KEY),
):
  _make_text_accessors(_text_accessor_name, _text_feature_key,
                       module_dict=globals())
# Drop the loop temporaries so the module namespace is unchanged.
del _make_text_accessors, _text_accessor_name, _text_feature_key
|
642
|
+
|
643
|
+
################################## FEATURES #################################
|
644
|
+
# The dimensions of the feature.
|
645
|
+
FEATURE_DIMENSIONS_KEY = "feature/dimensions"
|
646
|
+
# The rate the features are extracted per second of media.
|
647
|
+
FEATURE_RATE_KEY = "feature/rate"
|
648
|
+
# The encoding format if any for the feature.
|
649
|
+
FEATURE_BYTES_FORMAT_KEY = "feature/bytes/format"
|
650
|
+
# For audio, the rate the samples are extracted per second of media.
|
651
|
+
FEATURE_SAMPLE_RATE_KEY = "feature/sample_rate"
|
652
|
+
# For audio, the number of channels per extracted feature.
|
653
|
+
FEATURE_NUM_CHANNELS_KEY = "feature/num_channels"
|
654
|
+
# For audio, the number of samples per extracted feature.
|
655
|
+
FEATURE_NUM_SAMPLES_KEY = "feature/num_samples"
|
656
|
+
# For audio, the rate the features are extracted per second of media.
|
657
|
+
FEATURE_PACKET_RATE_KEY = "feature/packet_rate"
|
658
|
+
# For audio, the original audio sampling rate the feature is derived from.
|
659
|
+
FEATURE_AUDIO_SAMPLE_RATE_KEY = "feature/audio_sample_rate"
|
660
|
+
# The feature as a list of floats.
|
661
|
+
FEATURE_FLOATS_KEY = "feature/floats"
|
662
|
+
# The feature as a list of bytes. May be encoded.
|
663
|
+
FEATURE_BYTES_KEY = "feature/bytes"
|
664
|
+
# The feature as a list of ints.
|
665
|
+
FEATURE_INTS_KEY = "feature/ints"
|
666
|
+
# The timestamp, in microseconds, of the feature.
|
667
|
+
FEATURE_TIMESTAMP_KEY = "feature/timestamp"
|
668
|
+
# It is occasionally useful to indicate that a feature applies to a given range.
|
669
|
+
# This should be used for features only and annotations should be provided as
|
670
|
+
# segments.
|
671
|
+
FEATURE_DURATION_KEY = "feature/duration"
|
672
|
+
# Encodes an optional confidence score for the generated features.
|
673
|
+
FEATURE_CONFIDENCE_KEY = "feature/confidence"
|
674
|
+
# The feature as a list of floats in the context.
|
675
|
+
CONTEXT_FEATURE_FLOATS_KEY = "context_feature/floats"
|
676
|
+
# The feature as a list of bytes in the context. May be encoded.
|
677
|
+
CONTEXT_FEATURE_BYTES_KEY = "context_feature/bytes"
|
678
|
+
# The feature as a list of ints in the context.
|
679
|
+
CONTEXT_FEATURE_INTS_KEY = "context_feature/ints"
|
680
|
+
|
681
|
+
# Register the feature accessors: (factory, accessor name, feature key), in
# registration order.
for _make_feature_accessors, _feature_accessor_name, _feature_key in (
    (msu.create_int_list_context_feature, "feature_dimensions",
     FEATURE_DIMENSIONS_KEY),
    (msu.create_float_context_feature, "feature_rate", FEATURE_RATE_KEY),
    (msu.create_bytes_context_feature, "feature_bytes_format",
     FEATURE_BYTES_FORMAT_KEY),
    (msu.create_float_context_feature, "feature_sample_rate",
     FEATURE_SAMPLE_RATE_KEY),
    (msu.create_int_context_feature, "feature_num_channels",
     FEATURE_NUM_CHANNELS_KEY),
    (msu.create_int_context_feature, "feature_num_samples",
     FEATURE_NUM_SAMPLES_KEY),
    (msu.create_float_context_feature, "feature_packet_rate",
     FEATURE_PACKET_RATE_KEY),
    (msu.create_float_context_feature, "feature_audio_sample_rate",
     FEATURE_AUDIO_SAMPLE_RATE_KEY),
    (msu.create_float_list_feature_list, "feature_floats",
     FEATURE_FLOATS_KEY),
    (msu.create_bytes_list_feature_list, "feature_bytes", FEATURE_BYTES_KEY),
    (msu.create_int_list_feature_list, "feature_ints", FEATURE_INTS_KEY),
    (msu.create_int_feature_list, "feature_timestamp",
     FEATURE_TIMESTAMP_KEY),
    (msu.create_int_list_feature_list, "feature_duration",
     FEATURE_DURATION_KEY),
    (msu.create_float_list_feature_list, "feature_confidence",
     FEATURE_CONFIDENCE_KEY),
    (msu.create_float_list_context_feature, "context_feature_floats",
     CONTEXT_FEATURE_FLOATS_KEY),
    (msu.create_bytes_list_context_feature, "context_feature_bytes",
     CONTEXT_FEATURE_BYTES_KEY),
    (msu.create_int_list_context_feature, "context_feature_ints",
     CONTEXT_FEATURE_INTS_KEY),
):
  _make_feature_accessors(_feature_accessor_name, _feature_key,
                          module_dict=globals())
# Drop the loop temporaries so the module namespace is unchanged.
del _make_feature_accessors, _feature_accessor_name, _feature_key
|
716
|
+
|