megadetector 5.0.8__py3-none-any.whl → 5.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- api/__init__.py +0 -0
- api/batch_processing/__init__.py +0 -0
- api/batch_processing/api_core/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/score.py +0 -1
- api/batch_processing/api_core/server_job_status_table.py +0 -1
- api/batch_processing/api_core_support/__init__.py +0 -0
- api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
- api/batch_processing/api_support/__init__.py +0 -0
- api/batch_processing/api_support/summarize_daily_activity.py +0 -1
- api/batch_processing/data_preparation/__init__.py +0 -0
- api/batch_processing/data_preparation/manage_local_batch.py +65 -65
- api/batch_processing/data_preparation/manage_video_batch.py +8 -8
- api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
- api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
- api/batch_processing/postprocessing/__init__.py +0 -0
- api/batch_processing/postprocessing/add_max_conf.py +12 -12
- api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
- api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
- api/batch_processing/postprocessing/compare_batch_results.py +113 -43
- api/batch_processing/postprocessing/convert_output_format.py +41 -16
- api/batch_processing/postprocessing/load_api_results.py +16 -17
- api/batch_processing/postprocessing/md_to_coco.py +31 -21
- api/batch_processing/postprocessing/md_to_labelme.py +52 -22
- api/batch_processing/postprocessing/merge_detections.py +14 -14
- api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
- api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
- api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
- api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
- api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
- api/synchronous/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
- api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
- api/synchronous/api_core/animal_detection_api/config.py +35 -35
- api/synchronous/api_core/tests/__init__.py +0 -0
- api/synchronous/api_core/tests/load_test.py +109 -109
- classification/__init__.py +0 -0
- classification/aggregate_classifier_probs.py +21 -24
- classification/analyze_failed_images.py +11 -13
- classification/cache_batchapi_outputs.py +51 -51
- classification/create_classification_dataset.py +69 -68
- classification/crop_detections.py +54 -53
- classification/csv_to_json.py +97 -100
- classification/detect_and_crop.py +105 -105
- classification/evaluate_model.py +43 -42
- classification/identify_mislabeled_candidates.py +47 -46
- classification/json_to_azcopy_list.py +10 -10
- classification/json_validator.py +72 -71
- classification/map_classification_categories.py +44 -43
- classification/merge_classification_detection_output.py +68 -68
- classification/prepare_classification_script.py +157 -154
- classification/prepare_classification_script_mc.py +228 -228
- classification/run_classifier.py +27 -26
- classification/save_mislabeled.py +30 -30
- classification/train_classifier.py +20 -20
- classification/train_classifier_tf.py +21 -22
- classification/train_utils.py +10 -10
- data_management/__init__.py +0 -0
- data_management/annotations/__init__.py +0 -0
- data_management/annotations/annotation_constants.py +18 -31
- data_management/camtrap_dp_to_coco.py +238 -0
- data_management/cct_json_utils.py +102 -59
- data_management/cct_to_md.py +176 -158
- data_management/cct_to_wi.py +247 -219
- data_management/coco_to_labelme.py +272 -263
- data_management/coco_to_yolo.py +79 -58
- data_management/databases/__init__.py +0 -0
- data_management/databases/add_width_and_height_to_db.py +20 -16
- data_management/databases/combine_coco_camera_traps_files.py +35 -31
- data_management/databases/integrity_check_json_db.py +62 -24
- data_management/databases/subset_json_db.py +24 -15
- data_management/generate_crops_from_cct.py +27 -45
- data_management/get_image_sizes.py +188 -162
- data_management/importers/add_nacti_sizes.py +8 -8
- data_management/importers/add_timestamps_to_icct.py +78 -78
- data_management/importers/animl_results_to_md_results.py +158 -158
- data_management/importers/auckland_doc_test_to_json.py +9 -9
- data_management/importers/auckland_doc_to_json.py +8 -8
- data_management/importers/awc_to_json.py +7 -7
- data_management/importers/bellevue_to_json.py +15 -15
- data_management/importers/cacophony-thermal-importer.py +13 -13
- data_management/importers/carrizo_shrubfree_2018.py +8 -8
- data_management/importers/carrizo_trail_cam_2017.py +8 -8
- data_management/importers/cct_field_adjustments.py +9 -9
- data_management/importers/channel_islands_to_cct.py +10 -10
- data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
- data_management/importers/ena24_to_json.py +7 -7
- data_management/importers/filenames_to_json.py +8 -8
- data_management/importers/helena_to_cct.py +7 -7
- data_management/importers/idaho-camera-traps.py +7 -7
- data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
- data_management/importers/jb_csv_to_json.py +9 -9
- data_management/importers/mcgill_to_json.py +8 -8
- data_management/importers/missouri_to_json.py +18 -18
- data_management/importers/nacti_fieldname_adjustments.py +10 -10
- data_management/importers/noaa_seals_2019.py +7 -7
- data_management/importers/pc_to_json.py +7 -7
- data_management/importers/plot_wni_giraffes.py +7 -7
- data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
- data_management/importers/prepare_zsl_imerit.py +7 -7
- data_management/importers/rspb_to_json.py +8 -8
- data_management/importers/save_the_elephants_survey_A.py +8 -8
- data_management/importers/save_the_elephants_survey_B.py +9 -9
- data_management/importers/snapshot_safari_importer.py +26 -26
- data_management/importers/snapshot_safari_importer_reprise.py +665 -665
- data_management/importers/snapshot_serengeti_lila.py +14 -14
- data_management/importers/sulross_get_exif.py +8 -9
- data_management/importers/timelapse_csv_set_to_json.py +11 -11
- data_management/importers/ubc_to_json.py +13 -13
- data_management/importers/umn_to_json.py +7 -7
- data_management/importers/wellington_to_json.py +8 -8
- data_management/importers/wi_to_json.py +9 -9
- data_management/importers/zamba_results_to_md_results.py +181 -181
- data_management/labelme_to_coco.py +65 -24
- data_management/labelme_to_yolo.py +8 -8
- data_management/lila/__init__.py +0 -0
- data_management/lila/add_locations_to_island_camera_traps.py +9 -9
- data_management/lila/add_locations_to_nacti.py +147 -147
- data_management/lila/create_lila_blank_set.py +13 -13
- data_management/lila/create_lila_test_set.py +8 -8
- data_management/lila/create_links_to_md_results_files.py +106 -106
- data_management/lila/download_lila_subset.py +44 -110
- data_management/lila/generate_lila_per_image_labels.py +55 -42
- data_management/lila/get_lila_annotation_counts.py +18 -15
- data_management/lila/get_lila_image_counts.py +11 -11
- data_management/lila/lila_common.py +96 -33
- data_management/lila/test_lila_metadata_urls.py +132 -116
- data_management/ocr_tools.py +173 -128
- data_management/read_exif.py +110 -97
- data_management/remap_coco_categories.py +83 -83
- data_management/remove_exif.py +58 -62
- data_management/resize_coco_dataset.py +30 -23
- data_management/wi_download_csv_to_coco.py +246 -239
- data_management/yolo_output_to_md_output.py +86 -73
- data_management/yolo_to_coco.py +300 -60
- detection/__init__.py +0 -0
- detection/detector_training/__init__.py +0 -0
- detection/process_video.py +85 -33
- detection/pytorch_detector.py +43 -25
- detection/run_detector.py +157 -72
- detection/run_detector_batch.py +179 -113
- detection/run_inference_with_yolov5_val.py +108 -48
- detection/run_tiled_inference.py +111 -40
- detection/tf_detector.py +51 -29
- detection/video_utils.py +606 -521
- docs/source/conf.py +43 -0
- md_utils/__init__.py +0 -0
- md_utils/azure_utils.py +9 -9
- md_utils/ct_utils.py +228 -68
- md_utils/directory_listing.py +59 -64
- md_utils/md_tests.py +968 -871
- md_utils/path_utils.py +460 -134
- md_utils/process_utils.py +157 -133
- md_utils/sas_blob_utils.py +20 -20
- md_utils/split_locations_into_train_val.py +45 -32
- md_utils/string_utils.py +33 -10
- md_utils/url_utils.py +176 -60
- md_utils/write_html_image_list.py +40 -33
- md_visualization/__init__.py +0 -0
- md_visualization/plot_utils.py +102 -109
- md_visualization/render_images_with_thumbnails.py +34 -34
- md_visualization/visualization_utils.py +597 -291
- md_visualization/visualize_db.py +76 -48
- md_visualization/visualize_detector_output.py +61 -42
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/METADATA +13 -7
- megadetector-5.0.9.dist-info/RECORD +224 -0
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
- taxonomy_mapping/__init__.py +0 -0
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
- taxonomy_mapping/map_new_lila_datasets.py +154 -154
- taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
- taxonomy_mapping/preview_lila_taxonomy.py +591 -591
- taxonomy_mapping/retrieve_sample_image.py +12 -12
- taxonomy_mapping/simple_image_download.py +11 -11
- taxonomy_mapping/species_lookup.py +10 -10
- taxonomy_mapping/taxonomy_csv_checker.py +18 -18
- taxonomy_mapping/taxonomy_graph.py +47 -47
- taxonomy_mapping/validate_lila_category_mappings.py +83 -76
- data_management/cct_json_to_filename_json.py +0 -89
- data_management/cct_to_csv.py +0 -140
- data_management/databases/remove_corrupted_images_from_db.py +0 -191
- detection/detector_training/copy_checkpoints.py +0 -43
- megadetector-5.0.8.dist-info/RECORD +0 -205
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/WHEEL +0 -0
detection/process_video.py
CHANGED
|
@@ -1,11 +1,20 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
process_video.py
|
|
4
|
+
|
|
5
|
+
Splits a video (or folder of videos) into frames, runs the frames through run_detector_batch.py,
|
|
6
|
+
and optionally stitches together results into a new video with detection boxes.
|
|
7
|
+
|
|
8
|
+
Operates by separating the video into frames, typically sampling every Nth frame, and writing
|
|
9
|
+
those frames to disk, before running MD. This approach clearly has a downside: it requires
|
|
10
|
+
a bunch more disk space, compared to extracting frames and running MD on them without ever
|
|
11
|
+
writing them to disk. The upside, though, is that this approach allows you to run repeat
|
|
12
|
+
detection elimination after running MegaDetector, and it allows more efficient re-use
|
|
13
|
+
of frames if you end up running MD more than once, or running multiple versions of MD.
|
|
14
|
+
|
|
15
|
+
TODO: optionally skip writing frames to disk, and process frames in memory.
|
|
16
|
+
|
|
17
|
+
"""
|
|
9
18
|
|
|
10
19
|
#%% Imports
|
|
11
20
|
|
|
@@ -29,73 +38,113 @@ from uuid import uuid1
|
|
|
29
38
|
from detection.video_utils import default_fourcc
|
|
30
39
|
|
|
31
40
|
|
|
32
|
-
#%%
|
|
41
|
+
#%% Classes
|
|
33
42
|
|
|
34
43
|
class ProcessVideoOptions:
|
|
35
|
-
|
|
36
|
-
|
|
44
|
+
"""
|
|
45
|
+
Options controlling the behavior of process_video()
|
|
46
|
+
"""
|
|
47
|
+
|
|
48
|
+
#: Can be a model filename (.pt or .pb) or a model name (e.g. "MDV5A")
|
|
37
49
|
model_file = 'MDV5A'
|
|
38
50
|
|
|
39
|
-
|
|
51
|
+
#: Video (or folder of videos) to process
|
|
40
52
|
input_video_file = ''
|
|
41
53
|
|
|
54
|
+
#: .json file to which we should write results
|
|
42
55
|
output_json_file = None
|
|
43
56
|
|
|
44
|
-
|
|
57
|
+
#: File to which we should write a video with boxes, only relevant if
|
|
58
|
+
#: render_output_video is True
|
|
45
59
|
output_video_file = None
|
|
46
60
|
|
|
47
|
-
|
|
61
|
+
#: Folder to use for extracted frames; will use a folder in system temp space
|
|
62
|
+
#: if this is None
|
|
48
63
|
frame_folder = None
|
|
49
64
|
|
|
50
|
-
# Folder to use for rendered frames (if rendering output video)
|
|
65
|
+
#: Folder to use for rendered frames (if rendering output video); will use a folder
|
|
66
|
+
#: in system temp space if this is None
|
|
51
67
|
frame_rendering_folder = None
|
|
52
68
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
69
|
+
#: Should we render a video with detection boxes?
|
|
70
|
+
#:
|
|
71
|
+
#: Only supported when processing a single video, not a folder.
|
|
56
72
|
render_output_video = False
|
|
57
73
|
|
|
58
|
-
|
|
59
|
-
|
|
74
|
+
#: If we are rendering boxes to a new video, should we keep the temporary
|
|
75
|
+
#: rendered frames?
|
|
60
76
|
keep_rendered_frames = False
|
|
61
77
|
|
|
62
|
-
|
|
78
|
+
#: Should we keep the extracted frames?
|
|
63
79
|
keep_extracted_frames = False
|
|
64
80
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
81
|
+
#: Should we delete the entire folder the extracted frames are written to?
|
|
82
|
+
#:
|
|
83
|
+
#: By default, we delete the frame files but leave the (probably-empty) folder in place,
|
|
84
|
+
#: for no reason other than being paranoid about deleting folders.
|
|
68
85
|
force_extracted_frame_folder_deletion = False
|
|
69
86
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
87
|
+
#: Should we delete the entire folder the rendered frames are written to?
|
|
88
|
+
#:
|
|
89
|
+
#: By default, we delete the frame files but leave the (probably-empty) folder in place,
|
|
90
|
+
#: for no reason other than being paranoid about deleting folders.
|
|
73
91
|
force_rendered_frame_folder_deletion = False
|
|
74
|
-
|
|
92
|
+
|
|
93
|
+
#: If we've already run MegaDetector on this video or folder of videos, i.e. if we
|
|
94
|
+
#: find a corresponding MD results file, should we re-use it? Defaults to reprocessing.
|
|
75
95
|
reuse_results_if_available = False
|
|
96
|
+
|
|
97
|
+
#: If we've already split this video or folder of videos into frames, should we
|
|
98
|
+
#: re-use those extracted frames? Defaults to reprocessing.
|
|
76
99
|
reuse_frames_if_available = False
|
|
77
100
|
|
|
101
|
+
#: If [input_video_file] is a folder, should we search for videos recursively?
|
|
78
102
|
recursive = False
|
|
103
|
+
|
|
104
|
+
#: Enable additional debug console output
|
|
79
105
|
verbose = False
|
|
80
106
|
|
|
107
|
+
#: fourcc code to use for writing videos; only relevant if render_output_video is True
|
|
81
108
|
fourcc = None
|
|
82
109
|
|
|
110
|
+
#: Confidence threshold to use for writing videos with boxes, only relevant if
|
|
111
|
+
#: render_output_video is True. Defaults to choosing a reasonable threshold
|
|
112
|
+
#: based on the model version.
|
|
83
113
|
rendering_confidence_threshold = None
|
|
114
|
+
|
|
115
|
+
#: Detections below this threshold will not be included in the output file.
|
|
84
116
|
json_confidence_threshold = 0.005
|
|
117
|
+
|
|
118
|
+
#: Sample every Nth frame; set to None (default) or 1 to sample every frame. Typically
|
|
119
|
+
#: we sample down to around 3 fps, so for typical 30 fps videos, frame_sample=10 is a
|
|
120
|
+
#: typical value.
|
|
85
121
|
frame_sample = None
|
|
86
122
|
|
|
123
|
+
#: Number of workers to use for parallelization; set to <= 1 to disable parallelization
|
|
87
124
|
n_cores = 1
|
|
88
125
|
|
|
126
|
+
#: For debugging only, stop processing after a certain number of frames.
|
|
89
127
|
debug_max_frames = -1
|
|
90
128
|
|
|
129
|
+
#: File containing non-standard categories, typically only used if you're running a non-MD
|
|
130
|
+
#: detector.
|
|
91
131
|
class_mapping_filename = None
|
|
92
132
|
|
|
133
|
+
# ...class ProcessVideoOptions
|
|
134
|
+
|
|
93
135
|
|
|
94
136
|
#%% Functions
|
|
95
137
|
|
|
96
138
|
def process_video(options):
|
|
97
139
|
"""
|
|
98
|
-
Process a single video
|
|
140
|
+
Process a single video through MD, optionally writing a new video with boxes
|
|
141
|
+
|
|
142
|
+
Args:
|
|
143
|
+
options (ProcessVideoOptions): all the parameters used to control this process,
|
|
144
|
+
including filenames; see ProcessVideoOptions for details
|
|
145
|
+
|
|
146
|
+
Returns:
|
|
147
|
+
dict: frame-level MegaDetector results, identical to what's in the output .json file
|
|
99
148
|
"""
|
|
100
149
|
|
|
101
150
|
if options.output_json_file is None:
|
|
@@ -229,7 +278,11 @@ def process_video(options):
|
|
|
229
278
|
|
|
230
279
|
def process_video_folder(options):
|
|
231
280
|
"""
|
|
232
|
-
Process a folder of videos
|
|
281
|
+
Process a folder of videos through MD
|
|
282
|
+
|
|
283
|
+
Args:
|
|
284
|
+
options (ProcessVideoOptions): all the parameters used to control this process,
|
|
285
|
+
including filenames; see ProcessVideoOptions for details
|
|
233
286
|
"""
|
|
234
287
|
|
|
235
288
|
## Validate options
|
|
@@ -428,8 +481,7 @@ def process_video_folder(options):
|
|
|
428
481
|
print('Warning: error deleting frames from folder {}:\n{}'.format(
|
|
429
482
|
frame_output_folder,str(e)))
|
|
430
483
|
pass
|
|
431
|
-
|
|
432
|
-
|
|
484
|
+
|
|
433
485
|
# ...process_video_folder()
|
|
434
486
|
|
|
435
487
|
|
|
@@ -547,7 +599,7 @@ def main():
|
|
|
547
599
|
default_options = ProcessVideoOptions()
|
|
548
600
|
|
|
549
601
|
parser = argparse.ArgumentParser(description=(
|
|
550
|
-
'Run MegaDetector on each frame in a video (or
|
|
602
|
+
'Run MegaDetector on each frame (or every Nth frame) in a video (or folder of videos), optionally '\
|
|
551
603
|
'producing a new video with detections annotated'))
|
|
552
604
|
|
|
553
605
|
parser.add_argument('model_file', type=str,
|
detection/pytorch_detector.py
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
#
|
|
5
|
-
# Module to run MegaDetector v5, a PyTorch YOLOv5 animal detection model.
|
|
6
|
-
#
|
|
7
|
-
########
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
pytorch_detector.py
|
|
8
4
|
|
|
9
|
-
|
|
5
|
+
Module to run MegaDetector v5, a PyTorch YOLOv5 animal detection model.
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
#%% Imports and constants
|
|
10
10
|
|
|
11
11
|
import torch
|
|
12
12
|
import numpy as np
|
|
@@ -104,12 +104,19 @@ print(f'Using PyTorch version {torch.__version__}')
|
|
|
104
104
|
|
|
105
105
|
class PTDetector:
|
|
106
106
|
|
|
107
|
-
|
|
107
|
+
#: Image size passed to YOLOv5's letterbox() function; 1280 means "1280 on the long side, preserving
|
|
108
|
+
#: aspect ratio"
|
|
109
|
+
#:
|
|
110
|
+
#: :meta private:
|
|
111
|
+
IMAGE_SIZE = 1280
|
|
112
|
+
|
|
113
|
+
#: Stride size passed to YOLOv5's letterbox() function
|
|
114
|
+
#:
|
|
115
|
+
#: :meta private:
|
|
108
116
|
STRIDE = 64
|
|
109
117
|
|
|
110
|
-
def __init__(self, model_path
|
|
111
|
-
|
|
112
|
-
use_model_native_classes: bool = False):
|
|
118
|
+
def __init__(self, model_path, force_cpu=False, use_model_native_classes= False):
|
|
119
|
+
|
|
113
120
|
self.device = 'cpu'
|
|
114
121
|
if not force_cpu:
|
|
115
122
|
if torch.cuda.is_available():
|
|
@@ -162,21 +169,26 @@ class PTDetector:
|
|
|
162
169
|
detection_threshold=0.00001, image_size=None,
|
|
163
170
|
skip_image_resizing=False):
|
|
164
171
|
"""
|
|
165
|
-
|
|
172
|
+
Applies the detector to an image.
|
|
166
173
|
|
|
167
174
|
Args:
|
|
168
|
-
img_original: the PIL Image object with EXIF rotation taken into account
|
|
169
|
-
image_id: a path to identify the image; will be in the "file" field
|
|
170
|
-
|
|
171
|
-
|
|
175
|
+
img_original (Image): the PIL Image object with EXIF rotation taken into account
|
|
176
|
+
image_id (str, optional): a path to identify the image; will be in the "file" field
|
|
177
|
+
of the output object
|
|
178
|
+
detection_threshold (float, optional): only detections above this confidence threshold
|
|
179
|
+
will be included in the return value
|
|
180
|
+
image_size (tuple, optional): image size to use for inference, only mess with this
|
|
181
|
+
if (a) you're using a model other than MegaDetector or (b) you know what you're
|
|
182
|
+
doing
|
|
183
|
+
skip_image_resizing (bool, optional): whether to skip internal image resizing (and rely on external
|
|
184
|
+
resizing)
|
|
172
185
|
|
|
173
186
|
Returns:
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
- 'failure'
|
|
187
|
+
dict: a dictionary with the following fields:
|
|
188
|
+
- 'file' (filename, always present)
|
|
189
|
+
- 'max_detection_conf' (removed from MegaDetector output files by default, but generated here)
|
|
190
|
+
- 'detections' (a list of detection objects containing keys 'category', 'conf', and 'bbox')
|
|
191
|
+
- 'failure' (a failure string, or None if everything went fine)
|
|
180
192
|
"""
|
|
181
193
|
|
|
182
194
|
result = {
|
|
@@ -297,13 +309,19 @@ class PTDetector:
|
|
|
297
309
|
|
|
298
310
|
return result
|
|
299
311
|
|
|
312
|
+
# ...def generate_detections_one_image(...)
|
|
313
|
+
|
|
314
|
+
# ...class PTDetector
|
|
315
|
+
|
|
300
316
|
|
|
301
317
|
#%% Command-line driver
|
|
302
318
|
|
|
319
|
+
# For testing only... you don't really want to run this module directly.
|
|
320
|
+
|
|
303
321
|
if __name__ == '__main__':
|
|
304
|
-
|
|
305
|
-
# For testing only... you don't really want to run this module directly
|
|
306
322
|
|
|
323
|
+
pass
|
|
324
|
+
|
|
307
325
|
#%%
|
|
308
326
|
|
|
309
327
|
import md_visualization.visualization_utils as vis_utils
|
detection/run_detector.py
CHANGED
|
@@ -1,40 +1,26 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
# variable CUDA_VISIBLE_DEVICES to "-1".
|
|
25
|
-
#
|
|
26
|
-
# If no output directory is specified, writes detections for c:\foo\bar.jpg to
|
|
27
|
-
# c:\foo\bar_detections.jpg.
|
|
28
|
-
#
|
|
29
|
-
# This script will only consider detections with > 0.005 confidence at all times.
|
|
30
|
-
# The `threshold` you provide is only for rendering the results. If you need to
|
|
31
|
-
# see lower-confidence detections, you can change
|
|
32
|
-
# DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD.
|
|
33
|
-
#
|
|
34
|
-
# Reference:
|
|
35
|
-
# https://github.com/tensorflow/models/blob/master/research/object_detection/inference/detection_inference.py
|
|
36
|
-
#
|
|
37
|
-
########
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
run_detector.py
|
|
4
|
+
|
|
5
|
+
Module to run an animal detection model on images. The main function in this script also renders
|
|
6
|
+
the predicted bounding boxes on images and saves the resulting images (with bounding boxes).
|
|
7
|
+
|
|
8
|
+
**This script is not a good way to process lots of images**. It does not produce a useful
|
|
9
|
+
output format, and it does not facilitate checkpointing the results so if it crashes you
|
|
10
|
+
would have to start from scratch. **If you want to run a detector on lots of images, you should
|
|
11
|
+
check out run_detector_batch.py**.
|
|
12
|
+
|
|
13
|
+
That said, this script (run_detector.py) is a good way to test our detector on a handful of images
|
|
14
|
+
and get super-satisfying, graphical results.
|
|
15
|
+
|
|
16
|
+
If you would like to *not* use the GPU on the machine, set the environment
|
|
17
|
+
variable CUDA_VISIBLE_DEVICES to "-1".
|
|
18
|
+
|
|
19
|
+
This script will only consider detections with > 0.005 confidence at all times.
|
|
20
|
+
The threshold you provide is only for rendering the results. If you need to
|
|
21
|
+
see lower-confidence detections, you can change DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD.
|
|
22
|
+
|
|
23
|
+
"""
|
|
38
24
|
|
|
39
25
|
#%% Constants, imports, environment
|
|
40
26
|
|
|
@@ -163,9 +149,15 @@ device_token_to_mdv5_inference_speed = {
|
|
|
163
149
|
|
|
164
150
|
def convert_to_tf_coords(array):
|
|
165
151
|
"""
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
152
|
+
Converts a bounding box from [x1, y1, width, height] to [y1, x1, y2, x2]. This
|
|
153
|
+
is mostly not helpful; this function only exists to maintain backwards compatibility
|
|
154
|
+
in the synchronous API, which possibly zero people in the world are using.
|
|
155
|
+
|
|
156
|
+
Args:
|
|
157
|
+
array (list): a bounding box in [x,y,w,h] format
|
|
158
|
+
|
|
159
|
+
Returns:
|
|
160
|
+
list: a bounding box in [y1,x1,y2,x2] format
|
|
169
161
|
"""
|
|
170
162
|
|
|
171
163
|
x1 = array[0]
|
|
@@ -174,13 +166,21 @@ def convert_to_tf_coords(array):
|
|
|
174
166
|
height = array[3]
|
|
175
167
|
x2 = x1 + width
|
|
176
168
|
y2 = y1 + height
|
|
169
|
+
|
|
177
170
|
return [y1, x1, y2, x2]
|
|
178
171
|
|
|
179
172
|
|
|
180
173
|
def get_detector_metadata_from_version_string(detector_version):
|
|
181
174
|
"""
|
|
182
|
-
Given a MegaDetector version string (e.g. "v4.1.0"),
|
|
175
|
+
Given a MegaDetector version string (e.g. "v4.1.0"), returns the metadata for
|
|
183
176
|
the model. Used for writing standard defaults to batch output files.
|
|
177
|
+
|
|
178
|
+
Args:
|
|
179
|
+
detector_version (str): a detection version string, e.g. "v4.1.0", which you
|
|
180
|
+
can extract from a filename using get_detector_version_from_filename()
|
|
181
|
+
|
|
182
|
+
Returns:
|
|
183
|
+
dict: metadata for this model, suitable for writing to a MD output file
|
|
184
184
|
"""
|
|
185
185
|
|
|
186
186
|
if detector_version not in DETECTOR_METADATA:
|
|
@@ -196,20 +196,26 @@ def get_detector_metadata_from_version_string(detector_version):
|
|
|
196
196
|
|
|
197
197
|
|
|
198
198
|
def get_detector_version_from_filename(detector_filename):
|
|
199
|
-
"""
|
|
200
|
-
|
|
199
|
+
r"""
|
|
200
|
+
Gets the version number component of the detector from the model filename.
|
|
201
201
|
|
|
202
|
-
|
|
202
|
+
[detector_filename] will almost always end with one of the following:
|
|
203
203
|
|
|
204
|
-
megadetector_v2.pb
|
|
205
|
-
megadetector_v3.pb
|
|
206
|
-
megadetector_v4.1 (not produed by run_detector_batch.py, only found in Azure Batch API
|
|
207
|
-
md_v4.1.0.pb
|
|
208
|
-
md_v5a.0.0.pt
|
|
209
|
-
md_v5b.0.0.pt
|
|
210
|
-
|
|
211
|
-
|
|
204
|
+
* megadetector_v2.pb
|
|
205
|
+
* megadetector_v3.pb
|
|
206
|
+
* megadetector_v4.1 (not produced by run_detector_batch.py, only found in output files from the deprecated Azure Batch API)
|
|
207
|
+
* md_v4.1.0.pb
|
|
208
|
+
* md_v5a.0.0.pt
|
|
209
|
+
* md_v5b.0.0.pt
|
|
210
|
+
|
|
211
|
+
This function identifies the version number as "v2.0.0", "v3.0.0", "v4.1.0",
|
|
212
212
|
"v4.1.0", "v5a.0.0", and "v5b.0.0", respectively.
|
|
213
|
+
|
|
214
|
+
Args:
|
|
215
|
+
detector_filename (str): model filename, e.g. c:/x/z/md_v5a.0.0.pt
|
|
216
|
+
|
|
217
|
+
Returns:
|
|
218
|
+
str: a detector version string, e.g. "v5a.0.0", or "multiple" if I'm confused
|
|
213
219
|
"""
|
|
214
220
|
|
|
215
221
|
fn = os.path.basename(detector_filename).lower()
|
|
@@ -228,10 +234,20 @@ def get_detector_version_from_filename(detector_filename):
|
|
|
228
234
|
|
|
229
235
|
|
|
230
236
|
def estimate_md_images_per_second(model_file, device_name=None):
|
|
231
|
-
"""
|
|
232
|
-
|
|
237
|
+
r"""
|
|
238
|
+
Estimates how fast MegaDetector will run, based on benchmarks. Defaults to querying
|
|
233
239
|
the current device. Returns None if no data is available for the current card/model.
|
|
234
|
-
Estimates only available for a small handful of GPUs.
|
|
240
|
+
Estimates only available for a small handful of GPUs. Uses an absurdly simple lookup
|
|
241
|
+
approach, e.g. if the string "4090" appears in the device name, congratulations,
|
|
242
|
+
you have an RTX 4090.
|
|
243
|
+
|
|
244
|
+
Args:
|
|
245
|
+
model_file (str): model filename, e.g. c:/x/z/md_v5a.0.0.pt
|
|
246
|
+
device_name (str, optional): device name, e.g. blah-blah-4090-blah-blah
|
|
247
|
+
|
|
248
|
+
Returns:
|
|
249
|
+
float: the approximate number of images this model version can process on this
|
|
250
|
+
device per second
|
|
235
251
|
"""
|
|
236
252
|
|
|
237
253
|
if device_name is None:
|
|
@@ -271,8 +287,14 @@ def estimate_md_images_per_second(model_file, device_name=None):
|
|
|
271
287
|
|
|
272
288
|
def get_typical_confidence_threshold_from_results(results):
|
|
273
289
|
"""
|
|
274
|
-
Given the .json data loaded from a MD results file,
|
|
290
|
+
Given the .json data loaded from a MD results file, returns a typical confidence
|
|
275
291
|
threshold based on the detector version.
|
|
292
|
+
|
|
293
|
+
Args:
|
|
294
|
+
results (dict): a dict of MD results, as it would be loaded from a MD results .json file
|
|
295
|
+
|
|
296
|
+
Returns:
|
|
297
|
+
float: a sensible default threshold for this model
|
|
276
298
|
"""
|
|
277
299
|
|
|
278
300
|
if 'detector_metadata' in results['info'] and \
|
|
@@ -293,10 +315,16 @@ def get_typical_confidence_threshold_from_results(results):
|
|
|
293
315
|
|
|
294
316
|
|
|
295
317
|
def is_gpu_available(model_file):
|
|
296
|
-
"""
|
|
297
|
-
|
|
318
|
+
r"""
|
|
319
|
+
Determines whether a GPU is available, importing PyTorch or TF depending on the extension
|
|
298
320
|
of model_file. Does not actually load model_file, just uses that to determine how to check
|
|
299
|
-
for GPU availability.
|
|
321
|
+
for GPU availability (PT vs. TF).
|
|
322
|
+
|
|
323
|
+
Args:
|
|
324
|
+
model_file (str): model filename, e.g. c:/x/z/md_v5a.0.0.pt
|
|
325
|
+
|
|
326
|
+
Returns:
|
|
327
|
+
bool: whether a GPU is available
|
|
300
328
|
"""
|
|
301
329
|
|
|
302
330
|
if model_file.endswith('.pb'):
|
|
@@ -323,8 +351,14 @@ def is_gpu_available(model_file):
|
|
|
323
351
|
|
|
324
352
|
|
|
325
353
|
def load_detector(model_file, force_cpu=False):
|
|
326
|
-
"""
|
|
327
|
-
|
|
354
|
+
r"""
|
|
355
|
+
Loads a TF or PT detector, depending on the extension of model_file.
|
|
356
|
+
|
|
357
|
+
Args:
|
|
358
|
+
model_file (str): model filename, e.g. c:/x/z/md_v5a.0.0.pt
|
|
359
|
+
|
|
360
|
+
Returns:
|
|
361
|
+
object: loaded detector object
|
|
328
362
|
"""
|
|
329
363
|
|
|
330
364
|
# Possibly automatically download the model
|
|
@@ -344,19 +378,41 @@ def load_detector(model_file, force_cpu=False):
|
|
|
344
378
|
raise ValueError('Unrecognized model format: {}'.format(model_file))
|
|
345
379
|
elapsed = time.time() - start_time
|
|
346
380
|
print('Loaded model in {}'.format(humanfriendly.format_timespan(elapsed)))
|
|
381
|
+
|
|
347
382
|
return detector
|
|
348
383
|
|
|
349
384
|
|
|
350
385
|
#%% Main function
|
|
351
386
|
|
|
352
|
-
def load_and_run_detector(model_file,
|
|
387
|
+
def load_and_run_detector(model_file,
|
|
388
|
+
image_file_names,
|
|
389
|
+
output_dir,
|
|
353
390
|
render_confidence_threshold=DEFAULT_RENDERING_CONFIDENCE_THRESHOLD,
|
|
354
|
-
crop_images=False,
|
|
355
|
-
|
|
356
|
-
|
|
391
|
+
crop_images=False,
|
|
392
|
+
box_thickness=DEFAULT_BOX_THICKNESS,
|
|
393
|
+
box_expansion=DEFAULT_BOX_EXPANSION,
|
|
394
|
+
image_size=None,
|
|
395
|
+
label_font_size=DEFAULT_LABEL_FONT_SIZE
|
|
357
396
|
):
|
|
358
|
-
"""
|
|
359
|
-
|
|
397
|
+
r"""
|
|
398
|
+
Loads and runs a detector on target images, and visualizes the results.
|
|
399
|
+
|
|
400
|
+
Args:
|
|
401
|
+
model_file (str): model filename, e.g. c:/x/z/md_v5a.0.0.pt, or a known model
|
|
402
|
+
string, e.g. "MDV5A"
|
|
403
|
+
image_file_names (list): list of absolute paths to process
|
|
404
|
+
output_dir (str): folder to write visualized images to
|
|
405
|
+
render_confidence_threshold (float, optional): only render boxes for detections
|
|
406
|
+
above this threshold
|
|
407
|
+
crop_images (bool, optional): whether to crop detected objects to individual images
|
|
408
|
+
(default is to render images with boxes, rather than cropping)
|
|
409
|
+
box_thickness (float, optional): thickness in pixels for box rendering
|
|
410
|
+
box_expansion (float, optional): box expansion in pixels
|
|
411
|
+
image_size (tuple, optional): image size to use for inference, only mess with this
|
|
412
|
+
if (a) you're using a model other than MegaDetector or (b) you know what you're
|
|
413
|
+
doing
|
|
414
|
+
label_font_size (float, optional): font size to use for displaying class names
|
|
415
|
+
and confidence values in the rendered images
|
|
360
416
|
"""
|
|
361
417
|
|
|
362
418
|
if len(image_file_names) == 0:
|
|
@@ -507,7 +563,12 @@ def load_and_run_detector(model_file, image_file_names, output_dir,
|
|
|
507
563
|
|
|
508
564
|
def download_model(model_name,force_download=False):
|
|
509
565
|
"""
|
|
510
|
-
|
|
566
|
+
Downloads one of the known models to local temp space if it hasn't already been downloaded.
|
|
567
|
+
|
|
568
|
+
Args:
|
|
569
|
+
model_name (str): a known model string, e.g. "MDV5A"
|
|
570
|
+
force_download (bool, optional): whether to download the model even if the local target
|
|
571
|
+
file already exists
|
|
511
572
|
"""
|
|
512
573
|
|
|
513
574
|
import tempfile
|
|
@@ -536,9 +597,17 @@ def download_model(model_name,force_download=False):
|
|
|
536
597
|
|
|
537
598
|
def try_download_known_detector(detector_file):
|
|
538
599
|
"""
|
|
539
|
-
|
|
600
|
+
Checks whether detector_file is really the name of a known model, in which case we will
|
|
540
601
|
either read the actual filename from the corresponding environment variable or download
|
|
541
602
|
(if necessary) to local temp space. Otherwise just returns the input string.
|
|
603
|
+
|
|
604
|
+
Args:
|
|
605
|
+
detector_file (str): a known model string (e.g. "MDV5A"), or any other string (in which
|
|
606
|
+
case this function is a no-op)
|
|
607
|
+
|
|
608
|
+
Returns:
|
|
609
|
+
str: the local filename to which the model was downloaded, or the same string that
|
|
610
|
+
was passed in, if it's not recognized as a well-known model name
|
|
542
611
|
"""
|
|
543
612
|
|
|
544
613
|
if detector_file in downloadable_models:
|
|
@@ -606,7 +675,7 @@ def main():
|
|
|
606
675
|
parser.add_argument(
|
|
607
676
|
'--crop',
|
|
608
677
|
default=False,
|
|
609
|
-
action=
|
|
678
|
+
action='store_true',
|
|
610
679
|
help=('If set, produces separate output images for each crop, '
|
|
611
680
|
'rather than adding bounding boxes to the original image'))
|
|
612
681
|
|
|
@@ -630,7 +699,14 @@ def main():
|
|
|
630
699
|
default=DEFAULT_LABEL_FONT_SIZE,
|
|
631
700
|
help=('Label font size (defaults to {})'.format(
|
|
632
701
|
DEFAULT_LABEL_FONT_SIZE)))
|
|
633
|
-
|
|
702
|
+
|
|
703
|
+
parser.add_argument(
|
|
704
|
+
'--process_likely_output_images',
|
|
705
|
+
action='store_true',
|
|
706
|
+
help=('By default, we skip images that end in {}, because they probably came from this script. '\
|
|
707
|
+
.format(DETECTION_FILENAME_INSERT) + \
|
|
708
|
+
'This option disables that behavior.'))
|
|
709
|
+
|
|
634
710
|
if len(sys.argv[1:]) == 0:
|
|
635
711
|
parser.print_help()
|
|
636
712
|
parser.exit()
|
|
@@ -650,6 +726,16 @@ def main():
|
|
|
650
726
|
else:
|
|
651
727
|
image_file_names = path_utils.find_images(args.image_dir, args.recursive)
|
|
652
728
|
|
|
729
|
+
# Optionally skip images that were probably generated by this script
|
|
730
|
+
if not args.process_likely_output_images:
|
|
731
|
+
image_file_names_valid = []
|
|
732
|
+
for fn in image_file_names:
|
|
733
|
+
if os.path.splitext(fn)[0].endswith(DETECTION_FILENAME_INSERT):
|
|
734
|
+
print('Skipping likely output image {}'.format(fn))
|
|
735
|
+
else:
|
|
736
|
+
image_file_names_valid.append(fn)
|
|
737
|
+
image_file_names = image_file_names_valid
|
|
738
|
+
|
|
653
739
|
print('Running detector on {} images...'.format(len(image_file_names)))
|
|
654
740
|
|
|
655
741
|
if args.output_dir:
|
|
@@ -671,7 +757,6 @@ def main():
|
|
|
671
757
|
image_size=args.image_size,
|
|
672
758
|
label_font_size=args.label_font_size)
|
|
673
759
|
|
|
674
|
-
|
|
675
760
|
if __name__ == '__main__':
|
|
676
761
|
main()
|
|
677
762
|
|