megadetector-5.0.8-py3-none-any.whl → megadetector-5.0.9-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic.
- api/__init__.py +0 -0
- api/batch_processing/__init__.py +0 -0
- api/batch_processing/api_core/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/score.py +0 -1
- api/batch_processing/api_core/server_job_status_table.py +0 -1
- api/batch_processing/api_core_support/__init__.py +0 -0
- api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
- api/batch_processing/api_support/__init__.py +0 -0
- api/batch_processing/api_support/summarize_daily_activity.py +0 -1
- api/batch_processing/data_preparation/__init__.py +0 -0
- api/batch_processing/data_preparation/manage_local_batch.py +65 -65
- api/batch_processing/data_preparation/manage_video_batch.py +8 -8
- api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
- api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
- api/batch_processing/postprocessing/__init__.py +0 -0
- api/batch_processing/postprocessing/add_max_conf.py +12 -12
- api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
- api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
- api/batch_processing/postprocessing/compare_batch_results.py +113 -43
- api/batch_processing/postprocessing/convert_output_format.py +41 -16
- api/batch_processing/postprocessing/load_api_results.py +16 -17
- api/batch_processing/postprocessing/md_to_coco.py +31 -21
- api/batch_processing/postprocessing/md_to_labelme.py +52 -22
- api/batch_processing/postprocessing/merge_detections.py +14 -14
- api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
- api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
- api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
- api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
- api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
- api/synchronous/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
- api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
- api/synchronous/api_core/animal_detection_api/config.py +35 -35
- api/synchronous/api_core/tests/__init__.py +0 -0
- api/synchronous/api_core/tests/load_test.py +109 -109
- classification/__init__.py +0 -0
- classification/aggregate_classifier_probs.py +21 -24
- classification/analyze_failed_images.py +11 -13
- classification/cache_batchapi_outputs.py +51 -51
- classification/create_classification_dataset.py +69 -68
- classification/crop_detections.py +54 -53
- classification/csv_to_json.py +97 -100
- classification/detect_and_crop.py +105 -105
- classification/evaluate_model.py +43 -42
- classification/identify_mislabeled_candidates.py +47 -46
- classification/json_to_azcopy_list.py +10 -10
- classification/json_validator.py +72 -71
- classification/map_classification_categories.py +44 -43
- classification/merge_classification_detection_output.py +68 -68
- classification/prepare_classification_script.py +157 -154
- classification/prepare_classification_script_mc.py +228 -228
- classification/run_classifier.py +27 -26
- classification/save_mislabeled.py +30 -30
- classification/train_classifier.py +20 -20
- classification/train_classifier_tf.py +21 -22
- classification/train_utils.py +10 -10
- data_management/__init__.py +0 -0
- data_management/annotations/__init__.py +0 -0
- data_management/annotations/annotation_constants.py +18 -31
- data_management/camtrap_dp_to_coco.py +238 -0
- data_management/cct_json_utils.py +102 -59
- data_management/cct_to_md.py +176 -158
- data_management/cct_to_wi.py +247 -219
- data_management/coco_to_labelme.py +272 -263
- data_management/coco_to_yolo.py +79 -58
- data_management/databases/__init__.py +0 -0
- data_management/databases/add_width_and_height_to_db.py +20 -16
- data_management/databases/combine_coco_camera_traps_files.py +35 -31
- data_management/databases/integrity_check_json_db.py +62 -24
- data_management/databases/subset_json_db.py +24 -15
- data_management/generate_crops_from_cct.py +27 -45
- data_management/get_image_sizes.py +188 -162
- data_management/importers/add_nacti_sizes.py +8 -8
- data_management/importers/add_timestamps_to_icct.py +78 -78
- data_management/importers/animl_results_to_md_results.py +158 -158
- data_management/importers/auckland_doc_test_to_json.py +9 -9
- data_management/importers/auckland_doc_to_json.py +8 -8
- data_management/importers/awc_to_json.py +7 -7
- data_management/importers/bellevue_to_json.py +15 -15
- data_management/importers/cacophony-thermal-importer.py +13 -13
- data_management/importers/carrizo_shrubfree_2018.py +8 -8
- data_management/importers/carrizo_trail_cam_2017.py +8 -8
- data_management/importers/cct_field_adjustments.py +9 -9
- data_management/importers/channel_islands_to_cct.py +10 -10
- data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
- data_management/importers/ena24_to_json.py +7 -7
- data_management/importers/filenames_to_json.py +8 -8
- data_management/importers/helena_to_cct.py +7 -7
- data_management/importers/idaho-camera-traps.py +7 -7
- data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
- data_management/importers/jb_csv_to_json.py +9 -9
- data_management/importers/mcgill_to_json.py +8 -8
- data_management/importers/missouri_to_json.py +18 -18
- data_management/importers/nacti_fieldname_adjustments.py +10 -10
- data_management/importers/noaa_seals_2019.py +7 -7
- data_management/importers/pc_to_json.py +7 -7
- data_management/importers/plot_wni_giraffes.py +7 -7
- data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
- data_management/importers/prepare_zsl_imerit.py +7 -7
- data_management/importers/rspb_to_json.py +8 -8
- data_management/importers/save_the_elephants_survey_A.py +8 -8
- data_management/importers/save_the_elephants_survey_B.py +9 -9
- data_management/importers/snapshot_safari_importer.py +26 -26
- data_management/importers/snapshot_safari_importer_reprise.py +665 -665
- data_management/importers/snapshot_serengeti_lila.py +14 -14
- data_management/importers/sulross_get_exif.py +8 -9
- data_management/importers/timelapse_csv_set_to_json.py +11 -11
- data_management/importers/ubc_to_json.py +13 -13
- data_management/importers/umn_to_json.py +7 -7
- data_management/importers/wellington_to_json.py +8 -8
- data_management/importers/wi_to_json.py +9 -9
- data_management/importers/zamba_results_to_md_results.py +181 -181
- data_management/labelme_to_coco.py +65 -24
- data_management/labelme_to_yolo.py +8 -8
- data_management/lila/__init__.py +0 -0
- data_management/lila/add_locations_to_island_camera_traps.py +9 -9
- data_management/lila/add_locations_to_nacti.py +147 -147
- data_management/lila/create_lila_blank_set.py +13 -13
- data_management/lila/create_lila_test_set.py +8 -8
- data_management/lila/create_links_to_md_results_files.py +106 -106
- data_management/lila/download_lila_subset.py +44 -110
- data_management/lila/generate_lila_per_image_labels.py +55 -42
- data_management/lila/get_lila_annotation_counts.py +18 -15
- data_management/lila/get_lila_image_counts.py +11 -11
- data_management/lila/lila_common.py +96 -33
- data_management/lila/test_lila_metadata_urls.py +132 -116
- data_management/ocr_tools.py +173 -128
- data_management/read_exif.py +110 -97
- data_management/remap_coco_categories.py +83 -83
- data_management/remove_exif.py +58 -62
- data_management/resize_coco_dataset.py +30 -23
- data_management/wi_download_csv_to_coco.py +246 -239
- data_management/yolo_output_to_md_output.py +86 -73
- data_management/yolo_to_coco.py +300 -60
- detection/__init__.py +0 -0
- detection/detector_training/__init__.py +0 -0
- detection/process_video.py +85 -33
- detection/pytorch_detector.py +43 -25
- detection/run_detector.py +157 -72
- detection/run_detector_batch.py +179 -113
- detection/run_inference_with_yolov5_val.py +108 -48
- detection/run_tiled_inference.py +111 -40
- detection/tf_detector.py +51 -29
- detection/video_utils.py +606 -521
- docs/source/conf.py +43 -0
- md_utils/__init__.py +0 -0
- md_utils/azure_utils.py +9 -9
- md_utils/ct_utils.py +228 -68
- md_utils/directory_listing.py +59 -64
- md_utils/md_tests.py +968 -871
- md_utils/path_utils.py +460 -134
- md_utils/process_utils.py +157 -133
- md_utils/sas_blob_utils.py +20 -20
- md_utils/split_locations_into_train_val.py +45 -32
- md_utils/string_utils.py +33 -10
- md_utils/url_utils.py +176 -60
- md_utils/write_html_image_list.py +40 -33
- md_visualization/__init__.py +0 -0
- md_visualization/plot_utils.py +102 -109
- md_visualization/render_images_with_thumbnails.py +34 -34
- md_visualization/visualization_utils.py +597 -291
- md_visualization/visualize_db.py +76 -48
- md_visualization/visualize_detector_output.py +61 -42
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/METADATA +13 -7
- megadetector-5.0.9.dist-info/RECORD +224 -0
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
- taxonomy_mapping/__init__.py +0 -0
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
- taxonomy_mapping/map_new_lila_datasets.py +154 -154
- taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
- taxonomy_mapping/preview_lila_taxonomy.py +591 -591
- taxonomy_mapping/retrieve_sample_image.py +12 -12
- taxonomy_mapping/simple_image_download.py +11 -11
- taxonomy_mapping/species_lookup.py +10 -10
- taxonomy_mapping/taxonomy_csv_checker.py +18 -18
- taxonomy_mapping/taxonomy_graph.py +47 -47
- taxonomy_mapping/validate_lila_category_mappings.py +83 -76
- data_management/cct_json_to_filename_json.py +0 -89
- data_management/cct_to_csv.py +0 -140
- data_management/databases/remove_corrupted_images_from_db.py +0 -191
- detection/detector_training/copy_checkpoints.py +0 -43
- megadetector-5.0.8.dist-info/RECORD +0 -205
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/WHEEL +0 -0
classification/crop_detections.py
CHANGED

@@ -1,58 +1,43 @@

The old header, a "#"-comment block followed by a bare string holding the example
command line, was replaced by a module docstring. The docstring text is the same as
the old comment text; the example command line was moved to a new "#%% Example usage"
cell after the imports (second hunk below). The new header reads:

"""

crop_detections.py

Given a detections JSON file from MegaDetector, crops the bounding boxes above
a certain confidence threshold.

This script takes as input a detections JSON file, usually the output of
detection/run_tf_detector_batch.py or the output of the Batch API in the
"Batch processing API output format".

See https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing.

The script can crop images that are either available locally or that need to be
downloaded from an Azure Blob Storage container.

We assume that no image contains over 100 bounding boxes, and we always save
crops as RGB .jpg files for consistency. For each image, each bounding box is
cropped and saved to a file with a suffix "___cropXX_mdvY.Y.jpg" added to the
filename as the original image. "XX" ranges from "00" to "99" and "Y.Y"
ndicates the MegaDetector version. Based on the given confidence threshold, we
may skip saving certain bounding box crops, but we still increment the bounding
box number for skipped boxes.

Example cropped image path (with MegaDetector bbox):

"path/to/image.jpg___crop00_mdv4.1.jpg"

By default, the images are cropped exactly per the given bounding box
coordinates. However, if square crops are desired, pass the --square-crops
flag. This will always generate a square crop whose size is the larger of the
bounding box width or height. In the case that the square crop boundaries exceed
the original image size, the crop is padded with 0s.

This script outputs a log file to:

<output_dir>/crop_detections_log_{timestamp}.json

...which contains images that failed to download and crop properly.

"""

@@ -73,6 +58,22 @@ from PIL import Image, ImageOps

After "from tqdm import tqdm", the example command line removed from the header was
re-added in a new cell:

#%% Example usage

"""
python crop_detections.py \
    detections.json \
    /path/to/crops \
    --images-dir /path/to/images \
    --container-url "https://account.blob.core.windows.net/container?sastoken" \
    --detector-version "4.1" \
    --threshold 0.8 \
    --save-full-images --square-crops \
    --threads 50 \
    --logdir "."
"""

#%% Main function

def main(detections_json_path: str,
classification/csv_to_json.py
CHANGED

@@ -1,105 +1,101 @@

The old header, a "#"-comment block plus a bare string holding the example CSV input
and a long run of blank lines, was replaced by a single module docstring. The
descriptive text and the example CSV input are unchanged; the "Example JSON output"
section is new. The new header reads:

"""

csv_to_json.py

Converts CSV to JSON format for label specification.

There are 3 possible values for the 'type' column in the CSV:

- "row": this selects a specific rowfrom the master taxonomy CSV
    content syntax: <dataset_name>|<dataset_label>

- "datasettaxon": this selects all animals in a taxon from a particular dataset
    content syntax: <dataset_name>|<taxon_level>|<taxon_name>

- <taxon_level>: this selects all animals in a taxon across all datasets
    content syntax: <taxon_name>

Example CSV input:

"
# comment lines starting with '#' are allowed
output_label,type,content
cervid,row,idfg|deer
cervid,row,idfg|elk
cervid,row,idfg|prong
cervid,row,idfg_swwlf_2019|elk
cervid,row,idfg_swwlf_2019|muledeer
cervid,row,idfg_swwlf_2019|whitetaileddeer
cervid,max_count,50000
cervid,family,cervidae
cervid,datasettaxon,idfg|family|cervidae
cervid,datasettaxon,idfg_swwlf_2019|family|cervidae
bird,row,idfg_swwlf_2019|bird
bird,class,aves
bird,max_count,50000
bird,prioritize,"[['idfg_swwlf_2019'], ['idfg']]"
!bird,row,idfg_swwlf_2019|turkey
!bird,genus,meleagris
"

Example JSON output:

"
{
  "cervid": {
    "dataset_labels": {
      "idfg": ["deer", "elk", "prong"],
      "idfg_swwlf_2019": ["elk", "muledeer", "whitetaileddeer"]
    },
    "taxa": [
      {
        "level": "family",
        "name": "cervidae"
      },
      {
        "level": "family",
        "name": "cervidae"
        "datasets": ["idfg"]
      },
      {
        "level": "family",
        "name": "cervidae"
        "datasets": ["idfg_swwlf_2019"]
      }
    ],
    "max_count": 50000
  },
  "bird": {
    "dataset_labels": {
      "idfg_swwlf_2019": ["bird"]
    },
    "taxa": [
      {
        "level": "class",
        "name": "aves"
      }
    ],
    "exclude": {
      "dataset_labels": {
        "idfg_swwlf_2019": ["turkey"]
      },
      "taxa": [
        {
          "level": "genus",
          "name": "meleagris"
        }
      ]
    },
    "max_count": "50000",
    "prioritize": [
      ["idfg_swwlf_2019"],
      ["idfg"]
    ],
  }
}
"

"""

@@ -125,6 +121,7 @@ def main():

A blank line was inserted between the "#%% Support functions" cell marker and
parse_csv_row(obj: dict[str, Any], rowtype: str, content: str).
classification/detect_and_crop.py
CHANGED

@@ -1,111 +1,97 @@

As above, the "#"-comment header and the bare example-usage string that followed it
were replaced by a module docstring with the same text; the example command line was
moved to a new "#%% Example usage" cell after the imports (second hunk below). The
new header reads:

"""

detect_and_crop.py

Run MegaDetector on images via Batch API, then save crops of the detected
bounding boxes.

The input to this script is a "queried images" JSON file, whose keys are paths
to images and values are dicts containing information relevant for training
a classifier, including labels and (optionally) ground-truth bounding boxes.
The image paths are in the format `<dataset-name>/<blob-name>` where we assume
that the dataset name does not contain '/'.

{
    "caltech/cct_images/59f79901-23d2-11e8-a6a3-ec086b02610b.jpg": {
        "dataset": "caltech",
        "location": 13,
        "class": "mountain_lion",  # class from dataset
        "bbox": [{"category": "animal",
                  "bbox": [0, 0.347, 0.237, 0.257]}],  # ground-truth bbox
        "label": ["monutain_lion"]  # labels to use in classifier
    },
    "caltech/cct_images/59f5fe2b-23d2-11e8-a6a3-ec086b02610b.jpg": {
        "dataset": "caltech",
        "location": 13,
        "class": "mountain_lion",  # class from dataset
        "label": ["monutain_lion"]  # labels to use in classifier
    },
    ...
}

We assume that no image contains over 100 bounding boxes, and we always save
crops as RGB .jpg files for consistency. For each image, each bounding box is
cropped and saved to a file with a suffix "___cropXX.jpg" (ground truth bbox) or
"___cropXX_mdvY.Y.jpg" (detected bbox) added to the filename of the original
image. "XX" ranges from "00" to "99" and "Y.Y" indicates the MegaDetector
version. If an image has ground truth bounding boxes, we assume that they are
exhaustive--i.e., there are no other objects of interest, so we don't need to
run MegaDetector on the image. If an image does not have ground truth bounding
boxes, we run MegaDetector on the image and label the detected boxes in order
from 00 up to 99. Based on the given confidence threshold, we may skip saving
certain bounding box crops, but we still increment the bounding box number for
skipped boxes.

Example cropped image path (with ground truth bbox from MegaDB)

"path/to/crops/image.jpg___crop00.jpg"

Example cropped image path (with MegaDetector bbox)

"path/to/crops/image.jpg___crop00_mdv4.1.jpg"

By default, the images are cropped exactly per the given bounding box
coordinates. However, if square crops are desired, pass the --square-crops
flag. This will always generate a square crop whose size is the larger of the
bounding box width or height. In the case that the square crop boundaries exceed
the original image size, the crop is padded with 0s.

This script currently only supports running MegaDetector via the Batch Detection
API. See the classification README for instructions on running MegaDetector
locally. If running the Batch Detection API, set the following environment
variables for the Azure Blob Storage container in which we save the intermediate
task lists:

    BATCH_DETECTION_API_URL                  # API URL
    CLASSIFICATION_BLOB_STORAGE_ACCOUNT      # storage account name
    CLASSIFICATION_BLOB_CONTAINER            # container name
    CLASSIFICATION_BLOB_CONTAINER_WRITE_SAS  # SAS token, without leading '?'
    DETECTION_API_CALLER                     # allow-listed API caller

This script allows specifying a directory where MegaDetector outputs are cached
via the --detector-output-cache-dir argument. This directory must be
organized as:

    <cache-dir>/<MegaDetector-version>/<dataset-name>.json

Example: If the `cameratrapssc/classifier-training` Azure blob storage
container is mounted to the local machine via blobfuse, it may be used as
a MegaDetector output cache directory by passing
"cameratrapssc/classifier-training/mdcache/"
as the value for --detector-output-cache-dir.

This script outputs either 1 or 3 files, depending on whether the Batch Detection API
is run:

- <output_dir>/detect_and_crop_log_{timestamp}.json
    log of images missing detections and images that failed to properly
    download and crop
- <output_dir>/batchapi_tasklists/{task_id}.json
    (if --run-detector) task lists uploaded to the Batch Detection API
- <output_dir>/batchapi_response/{task_id}.json
    (if --run-detector) task status responses for completed tasks

"""

@@ -134,6 +120,20 @@ from md_utils import path_utils

After "from md_utils import sas_blob_utils", the example command line removed from
the header was re-added in a new cell:

#%% Example usage

"""
python detect_and_crop.py \
    base_logdir/queried_images.json \
    base_logdir \
    --detector-output-cache-dir /path/to/classifier-training/mdcache \
    --detector-version 4.1 \
    --run-detector --resume-file base_logdir/resume.json \
    --cropped-images-dir /path/to/crops --square-crops --threshold 0.9 \
    --save-full-images --images-dir /path/to/images --threads 50
"""

#%% Main function

def main(queried_images_json_path: str,
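
The cache layout the docstring describes, <cache-dir>/<MegaDetector-version>/<dataset-name>.json,
can be illustrated with a small lookup helper. This is a hypothetical sketch, not
code from the package, and it assumes the cached files use the standard MegaDetector
batch output format (an "images" list whose entries carry a "file" field).

# Illustrative sketch only -- hypothetical helper, not part of this package.
import json
import os

def cached_detections_path(cache_dir, detector_version, image_key):
    # image_key looks like "<dataset-name>/<blob-name>"; the dataset name is
    # assumed not to contain '/' (as stated in the docstring above)
    dataset_name = image_key.split('/', 1)[0]
    return os.path.join(cache_dir, detector_version, dataset_name + '.json')

def lookup_cached_detections(cache_dir, detector_version, image_key):
    path = cached_detections_path(cache_dir, detector_version, image_key)
    if not os.path.isfile(path):
        return None  # no cached MegaDetector output for this dataset yet
    with open(path) as f:
        results = json.load(f)
    # Assumes MD batch output format: one record per image under "images",
    # with the blob name stored in each record's "file" field
    blob_name = image_key.split('/', 1)[1]
    return next((im for im in results.get('images', [])
                 if im.get('file') == blob_name), None)

detections = lookup_cached_detections(
    '/path/to/classifier-training/mdcache', '4.1',
    'caltech/cct_images/59f79901-23d2-11e8-a6a3-ec086b02610b.jpg')
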
classification/evaluate_model.py
CHANGED

@@ -1,47 +1,36 @@

As above, the "#"-comment header and the bare example-usage string that followed it
were replaced by a module docstring with the same text; the example command line was
moved to a new "#%% Example usage" cell after the imports (second hunk below). The
new header reads:

"""

evaluate_model.py

Evaluate a species classifier.

Currently the implementation of multi-label multi-class classification is
non-functional.

Outputs the following files:

1) outputs_{split}.csv, one file per split, contains columns:
    - 'path': str, path to cropped image
    - 'label': str
    - 'weight': float
    - [label names]: float, confidence in each label

2) overall_metrics.csv, contains columns:
    - 'split': str
    - 'loss': float, mean per-example loss over entire epoch
    - 'acc_top{k}': float, accuracy@k over the entire epoch
    - 'loss_weighted' and 'acc_weighted_top{k}': float, weighted versions

3) confusion_matrices.npz
    - keys ['train', 'val', 'test']
    - values are np.ndarray, confusion matrices

4) label_stats.csv, per-label statistics, columns
    - 'split': str
    - 'label': str
    - 'precision': float
    - 'recall': float

"""

@@ -64,6 +53,18 @@ import tqdm

After "from classification import efficientnet, train_classifier", the example
command line removed from the header was re-added in a new cell:

#%% Example usage

"""
python evaluate_model.py \
    $BASE_LOGDIR/$LOGDIR/params.json \
    $BASE_LOGDIR/$LOGDIR/ckpt_XX.pt \
    --output-dir $BASE_LOGDIR/$LOGDIR \
    --splits train val test \
    --batch-size 256
"""

SPLITS = ['train', 'val', 'test']
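
Since the docstring pins down the layout of outputs_{split}.csv (a 'label' column
plus one confidence column per label), recovering top-1 accuracy from one of those
files takes only a few lines. This is an illustration of the file format, not the
package's evaluation code; the file name is hypothetical.

# Illustrative sketch only -- reads the outputs_{split}.csv layout described above.
import pandas as pd

def top1_accuracy(outputs_csv_path):
    df = pd.read_csv(outputs_csv_path)
    # every column other than path/label/weight is a per-label confidence
    label_cols = [c for c in df.columns if c not in ('path', 'label', 'weight')]
    predicted = df[label_cols].idxmax(axis=1)  # highest-confidence label per row
    return (predicted == df['label']).mean()

print('val top-1 accuracy: {:.3f}'.format(top1_accuracy('outputs_val.csv')))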