megadetector 5.0.8-py3-none-any.whl → 5.0.10-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (190)
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +65 -65
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
  20. api/batch_processing/postprocessing/compare_batch_results.py +113 -43
  21. api/batch_processing/postprocessing/convert_output_format.py +41 -16
  22. api/batch_processing/postprocessing/load_api_results.py +16 -17
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +52 -22
  25. api/batch_processing/postprocessing/merge_detections.py +14 -14
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
  27. api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +102 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -263
  71. data_management/coco_to_yolo.py +79 -58
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +62 -24
  76. data_management/databases/subset_json_db.py +24 -15
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -162
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -158
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +7 -7
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +65 -24
  120. data_management/labelme_to_yolo.py +8 -8
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +13 -13
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +44 -110
  128. data_management/lila/generate_lila_per_image_labels.py +55 -42
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +96 -33
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +110 -97
  135. data_management/remap_coco_categories.py +83 -83
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +30 -23
  138. data_management/wi_download_csv_to_coco.py +246 -239
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +300 -60
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +179 -113
  147. detection/run_inference_with_yolov5_val.py +108 -48
  148. detection/run_tiled_inference.py +111 -40
  149. detection/tf_detector.py +51 -29
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +228 -68
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -871
  157. md_utils/path_utils.py +460 -134
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +176 -60
  163. md_utils/write_html_image_list.py +40 -33
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +597 -291
  168. md_visualization/visualize_db.py +76 -48
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/METADATA +13 -7
  171. megadetector-5.0.10.dist-info/RECORD +224 -0
  172. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/top_level.txt +1 -0
  173. taxonomy_mapping/__init__.py +0 -0
  174. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  175. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  176. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  177. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  178. taxonomy_mapping/retrieve_sample_image.py +12 -12
  179. taxonomy_mapping/simple_image_download.py +11 -11
  180. taxonomy_mapping/species_lookup.py +10 -10
  181. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  182. taxonomy_mapping/taxonomy_graph.py +47 -47
  183. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  184. data_management/cct_json_to_filename_json.py +0 -89
  185. data_management/cct_to_csv.py +0 -140
  186. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  187. detection/detector_training/copy_checkpoints.py +0 -43
  188. megadetector-5.0.8.dist-info/RECORD +0 -205
  189. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/LICENSE +0 -0
  190. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/WHEEL +0 -0
classification/crop_detections.py

@@ -1,58 +1,43 @@
-########
-#
-# crop_detections.py
-#
-# Given a detections JSON file from MegaDetector, crops the bounding boxes above
-# a certain confidence threshold.
-#
-# This script takes as input a detections JSON file, usually the output of
-# detection/run_tf_detector_batch.py or the output of the Batch API in the
-# "Batch processing API output format".
-#
-# See https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing.
-#
-# The script can crop images that are either available locally or that need to be
-# downloaded from an Azure Blob Storage container.
-#
-# We assume that no image contains over 100 bounding boxes, and we always save
-# crops as RGB .jpg files for consistency. For each image, each bounding box is
-# cropped and saved to a file with a suffix "___cropXX_mdvY.Y.jpg" added to the
-# filename as the original image. "XX" ranges from "00" to "99" and "Y.Y"
-# ndicates the MegaDetector version. Based on the given confidence threshold, we
-# may skip saving certain bounding box crops, but we still increment the bounding
-# box number for skipped boxes.
-#
-# Example cropped image path (with MegaDetector bbox):
-#
-# "path/to/image.jpg___crop00_mdv4.1.jpg"
-#
-# By default, the images are cropped exactly per the given bounding box
-# coordinates. However, if square crops are desired, pass the --square-crops
-# flag. This will always generate a square crop whose size is the larger of the
-# bounding box width or height. In the case that the square crop boundaries exceed
-# the original image size, the crop is padded with 0s.
-#
-# This script outputs a log file to:
-#
-# <output_dir>/crop_detections_log_{timestamp}.json
-#
-# ...which contains images that failed to download and crop properly.
-#
-########
+"""
 
-#%% Example usage
+crop_detections.py
+
+Given a detections JSON file from MegaDetector, crops the bounding boxes above
+a certain confidence threshold.
+
+This script takes as input a detections JSON file, usually the output of
+detection/run_tf_detector_batch.py or the output of the Batch API in the
+"Batch processing API output format".
+
+See https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing.
+
+The script can crop images that are either available locally or that need to be
+downloaded from an Azure Blob Storage container.
+
+We assume that no image contains over 100 bounding boxes, and we always save
+crops as RGB .jpg files for consistency. For each image, each bounding box is
+cropped and saved to a file with a suffix "___cropXX_mdvY.Y.jpg" added to the
+filename as the original image. "XX" ranges from "00" to "99" and "Y.Y"
+ndicates the MegaDetector version. Based on the given confidence threshold, we
+may skip saving certain bounding box crops, but we still increment the bounding
+box number for skipped boxes.
+
+Example cropped image path (with MegaDetector bbox):
+
+"path/to/image.jpg___crop00_mdv4.1.jpg"
+
+By default, the images are cropped exactly per the given bounding box
+coordinates. However, if square crops are desired, pass the --square-crops
+flag. This will always generate a square crop whose size is the larger of the
+bounding box width or height. In the case that the square crop boundaries exceed
+the original image size, the crop is padded with 0s.
+
+This script outputs a log file to:
+
+<output_dir>/crop_detections_log_{timestamp}.json
+
+...which contains images that failed to download and crop properly.
 
-"""
-python crop_detections.py \
-    detections.json \
-    /path/to/crops \
-    --images-dir /path/to/images \
-    --container-url "https://account.blob.core.windows.net/container?sastoken" \
-    --detector-version "4.1" \
-    --threshold 0.8 \
-    --save-full-images --square-crops \
-    --threads 50 \
-    --logdir "."
 """
 
 #%% Imports
@@ -73,6 +58,22 @@ from PIL import Image, ImageOps
 from tqdm import tqdm
 
 
+#%% Example usage
+
+"""
+python crop_detections.py \
+    detections.json \
+    /path/to/crops \
+    --images-dir /path/to/images \
+    --container-url "https://account.blob.core.windows.net/container?sastoken" \
+    --detector-version "4.1" \
+    --threshold 0.8 \
+    --save-full-images --square-crops \
+    --threads 50 \
+    --logdir "."
+"""
+
+
 #%% Main function
 
 def main(detections_json_path: str,
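For context, the square-crop behavior documented in this file's docstring (crop to the larger of the box's width and height, zero-pad past the image edge, save as RGB .jpg with the "___cropXX_mdvY.Y.jpg" suffix) can be sketched in a few lines of Python. This is an illustration of the documented behavior, not the code shipped in the wheel; crop_square is a hypothetical helper, and the normalized [x_min, y_min, width, height] box layout is the standard MegaDetector output format:

    from PIL import Image

    def crop_square(img: Image.Image, bbox_norm) -> Image.Image:
        # MegaDetector boxes are normalized [x_min, y_min, width, height]
        img_w, img_h = img.size
        x = int(bbox_norm[0] * img_w)
        y = int(bbox_norm[1] * img_h)
        w = int(bbox_norm[2] * img_w)
        h = int(bbox_norm[3] * img_h)
        side = max(w, h)
        # center the square on the original box
        x -= (side - w) // 2
        y -= (side - h) // 2
        # PIL fills regions outside the image with zeros (black)
        return img.crop((x, y, x + side, y + side))

    crop = crop_square(Image.open('image.jpg'), [0.1, 0.2, 0.3, 0.4])
    crop.convert('RGB').save('image.jpg___crop00_mdv4.1.jpg')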
classification/csv_to_json.py

@@ -1,105 +1,101 @@
-########
-#
-# csv_to_json.py
-#
-# Converts CSV to JSON format for label specification.
-#
-# There are 3 possible values for the 'type' column in the CSV:
-#
-# - "row": this selects a specific rowfrom the master taxonomy CSV
-#       content syntax: <dataset_name>|<dataset_label>
-#
-# - "datasettaxon": this selects all animals in a taxon from a particular dataset
-#       content syntax: <dataset_name>|<taxon_level>|<taxon_name>
-#
-# - <taxon_level>: this selects all animals in a taxon across all datasets
-#       content syntax: <taxon_name>
-#
-# Example CSV input:
-#
-"""
-# comment lines starting with '#' are allowed
-output_label,type,content
-
-cervid,row,idfg|deer
-cervid,row,idfg|elk
-cervid,row,idfg|prong
-cervid,row,idfg_swwlf_2019|elk
-cervid,row,idfg_swwlf_2019|muledeer
-cervid,row,idfg_swwlf_2019|whitetaileddeer
-cervid,max_count,50000
-
-cervid,family,cervidae
-cervid,datasettaxon,idfg|family|cervidae
-cervid,datasettaxon,idfg_swwlf_2019|family|cervidae
-
-bird,row,idfg_swwlf_2019|bird
-bird,class,aves
-bird,max_count,50000
-bird,prioritize,"[['idfg_swwlf_2019'], ['idfg']]"
-
-!bird,row,idfg_swwlf_2019|turkey
-!bird,genus,meleagris
 """
-#
-# Example JSON output:
-#
-"""
-{
-    "cervid": {
-        "dataset_labels": {
-            "idfg": ["deer", "elk", "prong"],
-            "idfg_swwlf_2019": ["elk", "muledeer", "whitetaileddeer"]
-        },
-        "taxa": [
-            {
-                "level": "family",
-                "name": "cervidae"
-            },
-            {
-                "level": "family",
-                "name": "cervidae"
-                "datasets": ["idfg"]
-            },
-            {
-                "level": "family",
-                "name": "cervidae"
-                "datasets": ["idfg_swwlf_2019"]
-            }
-        ],
-        "max_count": 50000
-    },
-    "bird": {
-        "dataset_labels": {
-            "idfg_swwlf_2019": ["bird"]
-        },
-        "taxa": [
-            {
-                "level": "class",
-                "name": "aves"
-            }
-        ],
-        "exclude": {
-            "dataset_labels": {
-                "idfg_swwlf_2019": ["turkey"]
-            },
-            "taxa": [
-                {
-                    "level": "genus",
-                    "name": "meleagris"
-                }
-            ]
-        },
-        "max_count": "50000",
-        "prioritize": [
-            ["idfg_swwlf_2019"],
-            ["idfg"]
-        ],
-    }
-}
+
+csv_to_json.py
+
+Converts CSV to JSON format for label specification.
+
+There are 3 possible values for the 'type' column in the CSV:
+
+- "row": this selects a specific rowfrom the master taxonomy CSV
+      content syntax: <dataset_name>|<dataset_label>
+
+- "datasettaxon": this selects all animals in a taxon from a particular dataset
+      content syntax: <dataset_name>|<taxon_level>|<taxon_name>
+
+- <taxon_level>: this selects all animals in a taxon across all datasets
+      content syntax: <taxon_name>
+
+Example CSV input:
+
+"
+# comment lines starting with '#' are allowed
+output_label,type,content
+cervid,row,idfg|deer
+cervid,row,idfg|elk
+cervid,row,idfg|prong
+cervid,row,idfg_swwlf_2019|elk
+cervid,row,idfg_swwlf_2019|muledeer
+cervid,row,idfg_swwlf_2019|whitetaileddeer
+cervid,max_count,50000
+cervid,family,cervidae
+cervid,datasettaxon,idfg|family|cervidae
+cervid,datasettaxon,idfg_swwlf_2019|family|cervidae
+bird,row,idfg_swwlf_2019|bird
+bird,class,aves
+bird,max_count,50000
+bird,prioritize,"[['idfg_swwlf_2019'], ['idfg']]"
+!bird,row,idfg_swwlf_2019|turkey
+!bird,genus,meleagris
+"
+
+Example JSON output:
+
+"
+{
+    "cervid": {
+        "dataset_labels": {
+            "idfg": ["deer", "elk", "prong"],
+            "idfg_swwlf_2019": ["elk", "muledeer", "whitetaileddeer"]
+        },
+        "taxa": [
+            {
+                "level": "family",
+                "name": "cervidae"
+            },
+            {
+                "level": "family",
+                "name": "cervidae"
+                "datasets": ["idfg"]
+            },
+            {
+                "level": "family",
+                "name": "cervidae"
+                "datasets": ["idfg_swwlf_2019"]
+            }
+        ],
+        "max_count": 50000
+    },
+    "bird": {
+        "dataset_labels": {
+            "idfg_swwlf_2019": ["bird"]
+        },
+        "taxa": [
+            {
+                "level": "class",
+                "name": "aves"
+            }
+        ],
+        "exclude": {
+            "dataset_labels": {
+                "idfg_swwlf_2019": ["turkey"]
+            },
+            "taxa": [
+                {
+                    "level": "genus",
+                    "name": "meleagris"
+                }
+            ]
+        },
+        "max_count": "50000",
+        "prioritize": [
+            ["idfg_swwlf_2019"],
+            ["idfg"]
+        ],
+    }
+}
+"
+
 """
-#
-########
 
 #%% Imports
 
@@ -125,6 +121,7 @@ def main():
 
 
 #%% Support functions
+
 def parse_csv_row(obj: dict[str, Any], rowtype: str, content: str) -> None:
     """
     Parses a row in the CSV.
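For readers unfamiliar with the label-specification format documented above, a minimal sketch of how one CSV row folds into the output JSON may help. This is an illustration of the documented semantics only, not the module's parse_csv_row; add_spec_row is a hypothetical helper, and it omits the '!'-prefixed exclusion rows and the 'prioritize' type:

    def add_spec_row(spec: dict, output_label: str, rowtype: str, content: str) -> None:
        entry = spec.setdefault(output_label, {'dataset_labels': {}, 'taxa': []})
        if rowtype == 'row':
            # content syntax: <dataset_name>|<dataset_label>
            dataset, label = content.split('|')
            entry['dataset_labels'].setdefault(dataset, []).append(label)
        elif rowtype == 'datasettaxon':
            # content syntax: <dataset_name>|<taxon_level>|<taxon_name>
            dataset, level, name = content.split('|')
            entry['taxa'].append({'level': level, 'name': name, 'datasets': [dataset]})
        elif rowtype == 'max_count':
            entry['max_count'] = int(content)
        else:
            # any other rowtype is treated as a taxon level, e.g. 'family' or 'class'
            entry['taxa'].append({'level': rowtype, 'name': content})

    spec = {}
    add_spec_row(spec, 'cervid', 'row', 'idfg|deer')
    add_spec_row(spec, 'cervid', 'family', 'cervidae')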
classification/detect_and_crop.py

@@ -1,111 +1,97 @@
-########
-#
-# detect_and_crop.py
-#
-# Run MegaDetector on images via Batch API, then save crops of the detected
-# bounding boxes.
-#
-# The input to this script is a "queried images" JSON file, whose keys are paths
-# to images and values are dicts containing information relevant for training
-# a classifier, including labels and (optionally) ground-truth bounding boxes.
-# The image paths are in the format `<dataset-name>/<blob-name>` where we assume
-# that the dataset name does not contain '/'.
-#
-# {
-#     "caltech/cct_images/59f79901-23d2-11e8-a6a3-ec086b02610b.jpg": {
-#         "dataset": "caltech",
-#         "location": 13,
-#         "class": "mountain_lion",  # class from dataset
-#         "bbox": [{"category": "animal",
-#                   "bbox": [0, 0.347, 0.237, 0.257]}],  # ground-truth bbox
-#         "label": ["monutain_lion"]  # labels to use in classifier
-#     },
-#     "caltech/cct_images/59f5fe2b-23d2-11e8-a6a3-ec086b02610b.jpg": {
-#         "dataset": "caltech",
-#         "location": 13,
-#         "class": "mountain_lion",  # class from dataset
-#         "label": ["monutain_lion"]  # labels to use in classifier
-#     },
-#     ...
-# }
-#
-# We assume that no image contains over 100 bounding boxes, and we always save
-# crops as RGB .jpg files for consistency. For each image, each bounding box is
-# cropped and saved to a file with a suffix "___cropXX.jpg" (ground truth bbox) or
-# "___cropXX_mdvY.Y.jpg" (detected bbox) added to the filename of the original
-# image. "XX" ranges from "00" to "99" and "Y.Y" indicates the MegaDetector
-# version. If an image has ground truth bounding boxes, we assume that they are
-# exhaustive--i.e., there are no other objects of interest, so we don't need to
-# run MegaDetector on the image. If an image does not have ground truth bounding
-# boxes, we run MegaDetector on the image and label the detected boxes in order
-# from 00 up to 99. Based on the given confidence threshold, we may skip saving
-# certain bounding box crops, but we still increment the bounding box number for
-# skipped boxes.
-#
-# Example cropped image path (with ground truth bbox from MegaDB)
-#
-# "path/to/crops/image.jpg___crop00.jpg"
-#
-# Example cropped image path (with MegaDetector bbox)
-#
-# "path/to/crops/image.jpg___crop00_mdv4.1.jpg"
-#
-# By default, the images are cropped exactly per the given bounding box
-# coordinates. However, if square crops are desired, pass the --square-crops
-# flag. This will always generate a square crop whose size is the larger of the
-# bounding box width or height. In the case that the square crop boundaries exceed
-# the original image size, the crop is padded with 0s.
-#
-# This script currently only supports running MegaDetector via the Batch Detection
-# API. See the classification README for instructions on running MegaDetector
-# locally. If running the Batch Detection API, set the following environment
-# variables for the Azure Blob Storage container in which we save the intermediate
-# task lists:
-#
-#     BATCH_DETECTION_API_URL                  # API URL
-#     CLASSIFICATION_BLOB_STORAGE_ACCOUNT      # storage account name
-#     CLASSIFICATION_BLOB_CONTAINER            # container name
-#     CLASSIFICATION_BLOB_CONTAINER_WRITE_SAS  # SAS token, without leading '?'
-#     DETECTION_API_CALLER                     # allow-listed API caller
-#
-# This script allows specifying a directory where MegaDetector outputs are cached
-# via the --detector-output-cache-dir argument. This directory must be
-# organized as:
-#
-#     <cache-dir>/<MegaDetector-version>/<dataset-name>.json
-#
-# Example: If the `cameratrapssc/classifier-training` Azure blob storage
-# container is mounted to the local machine via blobfuse, it may be used as
-# a MegaDetector output cache directory by passing
-#     "cameratrapssc/classifier-training/mdcache/"
-# as the value for --detector-output-cache-dir.
-#
-# This script outputs either 1 or 3 files, depending on whether the Batch Detection API
-# is run:
-#
-# - <output_dir>/detect_and_crop_log_{timestamp}.json
-#     log of images missing detections and images that failed to properly
-#     download and crop
-# - <output_dir>/batchapi_tasklists/{task_id}.json
-#     (if --run-detector) task lists uploaded to the Batch Detection API
-# - <output_dir>/batchapi_response/{task_id}.json
-#     (if --run-detector) task status responses for completed tasks
-#
-########
+"""
 
-#%% Example usage
+detect_and_crop.py
+
+Run MegaDetector on images via Batch API, then save crops of the detected
+bounding boxes.
+
+The input to this script is a "queried images" JSON file, whose keys are paths
+to images and values are dicts containing information relevant for training
+a classifier, including labels and (optionally) ground-truth bounding boxes.
+The image paths are in the format `<dataset-name>/<blob-name>` where we assume
+that the dataset name does not contain '/'.
+
+{
+    "caltech/cct_images/59f79901-23d2-11e8-a6a3-ec086b02610b.jpg": {
+        "dataset": "caltech",
+        "location": 13,
+        "class": "mountain_lion",  # class from dataset
+        "bbox": [{"category": "animal",
+                  "bbox": [0, 0.347, 0.237, 0.257]}],  # ground-truth bbox
+        "label": ["monutain_lion"]  # labels to use in classifier
+    },
+    "caltech/cct_images/59f5fe2b-23d2-11e8-a6a3-ec086b02610b.jpg": {
+        "dataset": "caltech",
+        "location": 13,
+        "class": "mountain_lion",  # class from dataset
+        "label": ["monutain_lion"]  # labels to use in classifier
+    },
+    ...
+}
+
+We assume that no image contains over 100 bounding boxes, and we always save
+crops as RGB .jpg files for consistency. For each image, each bounding box is
+cropped and saved to a file with a suffix "___cropXX.jpg" (ground truth bbox) or
+"___cropXX_mdvY.Y.jpg" (detected bbox) added to the filename of the original
+image. "XX" ranges from "00" to "99" and "Y.Y" indicates the MegaDetector
+version. If an image has ground truth bounding boxes, we assume that they are
+exhaustive--i.e., there are no other objects of interest, so we don't need to
+run MegaDetector on the image. If an image does not have ground truth bounding
+boxes, we run MegaDetector on the image and label the detected boxes in order
+from 00 up to 99. Based on the given confidence threshold, we may skip saving
+certain bounding box crops, but we still increment the bounding box number for
+skipped boxes.
+
+Example cropped image path (with ground truth bbox from MegaDB)
+
+"path/to/crops/image.jpg___crop00.jpg"
+
+Example cropped image path (with MegaDetector bbox)
+
+"path/to/crops/image.jpg___crop00_mdv4.1.jpg"
+
+By default, the images are cropped exactly per the given bounding box
+coordinates. However, if square crops are desired, pass the --square-crops
+flag. This will always generate a square crop whose size is the larger of the
+bounding box width or height. In the case that the square crop boundaries exceed
+the original image size, the crop is padded with 0s.
+
+This script currently only supports running MegaDetector via the Batch Detection
+API. See the classification README for instructions on running MegaDetector
+locally. If running the Batch Detection API, set the following environment
+variables for the Azure Blob Storage container in which we save the intermediate
+task lists:
+
+    BATCH_DETECTION_API_URL                  # API URL
+    CLASSIFICATION_BLOB_STORAGE_ACCOUNT      # storage account name
+    CLASSIFICATION_BLOB_CONTAINER            # container name
+    CLASSIFICATION_BLOB_CONTAINER_WRITE_SAS  # SAS token, without leading '?'
+    DETECTION_API_CALLER                     # allow-listed API caller
+
+This script allows specifying a directory where MegaDetector outputs are cached
+via the --detector-output-cache-dir argument. This directory must be
+organized as:
+
+    <cache-dir>/<MegaDetector-version>/<dataset-name>.json
+
+Example: If the `cameratrapssc/classifier-training` Azure blob storage
+container is mounted to the local machine via blobfuse, it may be used as
+a MegaDetector output cache directory by passing
+    "cameratrapssc/classifier-training/mdcache/"
+as the value for --detector-output-cache-dir.
+
+This script outputs either 1 or 3 files, depending on whether the Batch Detection API
+is run:
+
+- <output_dir>/detect_and_crop_log_{timestamp}.json
+    log of images missing detections and images that failed to properly
+    download and crop
+- <output_dir>/batchapi_tasklists/{task_id}.json
+    (if --run-detector) task lists uploaded to the Batch Detection API
+- <output_dir>/batchapi_response/{task_id}.json
+    (if --run-detector) task status responses for completed tasks
 
 """
-python detect_and_crop.py \
-    base_logdir/queried_images.json \
-    base_logdir \
-    --detector-output-cache-dir /path/to/classifier-training/mdcache \
-    --detector-version 4.1 \
-    --run-detector --resume-file base_logdir/resume.json \
-    --cropped-images-dir /path/to/crops --square-crops --threshold 0.9 \
-    --save-full-images --images-dir /path/to/images --threads 50
-"""
-
 
 #%% Imports
 
@@ -134,6 +120,20 @@ from md_utils import path_utils
 from md_utils import sas_blob_utils
 
 
+#%% Example usage
+
+"""
+python detect_and_crop.py \
+    base_logdir/queried_images.json \
+    base_logdir \
+    --detector-output-cache-dir /path/to/classifier-training/mdcache \
+    --detector-version 4.1 \
+    --run-detector --resume-file base_logdir/resume.json \
+    --cropped-images-dir /path/to/crops --square-crops --threshold 0.9 \
+    --save-full-images --images-dir /path/to/images --threads 50
+"""
+
+
 #%% Main function
 
 def main(queried_images_json_path: str,
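The crop-numbering rule that both crop_detections.py and detect_and_crop.py document (boxes below the confidence threshold are skipped, but still consume a crop number) amounts to indexing crops by position in the detection list rather than by count of saved crops, so filenames stay stable across threshold choices. A minimal sketch, assuming the standard MegaDetector per-image detection list with 'conf' and 'bbox' fields; crops_to_save is a hypothetical helper:

    def crops_to_save(detections, threshold, detector_version='4.1'):
        # enumerate() keys the crop suffix to the box's position in the list,
        # so names stay stable even when low-confidence boxes are skipped
        for crop_idx, det in enumerate(detections):
            if det['conf'] < threshold:
                continue  # skipped, but crop_idx still advances
            suffix = f'___crop{crop_idx:02d}_mdv{detector_version}.jpg'
            yield suffix, det['bbox']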
classification/evaluate_model.py

@@ -1,47 +1,36 @@
-########
-#
-# evaluate_model.py
-#
-# Evaluate a species classifier.
-#
-# Currently the implementation of multi-label multi-class classification is
-# non-functional.
-#
-# Outputs the following files:
-#
-# 1) outputs_{split}.csv, one file per split, contains columns:
-#     - 'path': str, path to cropped image
-#     - 'label': str
-#     - 'weight': float
-#     - [label names]: float, confidence in each label
-#
-# 2) overall_metrics.csv, contains columns:
-#     - 'split': str
-#     - 'loss': float, mean per-example loss over entire epoch
-#     - 'acc_top{k}': float, accuracy@k over the entire epoch
-#     - 'loss_weighted' and 'acc_weighted_top{k}': float, weighted versions
-#
-# 3) confusion_matrices.npz
-#     - keys ['train', 'val', 'test']
-#     - values are np.ndarray, confusion matrices
-#
-# 4) label_stats.csv, per-label statistics, columns
-#     - 'split': str
-#     - 'label': str
-#     - 'precision': float
-#     - 'recall': float
-#
-########
+"""
 
-#%% Example usage
+evaluate_model.py
+
+Evaluate a species classifier.
+
+Currently the implementation of multi-label multi-class classification is
+non-functional.
+
+Outputs the following files:
+
+1) outputs_{split}.csv, one file per split, contains columns:
+    - 'path': str, path to cropped image
+    - 'label': str
+    - 'weight': float
+    - [label names]: float, confidence in each label
+
+2) overall_metrics.csv, contains columns:
+    - 'split': str
+    - 'loss': float, mean per-example loss over entire epoch
+    - 'acc_top{k}': float, accuracy@k over the entire epoch
+    - 'loss_weighted' and 'acc_weighted_top{k}': float, weighted versions
+
+3) confusion_matrices.npz
+    - keys ['train', 'val', 'test']
+    - values are np.ndarray, confusion matrices
+
+4) label_stats.csv, per-label statistics, columns
+    - 'split': str
+    - 'label': str
+    - 'precision': float
+    - 'recall': float
 
-"""
-python evaluate_model.py \
-    $BASE_LOGDIR/$LOGDIR/params.json \
-    $BASE_LOGDIR/$LOGDIR/ckpt_XX.pt \
-    --output-dir $BASE_LOGDIR/$LOGDIR \
-    --splits train val test \
-    --batch-size 256
 """
 
 #%% Imports and constants
@@ -64,6 +53,18 @@ import tqdm
 
 from classification import efficientnet, train_classifier
 
+
+#%% Example usage
+
+"""
+python evaluate_model.py \
+    $BASE_LOGDIR/$LOGDIR/params.json \
+    $BASE_LOGDIR/$LOGDIR/ckpt_XX.pt \
+    --output-dir $BASE_LOGDIR/$LOGDIR \
+    --splits train val test \
+    --batch-size 256
+"""
+
 SPLITS = ['train', 'val', 'test']
 
 
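The confusion_matrices.npz file that evaluate_model.py's docstring describes can be consumed directly with NumPy. A short sketch, assuming (as is conventional, though not stated in the docstring) that rows index true labels and columns index predictions:

    import numpy as np

    with np.load('confusion_matrices.npz') as data:
        for split in ['train', 'val', 'test']:
            cm = data[split]
            # per-class recall: correct predictions over true-label row sums
            recall = cm.diagonal() / cm.sum(axis=1)
            print(split, recall.mean())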