megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (191) hide show
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +93 -79
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
  20. api/batch_processing/postprocessing/compare_batch_results.py +114 -44
  21. api/batch_processing/postprocessing/convert_output_format.py +62 -19
  22. api/batch_processing/postprocessing/load_api_results.py +17 -20
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +165 -68
  25. api/batch_processing/postprocessing/merge_detections.py +40 -15
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
  27. api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +107 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -0
  71. data_management/coco_to_yolo.py +86 -62
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +130 -83
  76. data_management/databases/subset_json_db.py +25 -16
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -144
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -160
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +8 -8
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +309 -159
  120. data_management/labelme_to_yolo.py +103 -60
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +114 -31
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +92 -90
  128. data_management/lila/generate_lila_per_image_labels.py +56 -43
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +103 -70
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +161 -99
  135. data_management/remap_coco_categories.py +84 -0
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +32 -44
  138. data_management/wi_download_csv_to_coco.py +246 -0
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +535 -95
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +189 -114
  147. detection/run_inference_with_yolov5_val.py +118 -51
  148. detection/run_tiled_inference.py +113 -42
  149. detection/tf_detector.py +51 -28
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +249 -70
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -862
  157. md_utils/path_utils.py +655 -155
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +208 -27
  163. md_utils/write_html_image_list.py +51 -35
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +908 -311
  168. md_visualization/visualize_db.py +109 -58
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
  173. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  174. taxonomy_mapping/__init__.py +0 -0
  175. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  176. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  177. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  178. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  179. taxonomy_mapping/retrieve_sample_image.py +12 -12
  180. taxonomy_mapping/simple_image_download.py +11 -11
  181. taxonomy_mapping/species_lookup.py +10 -10
  182. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  183. taxonomy_mapping/taxonomy_graph.py +47 -47
  184. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  185. data_management/cct_json_to_filename_json.py +0 -89
  186. data_management/cct_to_csv.py +0 -140
  187. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  188. detection/detector_training/copy_checkpoints.py +0 -43
  189. md_visualization/visualize_megadb.py +0 -183
  190. megadetector-5.0.7.dist-info/RECORD +0 -202
  191. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
@@ -1,52 +1,45 @@
1
- ########
2
- #
3
- # identify_mislabeled_candidates.py
4
- #
5
- # Identify images that may have been mislabeled.
6
- #
7
- # A "mislabeled candidate" is defined as an image meeting both criteria:
8
- #
9
- # * according to the ground-truth label, the model made an incorrect prediction
10
- #
11
- # * the model's prediction confidence exceeds its confidence for the ground-truth
12
- # label by at least <margin>
13
- #
14
- # This script outputs for each dataset a text file containing the filenames of
15
- # mislabeled candidates, one per line. The text files are saved to:
16
- #
17
- # <logdir>/mislabeled_candidates_{split}_{dataset}.txt
18
- #
19
- # To this list of files can then be passed to AzCopy to be downloaded:
20
- #
21
- """
22
- azcopy cp "http://<url_of_container>?<sas_token>" "/save/files/here" \
23
- --list-of-files "/path/to/mislabeled_candidates_{split}_{dataset}.txt"
24
- """
25
- #
26
- # To save the filename as <dataset_name>/<blob_name> (instead of just <blob_name>
27
- # by default), pass the --include-dataset-in-filename flag. Then, the images can
28
- # be downloaded with:
29
- #
30
- """
31
- python data_management/megadb/download_images.py txt \
32
- "/path/to/mislabeled_candidates_{split}_{dataset}.txt" \
33
- /save/files/here \
34
- --threads 50
35
1
  """
36
- #
37
- # Assumes the following directory layout:
38
- # <base_logdir>/
39
- # label_index.json
40
- # <logdir>/
41
- # outputs_{split}.csv.gz
42
- #
43
- ########
44
2
 
45
- #%% Example usage
3
+ identify_mislabeled_candidates.py
4
+
5
+ Identify images that may have been mislabeled.
6
+
7
+ A "mislabeled candidate" is defined as an image meeting both criteria:
8
+
9
+ * according to the ground-truth label, the model made an incorrect prediction
10
+
11
+ * the model's prediction confidence exceeds its confidence for the ground-truth
12
+ label by at least <margin>
13
+
14
+ This script outputs for each dataset a text file containing the filenames of
15
+ mislabeled candidates, one per line. The text files are saved to:
16
+
17
+ <logdir>/mislabeled_candidates_{split}_{dataset}.txt
18
+
19
+ This list of files can then be passed to AzCopy to be downloaded:
20
+
21
+ ""
22
+ azcopy cp "http://<url_of_container>?<sas_token>" "/save/files/here" \
23
+ --list-of-files "/path/to/mislabeled_candidates_{split}_{dataset}.txt"
24
+ ""
25
+
26
+ To save the filename as <dataset_name>/<blob_name> (instead of just <blob_name>
27
+ by default), pass the --include-dataset-in-filename flag. Then, the images can
28
+ be downloaded with:
29
+
30
+ ""
31
+ python data_management/megadb/download_images.py txt \
32
+ "/path/to/mislabeled_candidates_{split}_{dataset}.txt" \
33
+ /save/files/here \
34
+ --threads 50
35
+ ""
36
+
37
+ Assumes the following directory layout:
38
+ <base_logdir>/
39
+ label_index.json
40
+ <logdir>/
41
+ outputs_{split}.csv.gz
46
42
 
47
- """
48
- python identify_mislabeled_candidates.py <base_logdir>/<logdir> \
49
- --margin 0.5 --splits val test
50
43
  """
51
44
 
52
45
  #%% Imports
@@ -63,6 +56,14 @@ import pandas as pd
63
56
  from tqdm import tqdm
64
57
 
65
58
 
59
+ #%% Example usage
60
+
61
+ """
62
+ python identify_mislabeled_candidates.py <base_logdir>/<logdir> \
63
+ --margin 0.5 --splits val test
64
+ """
65
+
66
+
66
67
  #%% Main function
67
68
 
68
69
  def main(logdir: str, splits: Iterable[str], margin: float,
@@ -1,13 +1,13 @@
1
- ########
2
- #
3
- # json_to_azcopy_list.py
4
- #
5
- # Given a queried_images.json file output from json_validator.py, generates
6
- # one text file <dataset>_images.txt for every dataset included.
7
- #
8
- # See: https://github.com/Azure/azure-storage-azcopy/wiki/Listing-specific-files-to-transfer
9
- #
10
- ########
1
+ """
2
+
3
+ json_to_azcopy_list.py
4
+
5
+ Given a queried_images.json file output from json_validator.py, generates
6
+ one text file <dataset>_images.txt for every dataset included.
7
+
8
+ See: https://github.com/Azure/azure-storage-azcopy/wiki/Listing-specific-files-to-transfer
9
+
10
+ """
11
11
 
12
12
  #%% Imports and constants
13
13
 
@@ -1,76 +1,68 @@
1
- ########
2
- #
3
- # json_validator.py
4
- #
5
- # Validates a classification label specification JSON file and optionally
6
- # queries MegaDB to find matching image files.
7
- #
8
- # See README.md for an example of a classification label specification JSON file.
9
- #
10
- # The validation step takes the classification label specification JSON file and
11
- # finds the dataset labels that belong to each classification label. It checks
12
- # that the following conditions hold:
13
- #
14
- # 1) Each classification label specification matches at least 1 dataset label.
15
- #
16
- # 2) If the classification label includes a taxonomical specification, then the
17
- # taxa is actually a part of our master taxonomy.
18
- #
19
- # 3) If the 'prioritize' key is found for a given label, then the label must
20
- # also have a 'max_count' key.
21
- #
22
- # 4) If --allow-multilabel=False, then no dataset label is included in more than
23
- # one classification label.
24
- #
25
- # If --output-dir <output_dir> is given, then we query MegaDB for images
26
- # that match the dataset labels identified during the validation step. We filter
27
- # out images that have unaccepted file extensions and images that don't actually
28
- # exist in Azure Blob Storage. In total, we output the following files:
29
- #
30
- # <output_dir>/
31
- #
32
- # - included_dataset_labels.txt
33
- # lists the original dataset classes included for each classification label
34
- #
35
- # - image_counts_by_label_presample.json
36
- # number of images for each classification label after filtering bad
37
- # images, but before sampling
38
- #
39
- # - image_counts_by_label_sampled.json
40
- # number of images for each classification label in queried_images.json
41
- #
42
- # - json_validator_log_{timestamp}.json
43
- # log of excluded images / labels
44
- #
45
- # - queried_images.json
46
- # main output file, ex:
47
- #
48
- # {
49
- # "caltech/cct_images/59f5fe2b-23d2-11e8-a6a3-ec086b02610b.jpg": {
50
- # "dataset": "caltech",
51
- # "location": 13,
52
- # "class": "mountain_lion", // class from dataset
53
- # "label": ["monutain_lion"] // labels to use in classifier
54
- # },
55
- # "caltech/cct_images/59f79901-23d2-11e8-a6a3-ec086b02610b.jpg": {
56
- # "dataset": "caltech",
57
- # "location": 13,
58
- # "class": "mountain_lion", // class from dataset
59
- # "bbox": [{"category": "animal",
60
- # "bbox": [0, 0.347, 0.237, 0.257]}],
61
- # "label": ["monutain_lion"] // labels to use in classifier
62
- # },
63
- # ...
64
- # }
65
- #
66
- ########
1
+ """
67
2
 
68
- #%% Example usage
3
+ json_validator.py
4
+
5
+ Validates a classification label specification JSON file and optionally
6
+ queries MegaDB to find matching image files.
7
+
8
+ See README.md for an example of a classification label specification JSON file.
9
+
10
+ The validation step takes the classification label specification JSON file and
11
+ finds the dataset labels that belong to each classification label. It checks
12
+ that the following conditions hold:
13
+
14
+ 1) Each classification label specification matches at least 1 dataset label.
15
+
16
+ 2) If the classification label includes a taxonomical specification, then the
17
+ taxa is actually a part of our master taxonomy.
18
+
19
+ 3) If the 'prioritize' key is found for a given label, then the label must
20
+ also have a 'max_count' key.
21
+
22
+ 4) If --allow-multilabel=False, then no dataset label is included in more than
23
+ one classification label.
24
+
25
+ If --output-dir <output_dir> is given, then we query MegaDB for images
26
+ that match the dataset labels identified during the validation step. We filter
27
+ out images that have unaccepted file extensions and images that don't actually
28
+ exist in Azure Blob Storage. In total, we output the following files:
29
+
30
+ <output_dir>/
31
+
32
+ - included_dataset_labels.txt
33
+ lists the original dataset classes included for each classification label
34
+
35
+ - image_counts_by_label_presample.json
36
+ number of images for each classification label after filtering bad
37
+ images, but before sampling
38
+
39
+ - image_counts_by_label_sampled.json
40
+ number of images for each classification label in queried_images.json
41
+
42
+ - json_validator_log_{timestamp}.json
43
+ log of excluded images / labels
44
+
45
+ - queried_images.json
46
+ main output file, ex:
47
+
48
+ {
49
+ "caltech/cct_images/59f5fe2b-23d2-11e8-a6a3-ec086b02610b.jpg": {
50
+ "dataset": "caltech",
51
+ "location": 13,
52
+ "class": "mountain_lion", // class from dataset
53
+ "label": ["mountain_lion"] // labels to use in classifier
54
+ },
55
+ "caltech/cct_images/59f79901-23d2-11e8-a6a3-ec086b02610b.jpg": {
56
+ "dataset": "caltech",
57
+ "location": 13,
58
+ "class": "mountain_lion", // class from dataset
59
+ "bbox": [{"category": "animal",
60
+ "bbox": [0, 0.347, 0.237, 0.257]}],
61
+ "label": ["mountain_lion"] // labels to use in classifier
62
+ },
63
+ ...
64
+ }
69
65
 
70
- """
71
- python json_validator.py label_spec.json \
72
- $HOME/camera-traps-private/camera_trap_taxonomy_mapping.csv \
73
- --output-dir run --json-indent 2
74
66
  """
75
67
 
76
68
  from __future__ import annotations
@@ -96,6 +88,15 @@ from taxonomy_mapping.taxonomy_graph import (
96
88
  build_taxonomy_graph, dag_to_tree, TaxonNode)
97
89
 
98
90
 
91
+ #%% Example usage
92
+
93
+ """
94
+ python json_validator.py label_spec.json \
95
+ $HOME/camera-traps-private/camera_trap_taxonomy_mapping.csv \
96
+ --output-dir run --json-indent 2
97
+ """
98
+
99
+
99
100
  #%% Main function
100
101
 
101
102
  def main(label_spec_json_path: str,
@@ -1,48 +1,39 @@
1
- ########
2
- #
3
- # map_classification_categories.py
4
- #
5
- # Maps a classifier's output categories to desired target categories.
6
- #
7
- # In this file, we use the following terminology:
8
- # * "category": a category output by the classifier
9
- # * "target": name of a desired group, comprising >= 1 classifier categories
10
- #
11
- # Takes as input 2 label specification JSON files:
12
- #
13
- # 1) desired label specification JSON file
14
- # this should not have a target named "other"
15
- #
16
- # 2) label specification JSON file of trained classifier
17
- #
18
- # The mapping is accomplished as follows:
19
- #
20
- # 1. For each category in the classifier label spec, find all taxon nodes that
21
- # belong to that category.
22
- #
23
- # 2. Given a target in the desired label spec, find all taxon nodes that belong
24
- # to that target. If there is any classifier category whose nodes are a
25
- # subset of the target nodes, then map the classifier category to that target.
26
- # Any partial intersection between a target's nodes and a category's nodes
27
- # is considered an error.
28
- #
29
- # 3. If there are any classifier categories that have not yet been assigned a
30
- # target, group them into the "other" target.
31
- #
32
- # This script outputs a JSON file that maps each target to a list of classifier
33
- # categories.
34
- #
35
- # The taxonomy mapping parts of this script are very similar to json_validator.py.
36
- #
37
- ########
1
+ """
38
2
 
39
- #%% Example usage
3
+ map_classification_categories.py
4
+
5
+ Maps a classifier's output categories to desired target categories.
6
+
7
+ In this file, we use the following terminology:
8
+ * "category": a category output by the classifier
9
+ * "target": name of a desired group, comprising >= 1 classifier categories
10
+
11
+ Takes as input 2 label specification JSON files:
12
+
13
+ 1) desired label specification JSON file
14
+ this should not have a target named "other"
15
+
16
+ 2) label specification JSON file of trained classifier
17
+
18
+ The mapping is accomplished as follows:
19
+
20
+ 1. For each category in the classifier label spec, find all taxon nodes that
21
+ belong to that category.
22
+
23
+ 2. Given a target in the desired label spec, find all taxon nodes that belong
24
+ to that target. If there is any classifier category whose nodes are a
25
+ subset of the target nodes, then map the classifier category to that target.
26
+ Any partial intersection between a target's nodes and a category's nodes
27
+ is considered an error.
28
+
29
+ 3. If there are any classifier categories that have not yet been assigned a
30
+ target, group them into the "other" target.
31
+
32
+ This script outputs a JSON file that maps each target to a list of classifier
33
+ categories.
34
+
35
+ The taxonomy mapping parts of this script are very similar to json_validator.py.
40
36
 
41
- """
42
- python map_classification_categories.py \
43
- desired_label_spec.json \
44
- /path/to/classifier/label_spec.json \
45
- $HOME/camera-traps-private/camera_trap_taxonomy_mapping.csv
46
37
  """
47
38
 
48
39
  #%% Imports
@@ -65,6 +56,16 @@ from taxonomy_mapping.taxonomy_graph import (
65
56
  build_taxonomy_graph, dag_to_tree, TaxonNode)
66
57
 
67
58
 
59
+ #%% Example usage
60
+
61
+ """
62
+ python map_classification_categories.py \
63
+ desired_label_spec.json \
64
+ /path/to/classifier/label_spec.json \
65
+ $HOME/camera-traps-private/camera_trap_taxonomy_mapping.csv
66
+ """
67
+
68
+
68
69
  #%% Main function
69
70
 
70
71
  def main(desired_label_spec_json_path: str,
@@ -1,74 +1,59 @@
1
- ########
2
- #
3
- # merge_classification_detection_output.py
4
- #
5
- # Merges classification results with Batch Detection API outputs.
6
- #
7
- # This script takes 2 main files as input:
8
- #
9
- # 1) Either a "dataset CSV" (output of create_classification_dataset.py) or a
10
- # "classification results CSV" (output of evaluate_model.py). The CSV is
11
- # expected to have columns listed below. The 'label' and [label names] columns
12
- # are optional, but at least one of them must be provided.
13
- # * 'path': str, path to cropped image
14
- # * if passing in a detections JSON, must match
15
- # <img_file>___cropXX_mdvY.Y.jpg
16
- # * if passing in a queried images JSON, must match
17
- # <dataset>/<img_file>___cropXX_mdvY.Y.jpg or
18
- # <dataset>/<img_file>___cropXX.jpg
19
- # * 'label': str, label assigned to this crop
20
- # * [label names]: float, confidence in each label
21
- #
22
- # 2) Either a "detections JSON" (output of MegaDetector) or a "queried images
23
- # JSON" (output of json_validatory.py).
24
- #
25
- # If the CSV contains [label names] columns (e.g., output of evaluate_model.py),
26
- # then each crop's "classifications" output will have one value per category.
27
- # Categories are sorted decreasing by confidence.
28
- # "classifications": [
29
- # ["3", 0.901],
30
- # ["1", 0.071],
31
- # ["4", 0.025],
32
- # ["2", 0.003],
33
- # ]
34
- #
35
- # If the CSV only contains the 'label' column (e.g., output of
36
- # create_classification_dataset.py), then each crop's "classifications" output
37
- # will have only one value, with a confidence of 1.0. The label's classification
38
- # category ID is always greater than 1,000,000, to distinguish it from a predicted
39
- # category ID.
40
- # "classifications": [
41
- # ["1000004", 1.0]
42
- # ]
43
- #
44
- # If the CSV contains both [label names] and 'label' columns, then both the
45
- # predicted categories and label category will be included. By default, the
46
- # label-category is included last; if the --label-first flag is given, then the
47
- # label category is placed first in the results.
48
- # "classifications": [
49
- # ["1000004", 1.0], # label put first if --label-first flag is given
50
- # ["3", 0.901], # all other results are sorted by confidence
51
- # ["1", 0.071],
52
- # ["4", 0.025],
53
- # ["2", 0.003]
54
- # ]
55
- #
56
- ########
1
+ """
57
2
 
58
- #%% Example usage
3
+ merge_classification_detection_output.py
4
+
5
+ Merges classification results with Batch Detection API outputs.
6
+
7
+ This script takes 2 main files as input:
8
+
9
+ 1) Either a "dataset CSV" (output of create_classification_dataset.py) or a
10
+ "classification results CSV" (output of evaluate_model.py). The CSV is
11
+ expected to have columns listed below. The 'label' and [label names] columns
12
+ are optional, but at least one of them must be provided.
13
+ * 'path': str, path to cropped image
14
+ * if passing in a detections JSON, must match
15
+ <img_file>___cropXX_mdvY.Y.jpg
16
+ * if passing in a queried images JSON, must match
17
+ <dataset>/<img_file>___cropXX_mdvY.Y.jpg or
18
+ <dataset>/<img_file>___cropXX.jpg
19
+ * 'label': str, label assigned to this crop
20
+ * [label names]: float, confidence in each label
21
+
22
+ 2) Either a "detections JSON" (output of MegaDetector) or a "queried images
23
+ JSON" (output of json_validator.py).
24
+
25
+ If the CSV contains [label names] columns (e.g., output of evaluate_model.py),
26
+ then each crop's "classifications" output will have one value per category.
27
+ Categories are sorted decreasing by confidence.
28
+ "classifications": [
29
+ ["3", 0.901],
30
+ ["1", 0.071],
31
+ ["4", 0.025],
32
+ ["2", 0.003],
33
+ ]
34
+
35
+ If the CSV only contains the 'label' column (e.g., output of
36
+ create_classification_dataset.py), then each crop's "classifications" output
37
+ will have only one value, with a confidence of 1.0. The label's classification
38
+ category ID is always greater than 1,000,000, to distinguish it from a predicted
39
+ category ID.
40
+ "classifications": [
41
+ ["1000004", 1.0]
42
+ ]
43
+
44
+ If the CSV contains both [label names] and 'label' columns, then both the
45
+ predicted categories and label category will be included. By default, the
46
+ label-category is included last; if the --label-first flag is given, then the
47
+ label category is placed first in the results.
48
+ "classifications": [
49
+ ["1000004", 1.0], # label put first if --label-first flag is given
50
+ ["3", 0.901], # all other results are sorted by confidence
51
+ ["1", 0.071],
52
+ ["4", 0.025],
53
+ ["2", 0.003]
54
+ ]
59
55
 
60
56
  """
61
- python merge_classification_detection_output.py \
62
- BASE_LOGDIR/LOGDIR/outputs_test.csv.gz \
63
- BASE_LOGDIR/label_index.json \
64
- BASE_LOGDIR/queried_images.json \
65
- --classifier-name "efficientnet-b3-idfg-moredata" \
66
- --detector-output-cache-dir $HOME/classifier-training/mdcache \
67
- --detector-version "4.1" \
68
- --output-json BASE_LOGDIR/LOGDIR/classifier_results.json \
69
- --datasets idfg idfg_swwlf_2019
70
- """
71
-
72
57
 
73
58
  #%% Imports
74
59
 
@@ -88,6 +73,21 @@ from tqdm import tqdm
88
73
  from md_utils.ct_utils import truncate_float
89
74
 
90
75
 
76
+ #%% Example usage
77
+
78
+ """
79
+ python merge_classification_detection_output.py \
80
+ BASE_LOGDIR/LOGDIR/outputs_test.csv.gz \
81
+ BASE_LOGDIR/label_index.json \
82
+ BASE_LOGDIR/queried_images.json \
83
+ --classifier-name "efficientnet-b3-idfg-moredata" \
84
+ --detector-output-cache-dir $HOME/classifier-training/mdcache \
85
+ --detector-version "4.1" \
86
+ --output-json BASE_LOGDIR/LOGDIR/classifier_results.json \
87
+ --datasets idfg idfg_swwlf_2019
88
+ """
89
+
90
+
91
91
  #%% Support functions
92
92
 
93
93
  def row_to_classification_list(row: Mapping[str, Any],