megadetector 5.0.28-py3-none-any.whl → 10.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector has been flagged as possibly problematic.

Files changed (197)
  1. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  2. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  3. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  4. megadetector/classification/aggregate_classifier_probs.py +3 -3
  5. megadetector/classification/analyze_failed_images.py +5 -5
  6. megadetector/classification/cache_batchapi_outputs.py +5 -5
  7. megadetector/classification/create_classification_dataset.py +11 -12
  8. megadetector/classification/crop_detections.py +10 -10
  9. megadetector/classification/csv_to_json.py +8 -8
  10. megadetector/classification/detect_and_crop.py +13 -15
  11. megadetector/classification/efficientnet/model.py +8 -8
  12. megadetector/classification/efficientnet/utils.py +6 -5
  13. megadetector/classification/evaluate_model.py +7 -7
  14. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  15. megadetector/classification/json_to_azcopy_list.py +1 -1
  16. megadetector/classification/json_validator.py +29 -32
  17. megadetector/classification/map_classification_categories.py +9 -9
  18. megadetector/classification/merge_classification_detection_output.py +12 -9
  19. megadetector/classification/prepare_classification_script.py +19 -19
  20. megadetector/classification/prepare_classification_script_mc.py +26 -26
  21. megadetector/classification/run_classifier.py +4 -4
  22. megadetector/classification/save_mislabeled.py +6 -6
  23. megadetector/classification/train_classifier.py +1 -1
  24. megadetector/classification/train_classifier_tf.py +9 -9
  25. megadetector/classification/train_utils.py +10 -10
  26. megadetector/data_management/annotations/annotation_constants.py +1 -2
  27. megadetector/data_management/camtrap_dp_to_coco.py +79 -46
  28. megadetector/data_management/cct_json_utils.py +103 -103
  29. megadetector/data_management/cct_to_md.py +49 -49
  30. megadetector/data_management/cct_to_wi.py +33 -33
  31. megadetector/data_management/coco_to_labelme.py +75 -75
  32. megadetector/data_management/coco_to_yolo.py +210 -193
  33. megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
  34. megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
  35. megadetector/data_management/databases/integrity_check_json_db.py +228 -200
  36. megadetector/data_management/databases/subset_json_db.py +33 -33
  37. megadetector/data_management/generate_crops_from_cct.py +88 -39
  38. megadetector/data_management/get_image_sizes.py +54 -49
  39. megadetector/data_management/labelme_to_coco.py +133 -125
  40. megadetector/data_management/labelme_to_yolo.py +159 -73
  41. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  42. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  43. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  44. megadetector/data_management/lila/download_lila_subset.py +21 -24
  45. megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
  46. megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
  47. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  48. megadetector/data_management/lila/lila_common.py +73 -70
  49. megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
  50. megadetector/data_management/mewc_to_md.py +344 -340
  51. megadetector/data_management/ocr_tools.py +262 -255
  52. megadetector/data_management/read_exif.py +249 -227
  53. megadetector/data_management/remap_coco_categories.py +90 -28
  54. megadetector/data_management/remove_exif.py +81 -21
  55. megadetector/data_management/rename_images.py +187 -187
  56. megadetector/data_management/resize_coco_dataset.py +588 -120
  57. megadetector/data_management/speciesnet_to_md.py +41 -41
  58. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  59. megadetector/data_management/yolo_output_to_md_output.py +248 -122
  60. megadetector/data_management/yolo_to_coco.py +333 -191
  61. megadetector/detection/change_detection.py +832 -0
  62. megadetector/detection/process_video.py +340 -337
  63. megadetector/detection/pytorch_detector.py +358 -278
  64. megadetector/detection/run_detector.py +399 -186
  65. megadetector/detection/run_detector_batch.py +404 -377
  66. megadetector/detection/run_inference_with_yolov5_val.py +340 -327
  67. megadetector/detection/run_tiled_inference.py +257 -249
  68. megadetector/detection/tf_detector.py +24 -24
  69. megadetector/detection/video_utils.py +332 -295
  70. megadetector/postprocessing/add_max_conf.py +19 -11
  71. megadetector/postprocessing/categorize_detections_by_size.py +45 -45
  72. megadetector/postprocessing/classification_postprocessing.py +468 -433
  73. megadetector/postprocessing/combine_batch_outputs.py +23 -23
  74. megadetector/postprocessing/compare_batch_results.py +590 -525
  75. megadetector/postprocessing/convert_output_format.py +106 -102
  76. megadetector/postprocessing/create_crop_folder.py +347 -147
  77. megadetector/postprocessing/detector_calibration.py +173 -168
  78. megadetector/postprocessing/generate_csv_report.py +508 -499
  79. megadetector/postprocessing/load_api_results.py +48 -27
  80. megadetector/postprocessing/md_to_coco.py +133 -102
  81. megadetector/postprocessing/md_to_labelme.py +107 -90
  82. megadetector/postprocessing/md_to_wi.py +40 -40
  83. megadetector/postprocessing/merge_detections.py +92 -114
  84. megadetector/postprocessing/postprocess_batch_results.py +319 -301
  85. megadetector/postprocessing/remap_detection_categories.py +91 -38
  86. megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
  87. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  88. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  89. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
  90. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  91. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  92. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  93. megadetector/postprocessing/validate_batch_results.py +70 -70
  94. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  95. megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
  96. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
  97. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
  98. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  99. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  100. megadetector/taxonomy_mapping/species_lookup.py +156 -74
  101. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  102. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  103. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  104. megadetector/utils/ct_utils.py +1049 -211
  105. megadetector/utils/directory_listing.py +21 -77
  106. megadetector/utils/gpu_test.py +22 -22
  107. megadetector/utils/md_tests.py +632 -529
  108. megadetector/utils/path_utils.py +1520 -431
  109. megadetector/utils/process_utils.py +41 -41
  110. megadetector/utils/split_locations_into_train_val.py +62 -62
  111. megadetector/utils/string_utils.py +148 -27
  112. megadetector/utils/url_utils.py +489 -176
  113. megadetector/utils/wi_utils.py +2658 -2526
  114. megadetector/utils/write_html_image_list.py +137 -137
  115. megadetector/visualization/plot_utils.py +34 -30
  116. megadetector/visualization/render_images_with_thumbnails.py +39 -74
  117. megadetector/visualization/visualization_utils.py +487 -435
  118. megadetector/visualization/visualize_db.py +232 -198
  119. megadetector/visualization/visualize_detector_output.py +82 -76
  120. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
  121. megadetector-10.0.0.dist-info/RECORD +139 -0
  122. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
  123. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  124. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  125. megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
  126. megadetector/api/batch_processing/api_core/server.py +0 -294
  127. megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
  128. megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
  129. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  130. megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
  131. megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
  132. megadetector/api/batch_processing/api_core/server_utils.py +0 -88
  133. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  134. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  135. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  136. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  137. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  138. megadetector/api/synchronous/__init__.py +0 -0
  139. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  140. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
  141. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
  142. megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
  143. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  144. megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
  145. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  146. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  147. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  148. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  149. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  150. megadetector/data_management/importers/awc_to_json.py +0 -191
  151. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  152. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  153. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  154. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  155. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  156. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  157. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  158. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  159. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  160. megadetector/data_management/importers/ena24_to_json.py +0 -276
  161. megadetector/data_management/importers/filenames_to_json.py +0 -386
  162. megadetector/data_management/importers/helena_to_cct.py +0 -283
  163. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  164. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  165. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  166. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  167. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  168. megadetector/data_management/importers/missouri_to_json.py +0 -490
  169. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  170. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  171. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  172. megadetector/data_management/importers/pc_to_json.py +0 -365
  173. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  174. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  175. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  176. megadetector/data_management/importers/rspb_to_json.py +0 -356
  177. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  178. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  179. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  180. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  181. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  182. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  183. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  184. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  185. megadetector/data_management/importers/ubc_to_json.py +0 -399
  186. megadetector/data_management/importers/umn_to_json.py +0 -507
  187. megadetector/data_management/importers/wellington_to_json.py +0 -263
  188. megadetector/data_management/importers/wi_to_json.py +0 -442
  189. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  190. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  191. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  192. megadetector/utils/azure_utils.py +0 -178
  193. megadetector/utils/sas_blob_utils.py +0 -509
  194. megadetector-5.0.28.dist-info/RECORD +0 -209
  195. /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
  196. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
  197. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
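
The remainder of this diff reproduces a single file in full: megadetector/postprocessing/classification_postprocessing.py (item 72 above). Most hunks are whitespace-only (trailing whitespace stripped in 10.0.0); the substantive changes are a new detection_category_names_to_smooth option and a shared _detection_is_relevant_for_smoothing() helper. As a hedged sketch of the module's image-level entry point (the function and option names come from the diff itself; the file paths are placeholders):

# A minimal sketch, assuming an MD-formatted classifier results file on disk;
# 'md_results.json' and 'md_results_smoothed.json' are hypothetical paths.
from megadetector.postprocessing.classification_postprocessing import (
    ClassificationSmoothingOptions,
    smooth_classification_results_image_level,
)

options = ClassificationSmoothingOptions()
# New in 10.0.0: restrict smoothing to these detection categories
# (None means smooth all categories); the default is ['animal'].
options.detection_category_names_to_smooth = ['animal']

smoothed = smooth_classification_results_image_level(
    'md_results.json',
    output_file='md_results_smoothed.json',
    options=options)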
@@ -3,12 +3,12 @@
 classification_postprocessing.py
 
 Functions for postprocessing species classification results, particularly:
-
+
 * Smoothing results within an image (an image with 700 cows and one deer is really just 701
 cows)
 * Smoothing results within a sequence (a sequence that looks like deer/deer/deer/elk/deer/deer
 is really just a deer)
-
+
 """
 
 #%% Constants and imports
@@ -32,7 +32,7 @@ from megadetector.utils.wi_utils import taxonomy_level_string_to_index
 from megadetector.utils.wi_utils import non_taxonomic_prediction_strings
 from megadetector.utils.wi_utils import human_prediction_string
 from megadetector.utils.wi_utils import animal_prediction_string
-from megadetector.utils.wi_utils import blank_prediction_string
+from megadetector.utils.wi_utils import blank_prediction_string # noqa
 
 
 #%% Options classes
@@ -44,86 +44,94 @@ class ClassificationSmoothingOptions:
     """
 
     def __init__(self):
-
-        #: How many detections do we need in a dominant category to overwrite
-        #: non-dominant classifications? This is irrelevant if
+
+        #: How many detections do we need in a dominant category to overwrite
+        #: non-dominant classifications? This is irrelevant if
         #: max_detections_nondominant_class <= 1.
         self.min_detections_to_overwrite_secondary = 4
-
-        #: Even if we have a dominant class, if a non-dominant class has at least
+
+        #: Even if we have a dominant class, if a non-dominant class has at least
         #: this many classifications in an image, leave them alone.
         #:
         #: If this is <= 1, we won't replace non-dominant, non-other classes
         #: with the dominant class, even if there are 900 cows and 1 deer.
         self.max_detections_nondominant_class = 1
-
-        #: How many detections do we need in a dominant category to overwrite
-        #: non-dominant classifications in the same family? If this is <= 0,
-        #: we'll skip this step. This option doesn't mean anything if
+
+        #: How many detections do we need in a dominant category to overwrite
+        #: non-dominant classifications in the same family? If this is <= 0,
+        #: we'll skip this step. This option doesn't mean anything if
         #: max_detections_nondominant_class_same_family <= 1.
         self.min_detections_to_overwrite_secondary_same_family = 2
-
-        #: If we have this many classifications of a nondominant category,
+
+        #: If we have this many classifications of a nondominant category,
         #: we won't do same-family overwrites. <= 1 means "even if there are
         #: a million deer, if there are two million moose, call all the deer
-        #: moose". This option doesn't mean anything if
+        #: moose". This option doesn't mean anything if
         #: min_detections_to_overwrite_secondary_same_family <= 0.
         self.max_detections_nondominant_class_same_family = -1
-
-        #: If the dominant class has at least this many classifications, overwrite
+
+        #: If the dominant class has at least this many classifications, overwrite
         #: "other" classifications with the dominant class
         self.min_detections_to_overwrite_other = 2
-
+
         #: Names to treat as "other" categories; can't be None, but can be empty
         #:
         #: "Other" classifications will be changed to the dominant category, regardless
-        #: of confidence, as long as there are at least min_detections_to_overwrite_other
+        #: of confidence, as long as there are at least min_detections_to_overwrite_other
        #: examples of the dominant class. For example, cow/other will remain unchanged,
        #: but cow/cow/other will become cow/cow/cow.
        self.other_category_names = ['other','unknown','no cv result','animal','blank','mammal']
-
+
        #: We're not even going to mess around with classifications below this threshold.
        #:
        #: We won't count them, we won't over-write them, they don't exist during the
        #: within-image smoothing step.
        self.classification_confidence_threshold = 0.5
-
+
        #: We're not even going to mess around with detections below this threshold.
        #:
        #: We won't count them, we won't over-write them, they don't exist during the
        #: within-image smoothing step.
        self.detection_confidence_threshold = 0.15
-
+
        #: If classification descriptions are present and appear to represent taxonomic
-       #: information, should we propagate classifications when lower-level taxa are more
-       #: common in an image? For example, if we see "carnivore/fox/fox/deer", should
+       #: information, should we propagate classifications when lower-level taxa are more
+       #: common in an image? For example, if we see "carnivore/fox/fox/deer", should
        #: we make that "fox/fox/fox/deer"?
        self.propagate_classifications_through_taxonomy = True
-
-       #: When propagating classifications down through taxonomy levels, we have to
+
+       #: When propagating classifications down through taxonomy levels, we have to
        #: decide whether we prefer more frequent categories or more specific categories.
        #: taxonomy_propagation_level_weight and taxonomy_propagation_count_weight
        #: balance levels against counts in this process.
        self.taxonomy_propagation_level_weight = 1.0
-
-       #: When propagating classifications down through taxonomy levels, we have to
+
+       #: When propagating classifications down through taxonomy levels, we have to
        #: decide whether we prefer more frequent categories or more specific categories.
        #: taxonomy_propagation_level_weight and taxonomy_propagation_count_weight
        #: balance levels against counts in this process.
        #:
        #: With a very low default value, this just breaks ties.
        self.taxonomy_propagation_count_weight = 0.01
-
+
        #: Should we record information about the state of labels prior to smoothing?
        self.add_pre_smoothing_description = True
-
+
        #: When a dict (rather than a file) is passed to either smoothing function,
        #: if this is True, we'll make a copy of the input dict before modifying.
        self.modify_in_place = False
-
+
+       #: Only include these categories in the smoothing process (None to use all categories)
+       self.detection_category_names_to_smooth = ['animal']
+
        #: Debug options
        self.break_at_image = None
 
+       ## Populated internally
+
+       #: #: Only include these categories in the smoothing process (None to use all categories)
+       self._detection_category_ids_to_smooth = None
+
 
 #%% Utility functions
 
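The hunk above adds the new detection_category_names_to_smooth option and its internal companion _detection_category_ids_to_smooth. A minimal configuration sketch, using only fields and defaults visible in this hunk:

# Sketch only; the values shown are the documented defaults from the hunk above.
from megadetector.postprocessing.classification_postprocessing import ClassificationSmoothingOptions

options = ClassificationSmoothingOptions()
options.detection_category_names_to_smooth = ['animal']  # None = smooth all categories
options.min_detections_to_overwrite_other = 2            # cow/cow/other -> cow/cow/cow
options.classification_confidence_threshold = 0.5        # ignore classifications below this
options.detection_confidence_threshold = 0.15            # ignore detections below this
# options._detection_category_ids_to_smooth is populated internally from the
# names above when smoothing runs; it is not meant to be set directly.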
@@ -131,60 +139,79 @@ def _results_for_sequence(images_this_sequence,filename_to_results):
     """
     Fetch MD results for every image in this sequence, based on the 'file_name' field
     """
-
+
     results_this_sequence = []
     for im in images_this_sequence:
         fn = im['file_name']
         results_this_image = filename_to_results[fn]
         assert isinstance(results_this_image,dict)
         results_this_sequence.append(results_this_image)
-
+
     return results_this_sequence
-
-
+
+
 def _sort_images_by_time(images):
     """
     Returns a copy of [images], sorted by the 'datetime' field (ascending).
     """
-    return sorted(images, key = lambda im: im['datetime'])
+    return sorted(images, key = lambda im: im['datetime'])
+
+
+def _detection_is_relevant_for_smoothing(det,options):
+    """
+    Determine whether [det] has classifications that might be meaningful for smoothing.
+    """
+
+    if ('classifications' not in det) or \
+       (det['conf'] < options.detection_confidence_threshold):
+        return False
+
+    # Ignore non-smoothed categories
+    if (options._detection_category_ids_to_smooth is not None) and \
+       (det['category'] not in options._detection_category_ids_to_smooth):
+        return False
+
+    return True
 
 
 def count_detections_by_classification_category(detections,options=None):
     """
     Count the number of instances of each classification category in the detections list
-    [detections] that have an above-threshold detection. Sort results in descending
+    [detections] that have an above-threshold detection. Sort results in descending
     order by count. Returns a dict mapping category ID --> count. If no detections
     are above threshold, returns an empty dict.
-
+
     Only processes the top classification for each detection.
 
     Args:
-        detections: detections list
+        detections (list of dict): detections list
         options (ClassificationSmoothingOptions, optional): see ClassificationSmoothingOptions
 
     Returns:
         dict mapping above-threshold category IDs to counts
     """
-
+
     if detections is None or len(detections) == 0:
         return {}
-
+
     if options is None:
         options = ClassificationSmoothingOptions()
 
     category_to_count = defaultdict(int)
-
+
     for det in detections:
-        if ('classifications' in det) and (det['conf'] >= options.detection_confidence_threshold):
-            # assert len(det['classifications']) == 1
-            c = det['classifications'][0]
-            if c[1] >= options.classification_confidence_threshold:
-                category_to_count[c[0]] += 1
-
+
+        if not _detection_is_relevant_for_smoothing(det,options):
+            continue
+
+        c = det['classifications'][0]
+        if c[1] >= options.classification_confidence_threshold:
+            category_to_count[c[0]] += 1
+
     category_to_count = {k: v for k, v in sorted(category_to_count.items(),
-                                                 key=lambda item: item[1],
+                                                 key=lambda item: item[1],
                                                  reverse=True)}
-
+
     return category_to_count
 
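The new _detection_is_relevant_for_smoothing() helper above centralizes the filter that count_detections_by_classification_category() (and, later in this diff, the smoothing loops) previously inlined. A toy illustration of what passes the filter, with an invented MD-format detection (detection category '1' is 'animal' in MegaDetector output); note that the helper is private, so calling it directly is for illustration only:

# Toy detection dict; all values are invented for illustration.
from megadetector.postprocessing.classification_postprocessing import (
    ClassificationSmoothingOptions,
    _detection_is_relevant_for_smoothing,
)

det = {
    'category': '1',                   # '1' = animal in MegaDetector results
    'conf': 0.92,                      # detection confidence, above the 0.15 default
    'classifications': [['5', 0.81]],  # top classification as [category_id, confidence]
}

options = ClassificationSmoothingOptions()
# With default options, _detection_category_ids_to_smooth is still None
# (no category filter), so this detection is considered relevant.
assert _detection_is_relevant_for_smoothing(det, options)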
@@ -199,7 +226,7 @@ def get_classification_description_string(category_to_count,classification_descr
     Returns:
         string: a description of this image's content, e.g. "rabbit (4), human (1)"
     """
-
+
     category_strings = []
     # category_id = next(iter(category_to_count))
     for category_id in category_to_count:
@@ -212,29 +239,31 @@ def get_classification_description_string(category_to_count,classification_descr
         count = category_to_count[category_id]
         category_string = '{} ({})'.format(category_name,count)
         category_strings.append(category_string)
-
+
     return ', '.join(category_strings)
-
+
 
 def _print_counts_with_names(category_to_count,classification_descriptions):
     """
     Print a list of classification categories with counts, based in the name --> count
     dict [category_to_count]
     """
-
+
     for category_id in category_to_count:
         category_name = classification_descriptions[category_id]
         count = category_to_count[category_id]
         print('{}: {} ({})'.format(category_id,category_name,count))
-
-
+
+
 def _prepare_results_for_smoothing(input_file,options):
     """
-    Load results from [input_file] if necessary, prepare category descriptions
+    Load results from [input_file] if necessary, prepare category descriptions
     for smoothing. Adds pre-smoothing descriptions to every image if the options
     say we're supposed to do that.
+
+    May modify some fields in [options].
     """
-
+
     if isinstance(input_file,str):
         with open(input_file,'r') as f:
             print('Loading results from:\n{}'.format(input_file))
@@ -249,71 +278,82 @@ def _prepare_results_for_smoothing(input_file,options):
 
 
     ## Category processing
-
+
     category_name_to_id = {d['classification_categories'][k]:k for k in d['classification_categories']}
     other_category_ids = []
     for s in options.other_category_names:
         if s in category_name_to_id:
             other_category_ids.append(category_name_to_id[s])
-
+
+    # Possibly update the list of category IDs we should smooth
+    if options.detection_category_names_to_smooth is None:
+        options._detection_category_ids_to_smooth = None
+    else:
+        detection_category_id_to_name = d['detection_categories']
+        detection_category_name_to_id = invert_dictionary(detection_category_id_to_name)
+        options._detection_category_ids_to_smooth = []
+        for category_name in options.detection_category_names_to_smooth:
+            options._detection_category_ids_to_smooth.append(detection_category_name_to_id[category_name])
+
     # Before we do anything else, get rid of everything but the top classification
     # for each detection, and remove the 'classifications' field from detections with
     # no classifications.
     for im in tqdm(d['images']):
-
+
         if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
             continue
-
+
         detections = im['detections']
-
+
         for det in detections:
-
+
             if 'classifications' not in det:
                 continue
             if len(det['classifications']) == 0:
                 del det['classifications']
                 continue
-
+
             classification_confidence_values = [c[1] for c in det['classifications']]
             assert is_list_sorted(classification_confidence_values,reverse=True)
             det['classifications'] = [det['classifications'][0]]
-
+
         # ...for each detection in this image
-
+
     # ...for each image
-
-
-    ## Clean up classification descriptions so we can test taxonomic relationships
-    ## by substring testing.
-
+
+
+    ## Clean up classification descriptions...
+
+    # ...so we can test taxonomic relationships by substring testing.
+
     classification_descriptions_clean = None
     classification_descriptions = None
-
+
     if 'classification_category_descriptions' in d:
         classification_descriptions = d['classification_category_descriptions']
         classification_descriptions_clean = {}
         # category_id = next(iter(classification_descriptions))
-        for category_id in classification_descriptions:
+        for category_id in classification_descriptions:
             classification_descriptions_clean[category_id] = \
                 clean_taxonomy_string(classification_descriptions[category_id]).strip(';').lower()
-
-
+
+
     ## Optionally add pre-smoothing descriptions to every image
-
-    if options.add_pre_smoothing_description:
-
+
+    if options.add_pre_smoothing_description and (classification_descriptions is not None):
+
         for im in tqdm(d['images']):
-
+
             if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
                 continue
-
-            detections = im['detections']
+
+            detections = im['detections']
             category_to_count = count_detections_by_classification_category(detections, options)
-
+
             im['pre_smoothing_description'] = \
                 get_classification_description_string(category_to_count, classification_descriptions)
-
-
+
+
     return {
         'd':d,
         'other_category_ids':other_category_ids,
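The new block above resolves the configured category names against the results file's detection_categories map before smoothing begins. A self-contained sketch of that resolution; the category map shown is the standard MegaDetector one, assumed here for illustration, and the dict comprehension stands in for invert_dictionary():

# Standard MD detection categories, assumed for illustration.
detection_category_id_to_name = {'1': 'animal', '2': 'person', '3': 'vehicle'}
# Equivalent to what invert_dictionary() is used for above, assuming it
# swaps keys and values.
detection_category_name_to_id = {v: k for k, v in detection_category_id_to_name.items()}
ids_to_smooth = [detection_category_name_to_id[name] for name in ['animal']]
print(ids_to_smooth)  # ['1']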
@@ -321,7 +361,7 @@ def _prepare_results_for_smoothing(input_file,options):
         'classification_descriptions':classification_descriptions
     }
 
-# ...def _prepare_results_for_smoothing(...)
+# ...def _prepare_results_for_smoothing(...)
 
 
 def _smooth_classifications_for_list_of_detections(detections,
@@ -332,142 +372,140 @@ def _smooth_classifications_for_list_of_detections(detections,
     """
     Smooth classifications for a list of detections, which may have come from a single
     image, or may represent an entire sequence.
-
+
     Returns None if no changes are made, else a dict.
-
-    classification_descriptions_clean should be semicolon-delimited taxonomic strings
+
+    classification_descriptions_clean should be semicolon-delimited taxonomic strings
     from which common names and GUIDs have already been removed.
-
+
     Assumes there is only one classification per detection, i.e. that non-top classifications
-    have already been remoevd.
+    have already been remoevd.
     """
-
+
     ## Count the number of instances of each category in this image
-
+
     category_to_count = count_detections_by_classification_category(detections, options)
     # _print_counts_with_names(category_to_count,classification_descriptions)
     # get_classification_description_string(category_to_count, classification_descriptions)
-
+
     if len(category_to_count) <= 1:
         return None
-
+
     keys = list(category_to_count.keys())
-
-    # Handle a quirky special case: if the most common category is "other" and
+
+    # Handle a quirky special case: if the most common category is "other" and
     # it's "tied" with the second-most-common category, swap them
     if (len(keys) > 1) and \
        (keys[0] in other_category_ids) and \
        (keys[1] not in other_category_ids) and \
        (category_to_count[keys[0]] == category_to_count[keys[1]]):
         keys[1], keys[0] = keys[0], keys[1]
-
-    max_count = category_to_count[keys[0]]
+
+    max_count = category_to_count[keys[0]]
     most_common_category = keys[0]
     del keys
-
-
+
+
     ## Debug tools
-
+
     verbose_debug_enabled = False
-
+
     if options.break_at_image is not None:
         for det in detections:
             if 'image_filename' in det and \
                det['image_filename'] == options.break_at_image:
                 verbose_debug_enabled = True
                 break
-
+
     if verbose_debug_enabled:
         _print_counts_with_names(category_to_count,classification_descriptions)
         from IPython import embed; embed()
-
-
+
+
     ## Possibly change "other" classifications to the most common category
-
+
     # ...if the dominant category is not an "other" category.
-
+
     n_other_classifications_changed_this_image = 0
-
+
     # If we have at least *min_detections_to_overwrite_other* in a category that isn't
     # "other", change all "other" classifications to that category
     if (max_count >= options.min_detections_to_overwrite_other) and \
        (most_common_category not in other_category_ids):
-
+
         for det in detections:
-
-            if ('classifications' not in det) or \
-               (det['conf'] < options.detection_confidence_threshold):
+
+            if not _detection_is_relevant_for_smoothing(det,options):
                 continue
-
+
             assert len(det['classifications']) == 1
             c = det['classifications'][0]
-
+
             if (c[1] >= options.classification_confidence_threshold) and \
                (c[0] in other_category_ids):
-
+
                 if verbose_debug_enabled:
                     print('Replacing {} with {}'.format(
                         classification_descriptions[c[0]],
                         classification_descriptions[c[1]]))
-
+
                 n_other_classifications_changed_this_image += 1
                 c[0] = most_common_category
-
+
             # ...if there are classifications for this detection
-
+
         # ...for each detection
-
+
     # ...if we should overwrite all "other" classifications
 
     if verbose_debug_enabled:
         print('Made {} other changes'.format(n_other_classifications_changed_this_image))
-
-
+
+
     ## Re-count
-
+
     category_to_count = count_detections_by_classification_category(detections, options)
-    # _print_counts_with_names(category_to_count,classification_descriptions)
+    # _print_counts_with_names(category_to_count,classification_descriptions)
     keys = list(category_to_count.keys())
-    max_count = category_to_count[keys[0]]
+    max_count = category_to_count[keys[0]]
     most_common_category = keys[0]
     del keys
-
-
+
+
     ## Possibly change some non-dominant classifications to the dominant category
-
+
     process_taxonomic_rules = \
         (classification_descriptions_clean is not None) and \
         (len(classification_descriptions_clean) > 0) and \
         (len(category_to_count) > 1)
-
+
     n_detections_flipped_this_image = 0
-
-    # Don't do this if the most common category is an "other" category, or
+
+    # Don't do this if the most common category is an "other" category, or
     # if we don't have enough of the most common category
     if (most_common_category not in other_category_ids) and \
        (max_count >= options.min_detections_to_overwrite_secondary):
-
+
         # i_det = 0; det = detections[i_det]
         for i_det,det in enumerate(detections):
-
-            if ('classifications' not in det) or \
-               (det['conf'] < options.detection_confidence_threshold):
+
+            if not _detection_is_relevant_for_smoothing(det,options):
                 continue
-
+
             assert len(det['classifications']) == 1
             c = det['classifications'][0]
-
+
             # Don't over-write the most common category with itself
             if c[0] == most_common_category:
                 continue
-
+
             # Don't bother with below-threshold classifications
             if c[1] < options.classification_confidence_threshold:
                 continue
-
+
             # If we're doing taxonomic processing, at this stage, don't turn children
             # into parents; we'll likely turn parents into children in the next stage.
-
+
             if process_taxonomic_rules:
 
                 most_common_category_description = \
@@ -475,180 +513,179 @@ def _smooth_classifications_for_list_of_detections(detections,
 
                 category_id_this_classification = c[0]
                 assert category_id_this_classification in category_to_count
-
+
                 category_description_this_classification = \
                     classification_descriptions_clean[category_id_this_classification]
-
-                # An empty description corresponds to the "animal" category. We don't handle
-                # "animal" here as a parent category, that would be handled in the "other smoothing"
+
+                # An empty description corresponds to the "animal" category. We don't handle
+                # "animal" here as a parent category, that would be handled in the "other smoothing"
                 # step above.
                 if len(category_description_this_classification) == 0:
                     continue
-
+
                 most_common_category_is_parent_of_this_category = \
                     most_common_category_description in category_description_this_classification
-
+
                 if most_common_category_is_parent_of_this_category:
                     continue
-
+
             # If we have fewer of this category than the most common category,
             # but not *too* many, flip it to the most common category.
             if (max_count > category_to_count[c[0]]) and \
                (category_to_count[c[0]] <= options.max_detections_nondominant_class):
-
+
                 c[0] = most_common_category
-                n_detections_flipped_this_image += 1
-
+                n_detections_flipped_this_image += 1
+
         # ...for each detection
 
-    # ...if the dominant category is legit
-
+    # ...if the dominant category is legit
+
     if verbose_debug_enabled:
         print('Made {} non-dominant --> dominant changes'.format(
             n_detections_flipped_this_image))
 
-
+
     ## Re-count
-
+
     category_to_count = count_detections_by_classification_category(detections, options)
-    # _print_counts_with_names(category_to_count,classification_descriptions)
+    # _print_counts_with_names(category_to_count,classification_descriptions)
     keys = list(category_to_count.keys())
-    max_count = category_to_count[keys[0]]
+    max_count = category_to_count[keys[0]]
     most_common_category = keys[0]
     del keys
-
-
+
+
     ## Possibly collapse higher-level taxonomic predictions down to lower levels
-
+
     n_taxonomic_changes_this_image = 0
-
+
     process_taxonomic_rules = \
         (classification_descriptions_clean is not None) and \
         (len(classification_descriptions_clean) > 0) and \
         (len(category_to_count) > 1)
-
+
     if process_taxonomic_rules and options.propagate_classifications_through_taxonomy:
-
+
         # det = detections[3]
         for det in detections:
-
-            if ('classifications' not in det) or \
-               (det['conf'] < options.detection_confidence_threshold):
+
+            if not _detection_is_relevant_for_smoothing(det,options):
                 continue
-
+
             assert len(det['classifications']) == 1
             c = det['classifications'][0]
-
+
             # Don't bother with any classifications below the confidence threshold
             if c[1] < options.classification_confidence_threshold:
                 continue
 
             category_id_this_classification = c[0]
             assert category_id_this_classification in category_to_count
-
+
             category_description_this_classification = \
                 classification_descriptions_clean[category_id_this_classification]
-
-            # An empty description corresponds to the "animal" category. We don't handle
-            # "animal" here as a parent category, that would be handled in the "other smoothing"
+
+            # An empty description corresponds to the "animal" category. We don't handle
+            # "animal" here as a parent category, that would be handled in the "other smoothing"
             # step above.
             if len(category_description_this_classification) == 0:
                 continue
-
+
             # We may have multiple child categories to choose from; this keeps track of
             # the "best" we've seen so far. "Best" is based on the level (species is better
             # than genus) and number.
             child_category_to_score = defaultdict(float)
-
+
             for category_id_of_candidate_child in category_to_count.keys():
-
+
                 # A category is never its own child
                 if category_id_of_candidate_child == category_id_this_classification:
                     continue
-
+
                 # Is this candidate a child of the current classification?
                 category_description_candidate_child = \
                     classification_descriptions_clean[category_id_of_candidate_child]
-
+
                 # An empty description corresponds to "animal", which can never
                 # be a child of another category.
                 if len(category_description_candidate_child) == 0:
                     continue
-
-                # As long as we're using "clean" descriptions, parent/child taxonomic
+
+                # As long as we're using "clean" descriptions, parent/child taxonomic
                 # relationships are defined by a substring relationship
                 is_child = category_description_this_classification in \
                     category_description_candidate_child
                 if not is_child:
                     continue
-
+
                 # How many instances of this child category are there?
                 child_category_count = category_to_count[category_id_of_candidate_child]
-
+
                 # What taxonomy level is this child category defined at?
                 child_category_level = taxonomy_level_index(
                     classification_descriptions[category_id_of_candidate_child])
-
+
                 child_category_to_score[category_id_of_candidate_child] = \
                     child_category_level * options.taxonomy_propagation_level_weight + \
                     child_category_count * options.taxonomy_propagation_count_weight
-
+
             # ...for each category we are considering reducing this classification to
-
+
             # Did we find a category we want to change this classification to?
             if len(child_category_to_score) > 0:
-
+
                 # Find the child category with the highest score
                 child_category_to_score = sort_dictionary_by_value(
                     child_category_to_score,reverse=True)
                 best_child_category = next(iter(child_category_to_score.keys()))
-
+
                 if verbose_debug_enabled:
                     old_category_name = \
                         classification_descriptions_clean[c[0]]
                     new_category_name = \
                         classification_descriptions_clean[best_child_category]
                     print('Replacing {} with {}'.format(
-                        old_category_name,new_category_name))
-
+                        old_category_name,new_category_name))
+
                 c[0] = best_child_category
-                n_taxonomic_changes_this_image += 1
-
+                n_taxonomic_changes_this_image += 1
+
         # ...for each detection
-
-    # ...if we have taxonomic information available
-
-
+
+    # ...if we have taxonomic information available
+
+
     ## Re-count
-
+
     category_to_count = count_detections_by_classification_category(detections, options)
-    # _print_counts_with_names(category_to_count,classification_descriptions)
+    # _print_counts_with_names(category_to_count,classification_descriptions)
     keys = list(category_to_count.keys())
-    max_count = category_to_count[keys[0]]
+    max_count = category_to_count[keys[0]]
     most_common_category = keys[0]
     del keys
-
-
+
+
     ## Possibly do within-family smoothing
-
+
     n_within_family_smoothing_changes = 0
-
+
     # min_detections_to_overwrite_secondary_same_family = -1
     # max_detections_nondominant_class_same_family = 1
     family_level = taxonomy_level_string_to_index('family')
-
+
     if process_taxonomic_rules:
-
+
         category_description_most_common_category = \
             classification_descriptions[most_common_category]
         most_common_category_taxonomic_level = \
-            taxonomy_level_index(category_description_most_common_category)
+            taxonomy_level_index(category_description_most_common_category)
         n_most_common_category = category_to_count[most_common_category]
         tokens = category_description_most_common_category.split(';')
         assert len(tokens) == 7
         most_common_category_family = tokens[3]
         most_common_category_genus = tokens[4]
-
+
     # Only consider remapping to genus or species level, and only when we have
     # a high enough count in the most common category
     if process_taxonomic_rules and \
@@ -656,36 +693,35 @@ def _smooth_classifications_for_list_of_detections(detections,
        (most_common_category not in other_category_ids) and \
        (most_common_category_taxonomic_level > family_level) and \
        (n_most_common_category >= options.min_detections_to_overwrite_secondary_same_family):
-
+
         # det = detections[0]
         for det in detections:
-
-            if ('classifications' not in det) or \
-               (det['conf'] < options.detection_confidence_threshold):
+
+            if not _detection_is_relevant_for_smoothing(det,options):
                 continue
-
+
             assert len(det['classifications']) == 1
             c = det['classifications'][0]
-
+
             # Don't over-write the most common category with itself
             if c[0] == most_common_category:
                 continue
-
+
             # Don't bother with below-threshold classifications
             if c[1] < options.classification_confidence_threshold:
-                continue
-
+                continue
+
             n_candidate_flip_category = category_to_count[c[0]]
-
+
             # Do we have too many of the non-dominant category to do this kind of swap?
             if n_candidate_flip_category > \
                 options.max_detections_nondominant_class_same_family:
                 continue
 
-            # Don't flip classes when it's a tie
+            # Don't flip classes when it's a tie
             if n_candidate_flip_category == n_most_common_category:
                 continue
-
+
             category_description_candidate_flip = \
                 classification_descriptions[c[0]]
             tokens = category_description_candidate_flip.split(';')
@@ -693,34 +729,33 @@ def _smooth_classifications_for_list_of_detections(detections,
             candidate_flip_category_family = tokens[3]
             candidate_flip_category_genus = tokens[4]
             candidate_flip_category_taxonomic_level = \
-                taxonomy_level_index(category_description_candidate_flip)
-
+                taxonomy_level_index(category_description_candidate_flip)
+
             # Only proceed if we have valid family strings
             if (len(candidate_flip_category_family) == 0) or \
                (len(most_common_category_family) == 0):
                 continue
-
-            # Only proceed if the candidate and the most common category are in the same family
+
+            # Only proceed if the candidate and the most common category are in the same family
             if candidate_flip_category_family != most_common_category_family:
                 continue
-
+
             # Don't flip from a species to the genus level in the same genus
             if (candidate_flip_category_genus == most_common_category_genus) and \
                (candidate_flip_category_taxonomic_level > \
                 most_common_category_taxonomic_level):
                 continue
-
+
             old_category_name = classification_descriptions_clean[c[0]]
             new_category_name = classification_descriptions_clean[most_common_category]
-
+
             c[0] = most_common_category
-            n_within_family_smoothing_changes += 1
-
+            n_within_family_smoothing_changes += 1
+
         # ...for each detection
-
+
     # ...if the dominant category is legit and we have taxonomic information available
-
-
+
     return {'n_other_classifications_changed_this_image':n_other_classifications_changed_this_image,
             'n_detections_flipped_this_image':n_detections_flipped_this_image,
             'n_taxonomic_changes_this_image':n_taxonomic_changes_this_image,
@@ -737,33 +772,33 @@ def _smooth_single_image(im,
     """
     Smooth classifications for a single image. Returns None if no changes are made,
     else a dict.
-
-    classification_descriptions_clean should be semicolon-delimited taxonomic strings
+
+    classification_descriptions_clean should be semicolon-delimited taxonomic strings
     from which common names and GUIDs have already been removed.
-
+
     Assumes there is only one classification per detection, i.e. that non-top classifications
     have already been remoevd.
     """
-
+
     if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
         return
-
+
     detections = im['detections']
-
+
     # Simplify debugging
     for det in detections:
         det['image_filename'] = im['file']
-
-    to_return = _smooth_classifications_for_list_of_detections(detections,
-                                                               options=options,
+
+    to_return = _smooth_classifications_for_list_of_detections(detections,
+                                                               options=options,
                                                                other_category_ids=other_category_ids,
-                                                               classification_descriptions=classification_descriptions,
+                                                               classification_descriptions=classification_descriptions,
                                                                classification_descriptions_clean=classification_descriptions_clean)
 
     # Clean out debug information
     for det in detections:
         del det['image_filename']
-
+
     return to_return
 
 # ...def smooth_single_image
@@ -775,104 +810,104 @@ def smooth_classification_results_image_level(input_file,output_file=None,option
     """
     Smooth classifications at the image level for all results in the MD-formatted results
     file [input_file], optionally writing a new set of results to [output_file].
-
-    This function generally expresses the notion that an image with 700 cows and one deer
+
+    This function generally expresses the notion that an image with 700 cows and one deer
     is really just 701 cows.
-
+
     Only count detections with a classification confidence threshold above
     [options.classification_confidence_threshold], which in practice means we're only
     looking at one category per detection.
-
+
     If an image has at least [options.min_detections_to_overwrite_secondary] such detections
     in the most common category, and no more than [options.max_detections_nondominant_class]
     in the second-most-common category, flip all detections to the most common
     category.
-
-    Optionally treat some classes as particularly unreliable, typically used to overwrite an
+
+    Optionally treat some classes as particularly unreliable, typically used to overwrite an
     "other" class.
-
+
     This function also removes everything but the non-dominant classification for each detection.
-
+
     Args:
         input_file (str): MegaDetector-formatted classification results file to smooth. Can
             also be an already-loaded results dict.
         output_file (str, optional): .json file to write smoothed results
-        options (ClassificationSmoothingOptions, optional): see
+        options (ClassificationSmoothingOptions, optional): see
            ClassificationSmoothingOptions for details.
-
+
     Returns:
         dict: MegaDetector-results-formatted dict, identical to what's written to
         [output_file] if [output_file] is not None.
     """
-
+
     ## Input validation
-
+
     if options is None:
         options = ClassificationSmoothingOptions()
-
+
     r = _prepare_results_for_smoothing(input_file, options)
     d = r['d']
     other_category_ids = r['other_category_ids']
     classification_descriptions_clean = r['classification_descriptions_clean']
     classification_descriptions = r['classification_descriptions']
-
-
+
+
     ## Smoothing
-
+
     n_other_classifications_changed = 0
     n_other_images_changed = 0
     n_taxonomic_images_changed = 0
-
+
     n_detections_flipped = 0
     n_images_changed = 0
-    n_taxonomic_classification_changes = 0
-
-    # im = d['images'][0]
+    n_taxonomic_classification_changes = 0
+
+    # im = d['images'][0]
     for im in tqdm(d['images']):
-
+
         r = _smooth_single_image(im,
                                  options,
                                  other_category_ids,
                                  classification_descriptions=classification_descriptions,
                                  classification_descriptions_clean=classification_descriptions_clean)
-
+
         if r is None:
             continue
-
+
         n_detections_flipped_this_image = r['n_detections_flipped_this_image']
         n_other_classifications_changed_this_image = \
             r['n_other_classifications_changed_this_image']
         n_taxonomic_changes_this_image = r['n_taxonomic_changes_this_image']
-
+
         n_detections_flipped += n_detections_flipped_this_image
         n_other_classifications_changed += n_other_classifications_changed_this_image
         n_taxonomic_classification_changes += n_taxonomic_changes_this_image
-
+
         if n_detections_flipped_this_image > 0:
             n_images_changed += 1
         if n_other_classifications_changed_this_image > 0:
             n_other_images_changed += 1
         if n_taxonomic_changes_this_image > 0:
             n_taxonomic_images_changed += 1
-
-    # ...for each image
-
+
+    # ...for each image
+
     print('Classification smoothing: changed {} detections on {} images'.format(
         n_detections_flipped,n_images_changed))
-
+
     print('"Other" smoothing: changed {} detections on {} images'.format(
         n_other_classifications_changed,n_other_images_changed))
-
+
     print('Taxonomic smoothing: changed {} detections on {} images'.format(
         n_taxonomic_classification_changes,n_taxonomic_images_changed))
-
-
+
+
     ## Write output
-
-    if output_file is not None:
+
+    if output_file is not None:
         print('Writing results after image-level smoothing to:\n{}'.format(output_file))
         with open(output_file,'w') as f:
-            json.dump(d,f,indent=1)
+            json.dump(d,f,indent=1)
 
     return d
 
@@ -880,7 +915,7 @@ def smooth_classification_results_image_level(input_file,output_file=None,option
 
 
 #%% Sequence-level smoothing
-
+
 def smooth_classification_results_sequence_level(input_file,
                                                  cct_sequence_information,
                                                  output_file=None,
@@ -888,39 +923,39 @@ def smooth_classification_results_sequence_level(input_file,
     """
     Smooth classifications at the sequence level for all results in the MD-formatted results
     file [md_results_file], optionally writing a new set of results to [output_file].
-
+
     This function generally expresses the notion that a sequence that looks like
     deer/deer/deer/elk/deer/deer/deer/deer is really just a deer.
-
+
     Args:
         input_file (str or dict): MegaDetector-formatted classification results file to smooth
-            (or already-loaded results). If you supply a dict, it's modified in place by default, but
-            a copy can be forced by setting options.modify_in_place=False.
+            (or already-loaded results). If you supply a dict, it's copied by default, but
+            in-place modification is supported via options.modify_in_place.
         cct_sequence_information (str, dict, or list): COCO Camera Traps file containing sequence IDs for
             each image (or an already-loaded CCT-formatted dict, or just the 'images' list from a CCT dict).
         output_file (str, optional): .json file to write smoothed results
-        options (ClassificationSmoothingOptions, optional): see
+        options (ClassificationSmoothingOptions, optional): see
            ClassificationSmoothingOptions for details.
-
+
     Returns:
         dict: MegaDetector-results-formatted dict, identical to what's written to
         [output_file] if [output_file] is not None.
     """
-
+
     ## Input validation
-
+
     if options is None:
         options = ClassificationSmoothingOptions()
-
+
     r = _prepare_results_for_smoothing(input_file, options)
     d = r['d']
     other_category_ids = r['other_category_ids']
     classification_descriptions_clean = r['classification_descriptions_clean']
     classification_descriptions = r['classification_descriptions']
-
-
+
+
     ## Make a list of images appearing in each sequence
-
+
     if isinstance(cct_sequence_information,list):
         image_info = cct_sequence_information
     elif isinstance(cct_sequence_information,str):
@@ -931,77 +966,77 @@ def smooth_classification_results_sequence_level(input_file,
    else:
        assert isinstance(cct_sequence_information,dict)
        image_info = cct_sequence_information['images']

    sequence_to_image_filenames = defaultdict(list)

    # im = image_info[0]
    for im in tqdm(image_info):
        sequence_to_image_filenames[im['seq_id']].append(im['file_name'])
    del image_info

    image_fn_to_classification_results = {}
    for im in d['images']:
        fn = im['file']
        assert fn not in image_fn_to_classification_results
        image_fn_to_classification_results[fn] = im


    ## Smoothing

    n_other_classifications_changed = 0
    n_other_sequences_changed = 0
    n_taxonomic_sequences_changed = 0
    n_within_family_sequences_changed = 0

    n_detections_flipped = 0
    n_sequences_changed = 0
    n_taxonomic_classification_changes = 0
    n_within_family_changes = 0

    # sequence_id = list(sequence_to_image_filenames.keys())[0]
    for sequence_id in sequence_to_image_filenames.keys():

        image_filenames_this_sequence = sequence_to_image_filenames[sequence_id]

        # if 'file' in image_filenames_this_sequence:
        #     from IPython import embed; embed()

        detections_this_sequence = []
        for image_filename in image_filenames_this_sequence:
            im = image_fn_to_classification_results[image_filename]
            if 'detections' not in im or im['detections'] is None:
                continue
            detections_this_sequence.extend(im['detections'])

            # Temporarily add image filenames to every detection,
            # for debugging
            for det in im['detections']:
                det['image_filename'] = im['file']

        if len(detections_this_sequence) == 0:
            continue

        r = _smooth_classifications_for_list_of_detections(
            detections=detections_this_sequence,
            options=options,
            other_category_ids=other_category_ids,
            classification_descriptions=classification_descriptions,
            classification_descriptions_clean=classification_descriptions_clean)

        if r is None:
            continue

        n_detections_flipped_this_sequence = r['n_detections_flipped_this_image']
        n_other_classifications_changed_this_sequence = \
            r['n_other_classifications_changed_this_image']
        n_taxonomic_changes_this_sequence = r['n_taxonomic_changes_this_image']
        n_within_family_changes_this_sequence = r['n_within_family_smoothing_changes']

        n_detections_flipped += n_detections_flipped_this_sequence
        n_other_classifications_changed += n_other_classifications_changed_this_sequence
        n_taxonomic_classification_changes += n_taxonomic_changes_this_sequence
        n_within_family_changes += n_within_family_changes_this_sequence

        if n_detections_flipped_this_sequence > 0:
            n_sequences_changed += 1
        if n_other_classifications_changed_this_sequence > 0:
@@ -1010,40 +1045,40 @@ def smooth_classification_results_sequence_level(input_file,
            n_taxonomic_sequences_changed += 1
        if n_within_family_changes_this_sequence > 0:
            n_within_family_sequences_changed += 1

    # ...for each sequence

    print('Classification smoothing: changed {} detections in {} sequences'.format(
        n_detections_flipped,n_sequences_changed))

    print('"Other" smoothing: changed {} detections in {} sequences'.format(
        n_other_classifications_changed,n_other_sequences_changed))

    print('Taxonomic smoothing: changed {} detections in {} sequences'.format(
        n_taxonomic_classification_changes,n_taxonomic_sequences_changed))

    print('Within-family smoothing: changed {} detections in {} sequences'.format(
        n_within_family_changes,n_within_family_sequences_changed))


    ## Clean up debug information

    for im in d['images']:
        if 'detections' not in im or im['detections'] is None:
            continue
        for det in im['detections']:
            if 'image_filename' in det:
                del det['image_filename']


    ## Write output

    if output_file is not None:
        print('Writing sequence-smoothed classification results to {}'.format(
            output_file))
        with open(output_file,'w') as f:
            json.dump(d,f,indent=1)

    return d

# ...smooth_classification_results_sequence_level(...)
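A usage sketch for sequence-level smoothing; the filenames and sequence IDs below are hypothetical. Note that the 'file_name' values in the sequence information must match the 'file' values in the MD-formatted results:

    # Sequence IDs can come from a CCT .json file, a loaded CCT dict, or just
    # the 'images' list from a CCT dict, as shown here (hypothetical values)
    cct_image_info = [
        {'file_name': 'cam01/img0001.jpg', 'seq_id': 'seq_000'},
        {'file_name': 'cam01/img0002.jpg', 'seq_id': 'seq_000'},
        {'file_name': 'cam01/img0003.jpg', 'seq_id': 'seq_001'}
    ]
    smoothed_results = smooth_classification_results_sequence_level(
        input_file='md-results-with-classifications.json',
        cct_sequence_information=cct_image_info,
        output_file='md-results-sequence-smoothed.json')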
@@ -1058,14 +1093,14 @@ def restrict_to_taxa_list(taxa_list,
    """
    Given a prediction file in MD .json format, likely produced without a geofence
    applied, restrict the predictions to a custom taxa list.

    Args:
        taxa_list (str or list): list of latin names, or a text file containing
            a list of latin names. Optionally may contain a second (comma-delimited)
            column containing common names, used only for debugging. Latin names
            must exist in the SpeciesNet taxonomy.
        speciesnet_taxonomy_file (str): taxonomy filename, in the same format used for
            model release (with 7-token taxonomy entries)
        input_file (str): .json file to read, in MD format. This can be None, in which
            case this function just validates [taxa_list].
        output_file (str): .json file to write, in MD format
@@ -1074,22 +1109,22 @@ def restrict_to_taxa_list(taxa_list,
            For example, if only a single felid species is allowed, should other
            felid predictions be mapped to that species, as opposed to being mapped
            to the family?
-       add_pre_restriction_description (bool, optional): should we add a new metadata
+       add_pre_filtering_description (bool, optional): should we add a new metadata
            field that summarizes each image's classifications prior to taxonomic
            restriction?
    """

    ##%% Read target taxa list

    if isinstance(taxa_list,str):
        assert os.path.isfile(taxa_list), \
            'Could not find taxa list file {}'.format(taxa_list)
        with open(taxa_list,'r') as f:
            taxa_list = f.readlines()

    taxa_list = [s.strip().lower() for s in taxa_list]
    taxa_list = [s for s in taxa_list if len(s) > 0]

    target_latin_to_common = {}
    for s in taxa_list:
        if s.strip().startswith('#'):
@@ -1105,38 +1140,38 @@ def restrict_to_taxa_list(taxa_list,
            common_name = None
        assert binomial_name not in target_latin_to_common
        target_latin_to_common[binomial_name] = common_name


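The parsing above implies a simple file format for [taxa_list]: one latin name per line, an optional comma-delimited common name, with blank lines and '#'-prefixed comment lines ignored (names are lowercased on read). A hypothetical example:

    # cervids and friends
    odocoileus virginianus,white-tailed deer
    cervus canadensis,elk
    canis latrans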
1145
  ##%% Read taxonomy file
1111
-
1146
+
1112
1147
  with open(speciesnet_taxonomy_file,'r') as f:
1113
1148
  speciesnet_taxonomy_list = f.readlines()
1114
1149
  speciesnet_taxonomy_list = [s.strip() for s in \
1115
1150
  speciesnet_taxonomy_list if len(s.strip()) > 0]
1116
-
1151
+
1117
1152
  # Maps the latin name of every taxon to the corresponding full taxon string
1118
1153
  #
1119
1154
  # For species, the key is a binomial name
1120
1155
  speciesnet_latin_name_to_taxon_string = {}
1121
1156
  speciesnet_common_name_to_taxon_string = {}
1122
-
1157
+
1123
1158
  def _insert_taxonomy_string(s):
1124
-
1159
+
1125
1160
  tokens = s.split(';')
1126
1161
  assert len(tokens) == 7
1127
-
1162
+
1128
1163
  guid = tokens[0] # noqa
1129
1164
  class_name = tokens[1]
1130
1165
  order = tokens[2]
1131
1166
  family = tokens[3]
1132
1167
  genus = tokens[4]
1133
- species = tokens[5]
1168
+ species = tokens[5]
1134
1169
  common_name = tokens[6]
1135
-
1170
+
1136
1171
  if len(class_name) == 0:
1137
1172
  assert common_name in ('animal','vehicle','blank')
1138
1173
  return
1139
-
1174
+
1140
1175
  if len(species) > 0:
1141
1176
  assert all([len(s) > 0 for s in [genus,family,order]])
1142
1177
  binomial_name = genus + ' ' + species
@@ -1156,43 +1191,43 @@ def restrict_to_taxa_list(taxa_list,
1156
1191
  else:
1157
1192
  if class_name not in speciesnet_latin_name_to_taxon_string:
1158
1193
  speciesnet_latin_name_to_taxon_string[class_name] = s
1159
-
1194
+
1160
1195
  if len(common_name) > 0:
1161
1196
  if common_name not in speciesnet_common_name_to_taxon_string:
1162
1197
  speciesnet_common_name_to_taxon_string[common_name] = s
1163
-
1198
+
1164
1199
  for s in speciesnet_taxonomy_list:
1165
-
1200
+
1166
1201
  _insert_taxonomy_string(s)
1167
-
1168
-
1202
+
1203
+
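For reference, a hypothetical 7-token taxonomy entry in the format parsed by _insert_taxonomy_string; the GUID and the specific taxonomic tokens here are illustrative, not actual taxonomy-file content:

    # GUID;class;order;family;genus;species;common name
    #
    #   abc123;mammalia;cetartiodactyla;cervidae;odocoileus;virginianus;white-tailed deer
    #
    # For this entry, _insert_taxonomy_string keys the full string by the
    # binomial name 'odocoileus virginianus' and by the common name
    # 'white-tailed deer'.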
    ##%% Make sure all parent taxa are represented in the taxonomy

    # In theory any taxon that appears as the parent of another taxon should
    # also be in the taxonomy, but this isn't always true, so we fix it here.

    new_taxon_string_to_missing_tokens = defaultdict(list)

    # latin_name = next(iter(speciesnet_latin_name_to_taxon_string.keys()))
    for latin_name in speciesnet_latin_name_to_taxon_string.keys():

        if 'no cv result' in latin_name:
            continue

        taxon_string = speciesnet_latin_name_to_taxon_string[latin_name]
        tokens = taxon_string.split(';')

        # Don't process GUID, species, or common name
        # i_token = 6
        for i_token in range(1,len(tokens)-2):

            test_token = tokens[i_token]
            if len(test_token) == 0:
                continue

            # Do we need to make up a taxon for this token?
            if test_token not in speciesnet_latin_name_to_taxon_string:

                new_tokens = [''] * 7
                new_tokens[0] = 'fake_guid'
                for i_copy_token in range(1,i_token+1):
@@ -1202,28 +1237,28 @@ def restrict_to_taxa_list(taxa_list,
                new_taxon_string = ';'.join(new_tokens)
                # assert new_taxon_string not in new_taxon_strings
                new_taxon_string_to_missing_tokens[new_taxon_string].append(test_token)

        # ...for each token

    # ...for each taxon

    print('Found {} taxa that need to be inserted to make the taxonomy valid:\n'.format(
        len(new_taxon_string_to_missing_tokens)))

    new_taxon_string_to_missing_tokens = \
        sort_dictionary_by_key(new_taxon_string_to_missing_tokens)
    for taxon_string in new_taxon_string_to_missing_tokens:
        missing_taxa = ','.join(new_taxon_string_to_missing_tokens[taxon_string])
        print('{} ({})'.format(taxon_string,missing_taxa))

    for new_taxon_string in new_taxon_string_to_missing_tokens:
        _insert_taxonomy_string(new_taxon_string)


    ##%% Make sure all species on the allow-list are in the taxonomy

    n_failed_mappings = 0

    for target_taxon_latin_name in target_latin_to_common.keys():
        if target_taxon_latin_name not in speciesnet_latin_name_to_taxon_string:
            common_name = target_latin_to_common[target_taxon_latin_name]
@@ -1234,99 +1269,99 @@ def restrict_to_taxa_list(taxa_list,
                speciesnet_common_name_to_taxon_string[common_name])
            print(s)
            n_failed_mappings += 1

    if n_failed_mappings > 0:
        raise ValueError('Cannot continue with geofence generation')


    ##%% For the allow-list, map each parent taxon to a set of allowable child taxa

    # Maps parent names to all allowed child names, or None if this is the
    # lowest-level allowable taxon on this path
    allowed_parent_taxon_to_child_taxa = defaultdict(set)

    # latin_name = next(iter(target_latin_to_common.keys()))
    for latin_name in target_latin_to_common:

        taxon_string = speciesnet_latin_name_to_taxon_string[latin_name]
        tokens = taxon_string.split(';')
        assert len(tokens) == 7

        # Remove GUID and common name
        #
        # This is now always class/order/family/genus/species
        tokens = tokens[1:-1]

        child_taxon = None

        # If this is a species
        if len(tokens[-1]) > 0:
            binomial_name = tokens[-2] + ' ' + tokens[-1]
            assert binomial_name == latin_name
            allowed_parent_taxon_to_child_taxa[binomial_name].add(None)
            child_taxon = binomial_name

        # The first candidate parent is the genus
        parent_token_index = len(tokens) - 2

        while(parent_token_index >= 0):

            parent_taxon = tokens[parent_token_index]
            allowed_parent_taxon_to_child_taxa[parent_taxon].add(child_taxon)
            child_taxon = parent_taxon
            parent_token_index -= 1

    # ...for each allowed latin name

    allowed_parent_taxon_to_child_taxa = \
        sort_dictionary_by_key(allowed_parent_taxon_to_child_taxa)


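A worked example of the mapping built above, assuming a hypothetical allow-list containing only 'odocoileus virginianus' and 'cervus canadensis' (order and class names here follow the hypothetical taxonomy entry shown earlier):

    # allowed_parent_taxon_to_child_taxa would contain:
    #
    #   'odocoileus virginianus' : {None}
    #   'cervus canadensis'      : {None}
    #   'odocoileus'             : {'odocoileus virginianus'}
    #   'cervus'                 : {'cervus canadensis'}
    #   'cervidae'               : {'odocoileus', 'cervus'}
    #   'cetartiodactyla'        : {'cervidae'}
    #   'mammalia'               : {'cetartiodactyla'}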
    ##%% If we were just validating the custom taxa file, we're done

    if input_file is None:
        print('Finished validating custom taxonomy list')
        return


    ##%% Map all predictions that exist in this dataset...

    # ...to the prediction we should generate.

    with open(input_file,'r') as f:
        input_data = json.load(f)

    input_category_id_to_common_name = input_data['classification_categories'] #noqa
    input_category_id_to_taxonomy_string = \
        input_data['classification_category_descriptions']

    input_category_id_to_output_taxon_string = {}

    # input_category_id = next(iter(input_category_id_to_taxonomy_string.keys()))
    for input_category_id in input_category_id_to_taxonomy_string.keys():

        input_taxon_string = input_category_id_to_taxonomy_string[input_category_id]
        input_taxon_tokens = input_taxon_string.split(';')
        assert len(input_taxon_tokens) == 7

        # Don't mess with blank/no-cv-result/animal/human
        if (input_taxon_string in non_taxonomic_prediction_strings) or \
           (input_taxon_string == human_prediction_string):
            input_category_id_to_output_taxon_string[input_category_id] = \
                input_taxon_string
            continue

        # Remove GUID and common name

        # This is now always class/order/family/genus/species
        input_taxon_tokens = input_taxon_tokens[1:-1]

        test_index = len(input_taxon_tokens) - 1
        target_taxon = None

        # Start at the species level, and see whether each taxon is allowed
        while((test_index >= 0) and (target_taxon is None)):

            # Species are represented as binomial names
            if (test_index == (len(input_taxon_tokens) - 1)) and \
               (len(input_taxon_tokens[-1]) > 0):
@@ -1334,27 +1369,27 @@ def restrict_to_taxa_list(taxa_list,
                    input_taxon_tokens[-2] + ' ' + input_taxon_tokens[-1]
            else:
                test_taxon_name = input_taxon_tokens[test_index]

            # If we haven't yet found the level at which this taxon is non-empty,
            # keep going up
            if len(test_taxon_name) == 0:
                test_index -= 1
                continue

            assert test_taxon_name in speciesnet_latin_name_to_taxon_string

            # Is this taxon allowed according to the custom species list?
            if test_taxon_name in allowed_parent_taxon_to_child_taxa:

                allowed_child_taxa = allowed_parent_taxon_to_child_taxa[test_taxon_name]
                assert allowed_child_taxa is not None

                # If this is the lowest-level allowable token or there is not a
                # unique child, don't walk any further, even if walking down
                # is enabled.
                if (None in allowed_child_taxa):
                    assert len(allowed_child_taxa) == 1

                if (None in allowed_child_taxa) or (len(allowed_child_taxa) > 1):
                    target_taxon = test_taxon_name
                elif not allow_walk_down:
@@ -1370,72 +1405,72 @@ def restrict_to_taxa_list(taxa_list,
                    allowed_child_taxa = \
                        allowed_parent_taxon_to_child_taxa[candidate_taxon]
                    target_taxon = candidate_taxon

                # ...if this is an allowed taxon

            test_index -= 1

        # ...for each token

        if target_taxon is None:
            output_taxon_string = animal_prediction_string
        else:
            output_taxon_string = speciesnet_latin_name_to_taxon_string[target_taxon]
        input_category_id_to_output_taxon_string[input_category_id] = output_taxon_string

    # ...for each category


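Continuing the hypothetical two-cervid allow-list from above, the walk-up loop maps input categories along these lines (the behavior of the elided walk-down branch is not shown here, so the last two cases assume the default allow_walk_down=False):

    # * 'odocoileus virginianus' is allowed directly and maps to itself
    # * 'alces alces' (moose) is not allowed; walking up reaches 'cervidae',
    #   which has two allowed children, so moose maps to the family cervidae
    # * 'puma concolor' has no allowed ancestor below the class level, so it
    #   maps to its lowest allowed ancestor, 'mammalia'
    # * a bird prediction finds no allowed ancestor at all, so target_taxon
    #   stays None and it falls back to the generic 'animal' prediction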
    ##%% Build the new tables

    input_category_id_to_output_category_id = {}
    output_taxon_string_to_category_id = {}
    output_category_id_to_common_name = {}

    for input_category_id in input_category_id_to_output_taxon_string:

        original_common_name = \
            input_category_id_to_common_name[input_category_id]
        original_taxon_string = \
            input_category_id_to_taxonomy_string[input_category_id]
        output_taxon_string = \
            input_category_id_to_output_taxon_string[input_category_id]

        output_common_name = output_taxon_string.split(';')[-1]

        # Do we need to create a new output category?
        if output_taxon_string not in output_taxon_string_to_category_id:
            output_category_id = str(len(output_taxon_string_to_category_id))
            output_taxon_string_to_category_id[output_taxon_string] = \
                output_category_id
            output_category_id_to_common_name[output_category_id] = \
                output_common_name
        else:
            output_category_id = \
                output_taxon_string_to_category_id[output_taxon_string]

        input_category_id_to_output_category_id[input_category_id] = \
            output_category_id

        if False:
            print('Mapping {} ({}) to:\n{} ({})\n'.format(
                original_common_name,original_taxon_string,
                output_common_name,output_taxon_string))
        if False:
            print('Mapping {} to {}'.format(
                original_common_name,output_common_name,))

    # ...for each category


    ##%% Remap all category labels

    assert len(set(output_taxon_string_to_category_id.keys())) == \
           len(set(output_taxon_string_to_category_id.values()))

    output_category_id_to_taxon_string = \
        invert_dictionary(output_taxon_string_to_category_id)

    with open(input_file,'r') as f:
        output_data = json.load(f)

@@ -1447,7 +1482,7 @@

        if 'detections' not in im or im['detections'] is None:
            continue

        # Possibly prepare a pre-filtering description
        pre_filtering_description = None
        if classification_descriptions is not None and add_pre_filtering_description:
@@ -1462,16 +1497,16 @@
                classification[0] = \
                    input_category_id_to_output_category_id[classification[0]]

    # ...for each image

    output_data['classification_categories'] = output_category_id_to_common_name
    output_data['classification_category_descriptions'] = \
        output_category_id_to_taxon_string


    ##%% Write output

    with open(output_file,'w') as f:
        json.dump(output_data,f,indent=1)

# ...def restrict_to_taxa_list(...)
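A usage sketch for the function above; the filenames are hypothetical, and the keyword names are taken from the docstring (the full signature is not shown in this diff). Passing input_file=None validates the taxa list without remapping anything:

    # Validate the list only (no results file is read or written)
    restrict_to_taxa_list(taxa_list='project-taxa.txt',
                          speciesnet_taxonomy_file='speciesnet-taxonomy.txt',
                          input_file=None,
                          output_file=None)

    # Apply the list to a results file
    restrict_to_taxa_list(taxa_list='project-taxa.txt',
                          speciesnet_taxonomy_file='speciesnet-taxonomy.txt',
                          input_file='md-results.json',
                          output_file='md-results-restricted.json')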