megadetector 5.0.27__py3-none-any.whl → 5.0.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (176)
  1. megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
  2. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
  3. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  7. megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
  8. megadetector/classification/aggregate_classifier_probs.py +3 -3
  9. megadetector/classification/analyze_failed_images.py +5 -5
  10. megadetector/classification/cache_batchapi_outputs.py +5 -5
  11. megadetector/classification/create_classification_dataset.py +11 -12
  12. megadetector/classification/crop_detections.py +10 -10
  13. megadetector/classification/csv_to_json.py +8 -8
  14. megadetector/classification/detect_and_crop.py +13 -15
  15. megadetector/classification/evaluate_model.py +7 -7
  16. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  17. megadetector/classification/json_to_azcopy_list.py +1 -1
  18. megadetector/classification/json_validator.py +29 -32
  19. megadetector/classification/map_classification_categories.py +9 -9
  20. megadetector/classification/merge_classification_detection_output.py +12 -9
  21. megadetector/classification/prepare_classification_script.py +19 -19
  22. megadetector/classification/prepare_classification_script_mc.py +23 -23
  23. megadetector/classification/run_classifier.py +4 -4
  24. megadetector/classification/save_mislabeled.py +6 -6
  25. megadetector/classification/train_classifier.py +1 -1
  26. megadetector/classification/train_classifier_tf.py +9 -9
  27. megadetector/classification/train_utils.py +10 -10
  28. megadetector/data_management/annotations/annotation_constants.py +1 -1
  29. megadetector/data_management/camtrap_dp_to_coco.py +45 -45
  30. megadetector/data_management/cct_json_utils.py +101 -101
  31. megadetector/data_management/cct_to_md.py +49 -49
  32. megadetector/data_management/cct_to_wi.py +33 -33
  33. megadetector/data_management/coco_to_labelme.py +75 -75
  34. megadetector/data_management/coco_to_yolo.py +189 -189
  35. megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
  36. megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
  37. megadetector/data_management/databases/integrity_check_json_db.py +202 -188
  38. megadetector/data_management/databases/subset_json_db.py +33 -33
  39. megadetector/data_management/generate_crops_from_cct.py +38 -38
  40. megadetector/data_management/get_image_sizes.py +54 -49
  41. megadetector/data_management/labelme_to_coco.py +130 -124
  42. megadetector/data_management/labelme_to_yolo.py +78 -72
  43. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  44. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  45. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  46. megadetector/data_management/lila/download_lila_subset.py +21 -24
  47. megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
  48. megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
  49. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  50. megadetector/data_management/lila/lila_common.py +70 -70
  51. megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
  52. megadetector/data_management/mewc_to_md.py +339 -340
  53. megadetector/data_management/ocr_tools.py +258 -252
  54. megadetector/data_management/read_exif.py +232 -223
  55. megadetector/data_management/remap_coco_categories.py +26 -26
  56. megadetector/data_management/remove_exif.py +31 -20
  57. megadetector/data_management/rename_images.py +187 -187
  58. megadetector/data_management/resize_coco_dataset.py +41 -41
  59. megadetector/data_management/speciesnet_to_md.py +41 -41
  60. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  61. megadetector/data_management/yolo_output_to_md_output.py +117 -120
  62. megadetector/data_management/yolo_to_coco.py +195 -188
  63. megadetector/detection/change_detection.py +831 -0
  64. megadetector/detection/process_video.py +341 -338
  65. megadetector/detection/pytorch_detector.py +308 -266
  66. megadetector/detection/run_detector.py +186 -166
  67. megadetector/detection/run_detector_batch.py +366 -364
  68. megadetector/detection/run_inference_with_yolov5_val.py +328 -325
  69. megadetector/detection/run_tiled_inference.py +312 -253
  70. megadetector/detection/tf_detector.py +24 -24
  71. megadetector/detection/video_utils.py +291 -283
  72. megadetector/postprocessing/add_max_conf.py +15 -11
  73. megadetector/postprocessing/categorize_detections_by_size.py +44 -44
  74. megadetector/postprocessing/classification_postprocessing.py +808 -311
  75. megadetector/postprocessing/combine_batch_outputs.py +20 -21
  76. megadetector/postprocessing/compare_batch_results.py +528 -517
  77. megadetector/postprocessing/convert_output_format.py +97 -97
  78. megadetector/postprocessing/create_crop_folder.py +220 -147
  79. megadetector/postprocessing/detector_calibration.py +173 -168
  80. megadetector/postprocessing/generate_csv_report.py +508 -0
  81. megadetector/postprocessing/load_api_results.py +25 -22
  82. megadetector/postprocessing/md_to_coco.py +129 -98
  83. megadetector/postprocessing/md_to_labelme.py +89 -83
  84. megadetector/postprocessing/md_to_wi.py +40 -40
  85. megadetector/postprocessing/merge_detections.py +87 -114
  86. megadetector/postprocessing/postprocess_batch_results.py +319 -302
  87. megadetector/postprocessing/remap_detection_categories.py +36 -36
  88. megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
  89. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  90. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  91. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
  92. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  93. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  94. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  95. megadetector/postprocessing/validate_batch_results.py +70 -70
  96. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  97. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
  98. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
  99. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -69
  100. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  101. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  102. megadetector/taxonomy_mapping/species_lookup.py +33 -33
  103. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  104. megadetector/taxonomy_mapping/taxonomy_graph.py +11 -11
  105. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  106. megadetector/utils/azure_utils.py +22 -22
  107. megadetector/utils/ct_utils.py +1019 -200
  108. megadetector/utils/directory_listing.py +21 -77
  109. megadetector/utils/gpu_test.py +22 -22
  110. megadetector/utils/md_tests.py +541 -518
  111. megadetector/utils/path_utils.py +1511 -406
  112. megadetector/utils/process_utils.py +41 -41
  113. megadetector/utils/sas_blob_utils.py +53 -49
  114. megadetector/utils/split_locations_into_train_val.py +73 -60
  115. megadetector/utils/string_utils.py +147 -26
  116. megadetector/utils/url_utils.py +463 -173
  117. megadetector/utils/wi_utils.py +2629 -2868
  118. megadetector/utils/write_html_image_list.py +137 -137
  119. megadetector/visualization/plot_utils.py +21 -21
  120. megadetector/visualization/render_images_with_thumbnails.py +37 -73
  121. megadetector/visualization/visualization_utils.py +424 -404
  122. megadetector/visualization/visualize_db.py +197 -190
  123. megadetector/visualization/visualize_detector_output.py +126 -98
  124. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/METADATA +6 -3
  125. megadetector-5.0.29.dist-info/RECORD +163 -0
  126. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
  127. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  128. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  129. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  130. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  131. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  132. megadetector/data_management/importers/awc_to_json.py +0 -191
  133. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  134. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  135. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  136. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  137. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  138. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  139. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  140. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  141. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  142. megadetector/data_management/importers/ena24_to_json.py +0 -276
  143. megadetector/data_management/importers/filenames_to_json.py +0 -386
  144. megadetector/data_management/importers/helena_to_cct.py +0 -283
  145. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  146. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  147. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  148. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  149. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  150. megadetector/data_management/importers/missouri_to_json.py +0 -490
  151. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  152. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  153. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  154. megadetector/data_management/importers/pc_to_json.py +0 -365
  155. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  156. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  157. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  158. megadetector/data_management/importers/rspb_to_json.py +0 -356
  159. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  160. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  161. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  162. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  163. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  164. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  165. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  166. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  167. megadetector/data_management/importers/ubc_to_json.py +0 -399
  168. megadetector/data_management/importers/umn_to_json.py +0 -507
  169. megadetector/data_management/importers/wellington_to_json.py +0 -263
  170. megadetector/data_management/importers/wi_to_json.py +0 -442
  171. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  172. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  173. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  174. megadetector-5.0.27.dist-info/RECORD +0 -208
  175. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
  176. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
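The line-level diff below covers megadetector/postprocessing/classification_postprocessing.py (+808/-311). Its two public entry points, smooth_classification_results_image_level and smooth_classification_results_sequence_level, appear with their full signatures in the diff. As a minimal, non-authoritative usage sketch (the file paths are hypothetical placeholders):

    from megadetector.postprocessing.classification_postprocessing import (
        ClassificationSmoothingOptions,
        smooth_classification_results_image_level)

    # Smooth an MD-formatted results file that includes classifications;
    # the function returns the smoothed dict and optionally writes it out.
    options = ClassificationSmoothingOptions()
    smoothed_results = smooth_classification_results_image_level(
        input_file='md_results_with_classifications.json',
        output_file='md_results_smoothed_image_level.json',
        options=options)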
@@ -3,16 +3,17 @@
 classification_postprocessing.py
 
 Functions for postprocessing species classification results, particularly:
-
+
 * Smoothing results within an image (an image with 700 cows and one deer is really just 701
   cows)
 * Smoothing results within a sequence (a sequence that looks like deer/deer/deer/elk/deer/deer
   is really just a deer)
-
+
 """
 
 #%% Constants and imports
 
+import os
 import json
 import copy
 
@@ -20,10 +21,18 @@ from collections import defaultdict
 from tqdm import tqdm
 
 from megadetector.utils.ct_utils import is_list_sorted
+from megadetector.utils.ct_utils import sort_dictionary_by_value
+from megadetector.utils.ct_utils import sort_dictionary_by_key
+from megadetector.utils.ct_utils import invert_dictionary
+
 from megadetector.utils.wi_utils import clean_taxonomy_string
 from megadetector.utils.wi_utils import taxonomy_level_index
 from megadetector.utils.wi_utils import taxonomy_level_string_to_index
-from megadetector.utils.ct_utils import sort_dictionary_by_value
+
+from megadetector.utils.wi_utils import non_taxonomic_prediction_strings
+from megadetector.utils.wi_utils import human_prediction_string
+from megadetector.utils.wi_utils import animal_prediction_string
+from megadetector.utils.wi_utils import blank_prediction_string # noqa
 
 
 #%% Options classes
@@ -35,83 +44,83 @@ class ClassificationSmoothingOptions:
     """
 
     def __init__(self):
-
-        #: How many detections do we need in a dominant category to overwrite
-        #: non-dominant classifications? This is irrelevant if
+
+        #: How many detections do we need in a dominant category to overwrite
+        #: non-dominant classifications? This is irrelevant if
         #: max_detections_nondominant_class <= 1.
         self.min_detections_to_overwrite_secondary = 4
-
-        #: Even if we have a dominant class, if a non-dominant class has at least
+
+        #: Even if we have a dominant class, if a non-dominant class has at least
         #: this many classifications in an image, leave them alone.
         #:
         #: If this is <= 1, we won't replace non-dominant, non-other classes
         #: with the dominant class, even if there are 900 cows and 1 deer.
         self.max_detections_nondominant_class = 1
-
-        #: How many detections do we need in a dominant category to overwrite
-        #: non-dominant classifications in the same family? If this is <= 0,
-        #: we'll skip this step. This option doesn't mean anything if
+
+        #: How many detections do we need in a dominant category to overwrite
+        #: non-dominant classifications in the same family? If this is <= 0,
+        #: we'll skip this step. This option doesn't mean anything if
         #: max_detections_nondominant_class_same_family <= 1.
         self.min_detections_to_overwrite_secondary_same_family = 2
-
-        #: If we have this many classifications of a nondominant category,
+
+        #: If we have this many classifications of a nondominant category,
         #: we won't do same-family overwrites. <= 1 means "even if there are
         #: a million deer, if there are two million moose, call all the deer
-        #: moose". This option doesn't mean anything if
+        #: moose". This option doesn't mean anything if
         #: min_detections_to_overwrite_secondary_same_family <= 0.
         self.max_detections_nondominant_class_same_family = -1
-
-        #: If the dominant class has at least this many classifications, overwrite
+
+        #: If the dominant class has at least this many classifications, overwrite
         #: "other" classifications with the dominant class
         self.min_detections_to_overwrite_other = 2
-
+
         #: Names to treat as "other" categories; can't be None, but can be empty
        #:
         #: "Other" classifications will be changed to the dominant category, regardless
-        #: of confidence, as long as there are at least min_detections_to_overwrite_other
+        #: of confidence, as long as there are at least min_detections_to_overwrite_other
         #: examples of the dominant class. For example, cow/other will remain unchanged,
         #: but cow/cow/other will become cow/cow/cow.
         self.other_category_names = ['other','unknown','no cv result','animal','blank','mammal']
-
+
         #: We're not even going to mess around with classifications below this threshold.
         #:
         #: We won't count them, we won't over-write them, they don't exist during the
         #: within-image smoothing step.
         self.classification_confidence_threshold = 0.5
-
+
         #: We're not even going to mess around with detections below this threshold.
         #:
         #: We won't count them, we won't over-write them, they don't exist during the
         #: within-image smoothing step.
         self.detection_confidence_threshold = 0.15
-
+
         #: If classification descriptions are present and appear to represent taxonomic
-        #: information, should we propagate classifications when lower-level taxa are more
-        #: common in an image? For example, if we see "carnivore/fox/fox/deer", should
+        #: information, should we propagate classifications when lower-level taxa are more
+        #: common in an image? For example, if we see "carnivore/fox/fox/deer", should
         #: we make that "fox/fox/fox/deer"?
         self.propagate_classifications_through_taxonomy = True
-
-        #: When propagating classifications down through taxonomy levels, we have to
+
+        #: When propagating classifications down through taxonomy levels, we have to
         #: decide whether we prefer more frequent categories or more specific categories.
         #: taxonomy_propagation_level_weight and taxonomy_propagation_count_weight
         #: balance levels against counts in this process.
         self.taxonomy_propagation_level_weight = 1.0
-
-        #: When propagating classifications down through taxonomy levels, we have to
+
+        #: When propagating classifications down through taxonomy levels, we have to
         #: decide whether we prefer more frequent categories or more specific categories.
         #: taxonomy_propagation_level_weight and taxonomy_propagation_count_weight
         #: balance levels against counts in this process.
         #:
         #: With a very low default value, this just breaks ties.
         self.taxonomy_propagation_count_weight = 0.01
-
+
         #: Should we record information about the state of labels prior to smoothing?
         self.add_pre_smoothing_description = True
-
+
         #: When a dict (rather than a file) is passed to either smoothing function,
         #: if this is True, we'll make a copy of the input dict before modifying.
         self.modify_in_place = False
-
+
         #: Debug options
         self.break_at_image = None
 
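The defaults above encode the heuristics described in the module docstring. A short sketch of overriding them before smoothing; the attribute names are exactly those defined in __init__ above, while the extra "other" label is a hypothetical example:

    options = ClassificationSmoothingOptions()

    # Require a larger dominant class before overwriting non-dominant classifications
    options.min_detections_to_overwrite_secondary = 6

    # Ignore classifications below 0.6 confidence rather than the default 0.5
    options.classification_confidence_threshold = 0.6

    # Treat an additional (hypothetical) label as an "other" category
    options.other_category_names.append('unidentified')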
@@ -122,56 +131,75 @@ def _results_for_sequence(images_this_sequence,filename_to_results):
     """
     Fetch MD results for every image in this sequence, based on the 'file_name' field
     """
-
+
     results_this_sequence = []
     for im in images_this_sequence:
         fn = im['file_name']
         results_this_image = filename_to_results[fn]
         assert isinstance(results_this_image,dict)
         results_this_sequence.append(results_this_image)
-
+
     return results_this_sequence
-
-
+
+
 def _sort_images_by_time(images):
     """
     Returns a copy of [images], sorted by the 'datetime' field (ascending).
     """
-    return sorted(images, key = lambda im: im['datetime'])
+    return sorted(images, key = lambda im: im['datetime'])
 
 
-def _count_detections_by_category(detections,options):
+def count_detections_by_classification_category(detections,options=None):
     """
-    Count the number of instances of each category in the detections list
-    [detections] that have an above-threshold detection. Sort results in descending
+    Count the number of instances of each classification category in the detections list
+    [detections] that have an above-threshold detection. Sort results in descending
     order by count. Returns a dict mapping category ID --> count. If no detections
     are above threshold, returns an empty dict.
-
-    Assumes that if the 'classifications' field is present for a detection, it has
-    length 1, i.e. that non-top classifications have already been removed.
+
+    Only processes the top classification for each detection.
+
+    Args:
+        detections: detections list
+        options (ClassificationSmoothingOptions, optional): see ClassificationSmoothingOptions
+
+    Returns:
+        dict mapping above-threshold category IDs to counts
     """
-
+
+    if detections is None or len(detections) == 0:
+        return {}
+
+    if options is None:
+        options = ClassificationSmoothingOptions()
+
     category_to_count = defaultdict(int)
-
+
     for det in detections:
         if ('classifications' in det) and (det['conf'] >= options.detection_confidence_threshold):
-            assert len(det['classifications']) == 1
+            # assert len(det['classifications']) == 1
             c = det['classifications'][0]
             if c[1] >= options.classification_confidence_threshold:
-                category_to_count[c[0]] += 1
-
+                category_to_count[c[0]] += 1
+
     category_to_count = {k: v for k, v in sorted(category_to_count.items(),
-                                                 key=lambda item: item[1],
+                                                 key=lambda item: item[1],
                                                  reverse=True)}
-
+
     return category_to_count
 
 
-def _get_description_string(category_to_count,classification_descriptions):
+def get_classification_description_string(category_to_count,classification_descriptions):
     """
     Return a string summarizing the image content according to [category_to_count].
+
+    Args:
+        category_to_count (dict): a dict mapping category IDs to counts
+        classification_descriptions (dict): a dict mapping category IDs to description strings
+
+    Returns:
+        string: a description of this image's content, e.g. "rabbit (4), human (1)"
     """
-
+
     category_strings = []
     # category_id = next(iter(category_to_count))
     for category_id in category_to_count:
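To illustrate the renamed (and now public) count_detections_by_classification_category above, here is a sketch of its behavior under the default thresholds; the detections list is hypothetical, with classifications expressed as [category_id, confidence] pairs as in MD-formatted results:

    detections = [
        {'category': '1', 'conf': 0.90, 'classifications': [['10', 0.95]]},
        {'category': '1', 'conf': 0.80, 'classifications': [['10', 0.90]]},
        {'category': '1', 'conf': 0.75, 'classifications': [['11', 0.85]]},
        {'category': '1', 'conf': 0.10, 'classifications': [['11', 0.99]]}  # below the 0.15 detection threshold
    ]

    # Only above-threshold detections are counted, only the top classification
    # per detection is considered, and the result is sorted in descending order
    # by count, so this returns {'10': 2, '11': 1}.
    category_to_count = count_detections_by_classification_category(detections)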
@@ -184,29 +212,29 @@ def _get_description_string(category_to_count,classification_descriptions):
         count = category_to_count[category_id]
         category_string = '{} ({})'.format(category_name,count)
         category_strings.append(category_string)
-
+
     return ', '.join(category_strings)
-
+
 
 def _print_counts_with_names(category_to_count,classification_descriptions):
     """
     Print a list of classification categories with counts, based in the name --> count
     dict [category_to_count]
     """
-
+
     for category_id in category_to_count:
         category_name = classification_descriptions[category_id]
         count = category_to_count[category_id]
         print('{}: {} ({})'.format(category_id,category_name,count))
-
-
+
+
 def _prepare_results_for_smoothing(input_file,options):
     """
-    Load results from [input_file] if necessary, prepare category descrptions
+    Load results from [input_file] if necessary, prepare category descriptions
     for smoothing. Adds pre-smoothing descriptions to every image if the options
     say we're supposed to do that.
     """
-
+
     if isinstance(input_file,str):
         with open(input_file,'r') as f:
             print('Loading results from:\n{}'.format(input_file))
@@ -221,71 +249,71 @@ def _prepare_results_for_smoothing(input_file,options):
 
 
     ## Category processing
-
+
     category_name_to_id = {d['classification_categories'][k]:k for k in d['classification_categories']}
     other_category_ids = []
     for s in options.other_category_names:
         if s in category_name_to_id:
             other_category_ids.append(category_name_to_id[s])
-
+
     # Before we do anything else, get rid of everything but the top classification
     # for each detection, and remove the 'classifications' field from detections with
     # no classifications.
     for im in tqdm(d['images']):
-
+
         if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
             continue
-
+
         detections = im['detections']
-
+
         for det in detections:
-
+
             if 'classifications' not in det:
                 continue
             if len(det['classifications']) == 0:
                 del det['classifications']
                 continue
-
+
             classification_confidence_values = [c[1] for c in det['classifications']]
             assert is_list_sorted(classification_confidence_values,reverse=True)
             det['classifications'] = [det['classifications'][0]]
-
+
         # ...for each detection in this image
-
+
     # ...for each image
-
-
+
+
     ## Clean up classification descriptions so we can test taxonomic relationships
     ## by substring testing.
-
+
     classification_descriptions_clean = None
     classification_descriptions = None
-
+
     if 'classification_category_descriptions' in d:
         classification_descriptions = d['classification_category_descriptions']
         classification_descriptions_clean = {}
         # category_id = next(iter(classification_descriptions))
-        for category_id in classification_descriptions:
+        for category_id in classification_descriptions:
             classification_descriptions_clean[category_id] = \
                 clean_taxonomy_string(classification_descriptions[category_id]).strip(';').lower()
-
-
+
+
     ## Optionally add pre-smoothing descriptions to every image
-
-    if options.add_pre_smoothing_description:
-
+
+    if options.add_pre_smoothing_description and (classification_descriptions is not None):
+
         for im in tqdm(d['images']):
-
+
             if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
                 continue
-
-            detections = im['detections']
-            category_to_count = _count_detections_by_category(detections, options)
-
+
+            detections = im['detections']
+            category_to_count = count_detections_by_classification_category(detections, options)
+
             im['pre_smoothing_description'] = \
-                _get_description_string(category_to_count, classification_descriptions)
-
-
+                get_classification_description_string(category_to_count, classification_descriptions)
+
+
     return {
         'd':d,
         'other_category_ids':other_category_ids,
@@ -293,7 +321,7 @@ def _prepare_results_for_smoothing(input_file,options)
         'classification_descriptions':classification_descriptions
         }
 
-# ...def _prepare_results_for_smoothing(...)
+# ...def _prepare_results_for_smoothing(...)
 
 
 def _smooth_classifications_for_list_of_detections(detections,
  def _smooth_classifications_for_list_of_detections(detections,
@@ -304,282 +332,323 @@ def _smooth_classifications_for_list_of_detections(detections,
304
332
  """
305
333
  Smooth classifications for a list of detections, which may have come from a single
306
334
  image, or may represent an entire sequence.
307
-
335
+
308
336
  Returns None if no changes are made, else a dict.
309
-
310
- classification_descriptions_clean should be semicolon-delimited taxonomic strings
337
+
338
+ classification_descriptions_clean should be semicolon-delimited taxonomic strings
311
339
  from which common names and GUIDs have already been removed.
312
-
340
+
313
341
  Assumes there is only one classification per detection, i.e. that non-top classifications
314
- have already been remoevd.
342
+ have already been remoevd.
315
343
  """
316
-
344
+
317
345
  ## Count the number of instances of each category in this image
318
-
319
- category_to_count = _count_detections_by_category(detections, options)
346
+
347
+ category_to_count = count_detections_by_classification_category(detections, options)
320
348
  # _print_counts_with_names(category_to_count,classification_descriptions)
321
- # _get_description_string(category_to_count, classification_descriptions)
322
-
349
+ # get_classification_description_string(category_to_count, classification_descriptions)
350
+
323
351
  if len(category_to_count) <= 1:
324
352
  return None
325
-
353
+
326
354
  keys = list(category_to_count.keys())
327
-
328
- # Handle a quirky special case: if the most common category is "other" and
355
+
356
+ # Handle a quirky special case: if the most common category is "other" and
329
357
  # it's "tied" with the second-most-common category, swap them
330
358
  if (len(keys) > 1) and \
331
359
  (keys[0] in other_category_ids) and \
332
360
  (keys[1] not in other_category_ids) and \
333
361
  (category_to_count[keys[0]] == category_to_count[keys[1]]):
334
362
  keys[1], keys[0] = keys[0], keys[1]
335
-
336
- max_count = category_to_count[keys[0]]
363
+
364
+ max_count = category_to_count[keys[0]]
337
365
  most_common_category = keys[0]
338
366
  del keys
339
-
340
-
367
+
368
+
341
369
  ## Debug tools
342
-
370
+
343
371
  verbose_debug_enabled = False
344
-
372
+
345
373
  if options.break_at_image is not None:
346
374
  for det in detections:
347
375
  if 'image_filename' in det and \
348
376
  det['image_filename'] == options.break_at_image:
349
377
  verbose_debug_enabled = True
350
378
  break
351
-
379
+
352
380
  if verbose_debug_enabled:
353
381
  _print_counts_with_names(category_to_count,classification_descriptions)
354
- import pdb; pdb.set_trace()
355
-
356
-
382
+ from IPython import embed; embed()
383
+
384
+
357
385
  ## Possibly change "other" classifications to the most common category
358
-
386
+
359
387
  # ...if the dominant category is not an "other" category.
360
-
388
+
361
389
  n_other_classifications_changed_this_image = 0
362
-
390
+
363
391
  # If we have at least *min_detections_to_overwrite_other* in a category that isn't
364
392
  # "other", change all "other" classifications to that category
365
393
  if (max_count >= options.min_detections_to_overwrite_other) and \
366
394
  (most_common_category not in other_category_ids):
367
-
395
+
368
396
  for det in detections:
369
-
397
+
370
398
  if ('classifications' not in det) or \
371
399
  (det['conf'] < options.detection_confidence_threshold):
372
400
  continue
373
-
401
+
374
402
  assert len(det['classifications']) == 1
375
403
  c = det['classifications'][0]
376
-
404
+
377
405
  if (c[1] >= options.classification_confidence_threshold) and \
378
406
  (c[0] in other_category_ids):
379
-
407
+
408
+ if verbose_debug_enabled:
409
+ print('Replacing {} with {}'.format(
410
+ classification_descriptions[c[0]],
411
+ classification_descriptions[c[1]]))
412
+
380
413
  n_other_classifications_changed_this_image += 1
381
414
  c[0] = most_common_category
382
-
415
+
383
416
  # ...if there are classifications for this detection
384
-
417
+
385
418
  # ...for each detection
386
-
419
+
387
420
  # ...if we should overwrite all "other" classifications
388
-
389
-
421
+
422
+ if verbose_debug_enabled:
423
+ print('Made {} other changes'.format(n_other_classifications_changed_this_image))
424
+
425
+
390
426
  ## Re-count
391
-
392
- category_to_count = _count_detections_by_category(detections, options)
393
- # _print_counts_with_names(category_to_count,classification_descriptions)
427
+
428
+ category_to_count = count_detections_by_classification_category(detections, options)
429
+ # _print_counts_with_names(category_to_count,classification_descriptions)
394
430
  keys = list(category_to_count.keys())
395
- max_count = category_to_count[keys[0]]
431
+ max_count = category_to_count[keys[0]]
396
432
  most_common_category = keys[0]
397
433
  del keys
398
-
399
-
434
+
435
+
400
436
  ## Possibly change some non-dominant classifications to the dominant category
401
-
437
+
438
+ process_taxonomic_rules = \
439
+ (classification_descriptions_clean is not None) and \
440
+ (len(classification_descriptions_clean) > 0) and \
441
+ (len(category_to_count) > 1)
442
+
402
443
  n_detections_flipped_this_image = 0
403
-
404
- # Don't do this if the most common category is an "other" category, or
444
+
445
+ # Don't do this if the most common category is an "other" category, or
405
446
  # if we don't have enough of the most common category
406
447
  if (most_common_category not in other_category_ids) and \
407
448
  (max_count >= options.min_detections_to_overwrite_secondary):
408
-
449
+
409
450
  # i_det = 0; det = detections[i_det]
410
451
  for i_det,det in enumerate(detections):
411
-
452
+
412
453
  if ('classifications' not in det) or \
413
454
  (det['conf'] < options.detection_confidence_threshold):
414
455
  continue
415
-
456
+
416
457
  assert len(det['classifications']) == 1
417
458
  c = det['classifications'][0]
418
-
459
+
419
460
  # Don't over-write the most common category with itself
420
461
  if c[0] == most_common_category:
421
462
  continue
422
-
463
+
423
464
  # Don't bother with below-threshold classifications
424
465
  if c[1] < options.classification_confidence_threshold:
425
466
  continue
426
-
467
+
468
+ # If we're doing taxonomic processing, at this stage, don't turn children
469
+ # into parents; we'll likely turn parents into children in the next stage.
470
+
471
+ if process_taxonomic_rules:
472
+
473
+ most_common_category_description = \
474
+ classification_descriptions_clean[most_common_category]
475
+
476
+ category_id_this_classification = c[0]
477
+ assert category_id_this_classification in category_to_count
478
+
479
+ category_description_this_classification = \
480
+ classification_descriptions_clean[category_id_this_classification]
481
+
482
+ # An empty description corresponds to the "animal" category. We don't handle
483
+ # "animal" here as a parent category, that would be handled in the "other smoothing"
484
+ # step above.
485
+ if len(category_description_this_classification) == 0:
486
+ continue
487
+
488
+ most_common_category_is_parent_of_this_category = \
489
+ most_common_category_description in category_description_this_classification
490
+
491
+ if most_common_category_is_parent_of_this_category:
492
+ continue
493
+
427
494
  # If we have fewer of this category than the most common category,
428
495
  # but not *too* many, flip it to the most common category.
429
496
  if (max_count > category_to_count[c[0]]) and \
430
497
  (category_to_count[c[0]] <= options.max_detections_nondominant_class):
431
-
498
+
432
499
  c[0] = most_common_category
433
- n_detections_flipped_this_image += 1
434
-
500
+ n_detections_flipped_this_image += 1
501
+
435
502
  # ...for each detection
436
503
 
437
- # ...if the dominant category is legit
438
-
439
-
504
+ # ...if the dominant category is legit
505
+
506
+ if verbose_debug_enabled:
507
+ print('Made {} non-dominant --> dominant changes'.format(
508
+ n_detections_flipped_this_image))
509
+
510
+
440
511
  ## Re-count
441
-
442
- category_to_count = _count_detections_by_category(detections, options)
443
- # _print_counts_with_names(category_to_count,classification_descriptions)
512
+
513
+ category_to_count = count_detections_by_classification_category(detections, options)
514
+ # _print_counts_with_names(category_to_count,classification_descriptions)
444
515
  keys = list(category_to_count.keys())
445
- max_count = category_to_count[keys[0]]
516
+ max_count = category_to_count[keys[0]]
446
517
  most_common_category = keys[0]
447
518
  del keys
448
-
449
-
519
+
520
+
450
521
  ## Possibly collapse higher-level taxonomic predictions down to lower levels
451
-
452
- # ...when the most common class is a child of a less common class.
453
-
522
+
454
523
  n_taxonomic_changes_this_image = 0
455
-
524
+
456
525
  process_taxonomic_rules = \
457
526
  (classification_descriptions_clean is not None) and \
458
527
  (len(classification_descriptions_clean) > 0) and \
459
528
  (len(category_to_count) > 1)
460
-
529
+
461
530
  if process_taxonomic_rules and options.propagate_classifications_through_taxonomy:
462
-
531
+
463
532
  # det = detections[3]
464
533
  for det in detections:
465
-
534
+
466
535
  if ('classifications' not in det) or \
467
536
  (det['conf'] < options.detection_confidence_threshold):
468
537
  continue
469
-
538
+
470
539
  assert len(det['classifications']) == 1
471
540
  c = det['classifications'][0]
472
-
541
+
473
542
  # Don't bother with any classifications below the confidence threshold
474
543
  if c[1] < options.classification_confidence_threshold:
475
544
  continue
476
545
 
477
546
  category_id_this_classification = c[0]
478
547
  assert category_id_this_classification in category_to_count
479
-
548
+
480
549
  category_description_this_classification = \
481
550
  classification_descriptions_clean[category_id_this_classification]
482
-
483
- # An empty description corresponds to the "animal" category. We don't handle
484
- # "animal" here as a parent category, that would be handled in the "other smoothing"
551
+
552
+ # An empty description corresponds to the "animal" category. We don't handle
553
+ # "animal" here as a parent category, that would be handled in the "other smoothing"
485
554
  # step above.
486
555
  if len(category_description_this_classification) == 0:
487
556
  continue
488
-
557
+
489
558
  # We may have multiple child categories to choose from; this keeps track of
490
559
  # the "best" we've seen so far. "Best" is based on the level (species is better
491
560
  # than genus) and number.
492
561
  child_category_to_score = defaultdict(float)
493
-
562
+
494
563
  for category_id_of_candidate_child in category_to_count.keys():
495
-
564
+
496
565
  # A category is never its own child
497
566
  if category_id_of_candidate_child == category_id_this_classification:
498
567
  continue
499
-
568
+
500
569
  # Is this candidate a child of the current classification?
501
570
  category_description_candidate_child = \
502
571
  classification_descriptions_clean[category_id_of_candidate_child]
503
-
572
+
504
573
  # An empty description corresponds to "animal", which can never
505
574
  # be a child of another category.
506
575
  if len(category_description_candidate_child) == 0:
507
576
  continue
508
-
509
- # As long as we're using "clean" descriptions, parent/child taxonomic
577
+
578
+ # As long as we're using "clean" descriptions, parent/child taxonomic
510
579
  # relationships are defined by a substring relationship
511
580
  is_child = category_description_this_classification in \
512
581
  category_description_candidate_child
513
582
  if not is_child:
514
583
  continue
515
-
584
+
516
585
  # How many instances of this child category are there?
517
586
  child_category_count = category_to_count[category_id_of_candidate_child]
518
-
587
+
519
588
  # What taxonomy level is this child category defined at?
520
589
  child_category_level = taxonomy_level_index(
521
590
  classification_descriptions[category_id_of_candidate_child])
522
-
591
+
523
592
  child_category_to_score[category_id_of_candidate_child] = \
524
593
  child_category_level * options.taxonomy_propagation_level_weight + \
525
594
  child_category_count * options.taxonomy_propagation_count_weight
526
-
595
+
527
596
  # ...for each category we are considering reducing this classification to
528
-
597
+
529
598
  # Did we find a category we want to change this classification to?
530
599
  if len(child_category_to_score) > 0:
531
-
600
+
532
601
  # Find the child category with the highest score
533
602
  child_category_to_score = sort_dictionary_by_value(
534
603
  child_category_to_score,reverse=True)
535
604
  best_child_category = next(iter(child_category_to_score.keys()))
536
-
605
+
537
606
  if verbose_debug_enabled:
538
607
  old_category_name = \
539
608
  classification_descriptions_clean[c[0]]
540
609
  new_category_name = \
541
610
  classification_descriptions_clean[best_child_category]
542
611
  print('Replacing {} with {}'.format(
543
- old_category_name,new_category_name))
544
-
612
+ old_category_name,new_category_name))
613
+
545
614
  c[0] = best_child_category
546
- n_taxonomic_changes_this_image += 1
547
-
615
+ n_taxonomic_changes_this_image += 1
616
+
548
617
  # ...for each detection
549
-
550
- # ...if we have taxonomic information available
551
-
552
-
618
+
619
+ # ...if we have taxonomic information available
620
+
621
+
553
622
  ## Re-count
554
-
555
- category_to_count = _count_detections_by_category(detections, options)
556
- # _print_counts_with_names(category_to_count,classification_descriptions)
623
+
624
+ category_to_count = count_detections_by_classification_category(detections, options)
625
+ # _print_counts_with_names(category_to_count,classification_descriptions)
557
626
  keys = list(category_to_count.keys())
558
- max_count = category_to_count[keys[0]]
627
+ max_count = category_to_count[keys[0]]
559
628
  most_common_category = keys[0]
560
629
  del keys
561
-
562
-
630
+
631
+
563
632
  ## Possibly do within-family smoothing
564
-
633
+
565
634
  n_within_family_smoothing_changes = 0
566
-
635
+
567
636
  # min_detections_to_overwrite_secondary_same_family = -1
568
637
  # max_detections_nondominant_class_same_family = 1
569
638
  family_level = taxonomy_level_string_to_index('family')
570
-
639
+
571
640
  if process_taxonomic_rules:
572
-
641
+
573
642
  category_description_most_common_category = \
574
643
  classification_descriptions[most_common_category]
575
644
  most_common_category_taxonomic_level = \
576
- taxonomy_level_index(category_description_most_common_category)
645
+ taxonomy_level_index(category_description_most_common_category)
577
646
  n_most_common_category = category_to_count[most_common_category]
578
647
  tokens = category_description_most_common_category.split(';')
579
648
  assert len(tokens) == 7
580
649
  most_common_category_family = tokens[3]
581
650
  most_common_category_genus = tokens[4]
582
-
651
+
583
652
  # Only consider remapping to genus or species level, and only when we have
584
653
  # a high enough count in the most common category
585
654
  if process_taxonomic_rules and \
@@ -587,36 +656,36 @@ def _smooth_classifications_for_list_of_detections(detections,
        (most_common_category not in other_category_ids) and \
        (most_common_category_taxonomic_level > family_level) and \
        (n_most_common_category >= options.min_detections_to_overwrite_secondary_same_family):
-
+
         # det = detections[0]
         for det in detections:
-
+
             if ('classifications' not in det) or \
                (det['conf'] < options.detection_confidence_threshold):
                 continue
-
+
             assert len(det['classifications']) == 1
             c = det['classifications'][0]
-
+
             # Don't over-write the most common category with itself
             if c[0] == most_common_category:
                 continue
-
+
             # Don't bother with below-threshold classifications
             if c[1] < options.classification_confidence_threshold:
-                continue
-
+                continue
+
             n_candidate_flip_category = category_to_count[c[0]]
-
+
             # Do we have too many of the non-dominant category to do this kind of swap?
             if n_candidate_flip_category > \
                options.max_detections_nondominant_class_same_family:
                 continue
 
-            # Don't flip classes when it's a tie
+            # Don't flip classes when it's a tie
             if n_candidate_flip_category == n_most_common_category:
                 continue
-
+
             category_description_candidate_flip = \
                 classification_descriptions[c[0]]
             tokens = category_description_candidate_flip.split(';')
@@ -624,34 +693,34 @@
             candidate_flip_category_family = tokens[3]
             candidate_flip_category_genus = tokens[4]
             candidate_flip_category_taxonomic_level = \
-                taxonomy_level_index(category_description_candidate_flip)
-
+                taxonomy_level_index(category_description_candidate_flip)
+
             # Only proceed if we have valid family strings
             if (len(candidate_flip_category_family) == 0) or \
                (len(most_common_category_family) == 0):
                 continue
-
-            # Only proceed if the candidate and the most common category are in the same family
+
+            # Only proceed if the candidate and the most common category are in the same family
             if candidate_flip_category_family != most_common_category_family:
                 continue
-
+
             # Don't flip from a species to the genus level in the same genus
             if (candidate_flip_category_genus == most_common_category_genus) and \
                (candidate_flip_category_taxonomic_level > \
                 most_common_category_taxonomic_level):
                 continue
-
+
             old_category_name = classification_descriptions_clean[c[0]]
             new_category_name = classification_descriptions_clean[most_common_category]
-
+
             c[0] = most_common_category
-            n_within_family_smoothing_changes += 1
-
+            n_within_family_smoothing_changes += 1
+
         # ...for each detection
-
+
     # ...if the dominant category is legit and we have taxonomic information available
-
-
+
+
     return {'n_other_classifications_changed_this_image':n_other_classifications_changed_this_image,
             'n_detections_flipped_this_image':n_detections_flipped_this_image,
             'n_taxonomic_changes_this_image':n_taxonomic_changes_this_image,
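The taxonomy-propagation step above scores each candidate child category as taxonomy level times taxonomy_propagation_level_weight plus count times taxonomy_propagation_count_weight; with the defaults (1.0 and 0.01), the count term essentially just breaks ties. A worked sketch with hypothetical level indices, assuming taxonomy_level_index returns larger values for more specific levels:

    level_weight = 1.0   # options.taxonomy_propagation_level_weight
    count_weight = 0.01  # options.taxonomy_propagation_count_weight

    # Candidate A: species level (hypothetical level index 6), seen twice
    score_a = 6 * level_weight + 2 * count_weight  # 6.02

    # Candidate B: genus level (hypothetical level index 5), seen three times
    score_b = 5 * level_weight + 3 * count_weight  # 5.03

    # The more specific candidate wins despite its lower count.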
@@ -668,33 +737,33 @@ def _smooth_single_image(im,
     """
     Smooth classifications for a single image. Returns None if no changes are made,
     else a dict.
-
-    classification_descriptions_clean should be semicolon-delimited taxonomic strings
+
+    classification_descriptions_clean should be semicolon-delimited taxonomic strings
     from which common names and GUIDs have already been removed.
-
+
     Assumes there is only one classification per detection, i.e. that non-top classifications
     have already been remoevd.
     """
-
+
     if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
         return
-
+
     detections = im['detections']
-
+
     # Simplify debugging
     for det in detections:
         det['image_filename'] = im['file']
-
-    to_return = _smooth_classifications_for_list_of_detections(detections,
-                                                               options=options,
+
+    to_return = _smooth_classifications_for_list_of_detections(detections,
+                                                               options=options,
                                                                other_category_ids=other_category_ids,
-                                                               classification_descriptions=classification_descriptions,
+                                                               classification_descriptions=classification_descriptions,
                                                                classification_descriptions_clean=classification_descriptions_clean)
 
     # Clean out debug information
     for det in detections:
         del det['image_filename']
-
+
     return to_return
 
 # ...def smooth_single_image
@@ -706,104 +775,104 @@ def smooth_classification_results_image_level(input_file,output_file=None,option
     """
     Smooth classifications at the image level for all results in the MD-formatted results
     file [input_file], optionally writing a new set of results to [output_file].
-
-    This function generally expresses the notion that an image with 700 cows and one deer
+
+    This function generally expresses the notion that an image with 700 cows and one deer
     is really just 701 cows.
-
+
     Only count detections with a classification confidence threshold above
     [options.classification_confidence_threshold], which in practice means we're only
     looking at one category per detection.
-
+
     If an image has at least [options.min_detections_to_overwrite_secondary] such detections
     in the most common category, and no more than [options.max_detections_nondominant_class]
     in the second-most-common category, flip all detections to the most common
     category.
-
-    Optionally treat some classes as particularly unreliable, typically used to overwrite an
+
+    Optionally treat some classes as particularly unreliable, typically used to overwrite an
     "other" class.
-
+
     This function also removes everything but the non-dominant classification for each detection.
-
+
     Args:
         input_file (str): MegaDetector-formatted classification results file to smooth. Can
             also be an already-loaded results dict.
         output_file (str, optional): .json file to write smoothed results
-        options (ClassificationSmoothingOptions, optional): see
+        options (ClassificationSmoothingOptions, optional): see
             ClassificationSmoothingOptions for details.
-
+
     Returns:
         dict: MegaDetector-results-formatted dict, identical to what's written to
         [output_file] if [output_file] is not None.
     """
-
+
     ## Input validation
-
+
     if options is None:
         options = ClassificationSmoothingOptions()
-
+
     r = _prepare_results_for_smoothing(input_file, options)
     d = r['d']
     other_category_ids = r['other_category_ids']
     classification_descriptions_clean = r['classification_descriptions_clean']
     classification_descriptions = r['classification_descriptions']
-
-
+
+
     ## Smoothing
-
+
     n_other_classifications_changed = 0
     n_other_images_changed = 0
     n_taxonomic_images_changed = 0
-
+
     n_detections_flipped = 0
     n_images_changed = 0
-    n_taxonomic_classification_changes = 0
-
-    # im = d['images'][0]
+    n_taxonomic_classification_changes = 0
+
+    # im = d['images'][0]
     for im in tqdm(d['images']):
-
+
         r = _smooth_single_image(im,
                                  options,
                                  other_category_ids,
                                  classification_descriptions=classification_descriptions,
                                  classification_descriptions_clean=classification_descriptions_clean)
-
+
         if r is None:
             continue
-
+
         n_detections_flipped_this_image = r['n_detections_flipped_this_image']
         n_other_classifications_changed_this_image = \
             r['n_other_classifications_changed_this_image']
         n_taxonomic_changes_this_image = r['n_taxonomic_changes_this_image']
-
+
         n_detections_flipped += n_detections_flipped_this_image
         n_other_classifications_changed += n_other_classifications_changed_this_image
         n_taxonomic_classification_changes += n_taxonomic_changes_this_image
-
+
         if n_detections_flipped_this_image > 0:
             n_images_changed += 1
         if n_other_classifications_changed_this_image > 0:
             n_other_images_changed += 1
        if n_taxonomic_changes_this_image > 0:
             n_taxonomic_images_changed += 1
-
-    # ...for each image
-
+
+    # ...for each image
+
     print('Classification smoothing: changed {} detections on {} images'.format(
         n_detections_flipped,n_images_changed))
-
+
     print('"Other" smoothing: changed {} detections on {} images'.format(
         n_other_classifications_changed,n_other_images_changed))
-
+
     print('Taxonomic smoothing: changed {} detections on {} images'.format(
         n_taxonomic_classification_changes,n_taxonomic_images_changed))
-
-
+
+
     ## Write output
-
-    if output_file is not None:
+
+    if output_file is not None:
         print('Writing results after image-level smoothing to:\n{}'.format(output_file))
         with open(output_file,'w') as f:
-            json.dump(d,f,indent=1)
+            json.dump(d,f,indent=1)
 
     return d
 
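As the Args section above notes, input_file can also be an already-loaded results dict; whether the input is copied before modification is governed by options.modify_in_place. A sketch (the path is hypothetical):

    import json

    with open('md_results_with_classifications.json','r') as f:
        results = json.load(f)

    # Smooth in memory without writing an output file
    smoothed_results = smooth_classification_results_image_level(results)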
@@ -811,7 +880,7 @@
 
 
 #%% Sequence-level smoothing
-
+
 def smooth_classification_results_sequence_level(input_file,
                                                  cct_sequence_information,
                                                  output_file=None,
@@ -819,10 +888,10 @@ def smooth_classification_results_sequence_level(input_file,
     """
     Smooth classifications at the sequence level for all results in the MD-formatted results
     file [md_results_file], optionally writing a new set of results to [output_file].
-
+
     This function generally expresses the notion that a sequence that looks like
     deer/deer/deer/elk/deer/deer/deer/deer is really just a deer.
-
+
     Args:
         input_file (str or dict): MegaDetector-formatted classification results file to smooth
             (or already-loaded results). If you supply a dict, it's modified in place by default, but
@@ -830,28 +899,28 @@ def smooth_classification_results_sequence_level(input_file,
         cct_sequence_information (str, dict, or list): COCO Camera Traps file containing sequence IDs for
             each image (or an already-loaded CCT-formatted dict, or just the 'images' list from a CCT dict).
         output_file (str, optional): .json file to write smoothed results
-        options (ClassificationSmoothingOptions, optional): see
+        options (ClassificationSmoothingOptions, optional): see
             ClassificationSmoothingOptions for details.
-
+
     Returns:
         dict: MegaDetector-results-formatted dict, identical to what's written to
         [output_file] if [output_file] is not None.
     """
-
+
     ## Input validation
-
+
     if options is None:
         options = ClassificationSmoothingOptions()
-
+
     r = _prepare_results_for_smoothing(input_file, options)
     d = r['d']
     other_category_ids = r['other_category_ids']
     classification_descriptions_clean = r['classification_descriptions_clean']
     classification_descriptions = r['classification_descriptions']
-
-
+
+
     ## Make a list of images appearing in each sequence
-
+
     if isinstance(cct_sequence_information,list):
         image_info = cct_sequence_information
     elif isinstance(cct_sequence_information,str):
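Sequence-level smoothing needs sequence assignments in addition to the MD results; per the Args section above, cct_sequence_information can be a COCO Camera Traps .json file, an already-loaded CCT dict, or just a CCT-style 'images' list with 'seq_id' and 'file_name' fields. A minimal sketch (paths hypothetical):

    smoothed_results = smooth_classification_results_sequence_level(
        input_file='md_results_with_classifications.json',
        cct_sequence_information='cct_with_sequence_ids.json',
        output_file='md_results_smoothed_sequence_level.json')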
@@ -862,77 +931,77 @@
     else:
         assert isinstance(cct_sequence_information,dict)
         image_info = cct_sequence_information['images']
-
+
     sequence_to_image_filenames = defaultdict(list)
-
+
     # im = image_info[0]
     for im in tqdm(image_info):
-        sequence_to_image_filenames[im['seq_id']].append(im['file_name'])
+        sequence_to_image_filenames[im['seq_id']].append(im['file_name'])
     del image_info
-
+
     image_fn_to_classification_results = {}
     for im in d['images']:
         fn = im['file']
         assert fn not in image_fn_to_classification_results
         image_fn_to_classification_results[fn] = im
-
-
+
+
     ## Smoothing
-
+
     n_other_classifications_changed = 0
     n_other_sequences_changed = 0
     n_taxonomic_sequences_changed = 0
     n_within_family_sequences_changed = 0
-
+
     n_detections_flipped = 0
     n_sequences_changed = 0
-    n_taxonomic_classification_changes = 0
-    n_within_family_changes = 0
-
+    n_taxonomic_classification_changes = 0
+    n_within_family_changes = 0
+
     # sequence_id = list(sequence_to_image_filenames.keys())[0]
     for sequence_id in sequence_to_image_filenames.keys():
 
         image_filenames_this_sequence = sequence_to_image_filenames[sequence_id]
-
+
         # if 'file' in image_filenames_this_sequence:
-        #     import pdb; pdb.set_trace()
-
+        #     from IPython import embed; embed()
+
         detections_this_sequence = []
         for image_filename in image_filenames_this_sequence:
             im = image_fn_to_classification_results[image_filename]
             if 'detections' not in im or im['detections'] is None:
                 continue
             detections_this_sequence.extend(im['detections'])
-
+
             # Temporarily add image filenames to every detection,
             # for debugging
             for det in im['detections']:
                 det['image_filename'] = im['file']
-
+
         if len(detections_this_sequence) == 0:
             continue
-
+
         r = _smooth_classifications_for_list_of_detections(
-            detections=detections_this_sequence,
-            options=options,
+            detections=detections_this_sequence,
+            options=options,
             other_category_ids=other_category_ids,
-            classification_descriptions=classification_descriptions,
+            classification_descriptions=classification_descriptions,
             classification_descriptions_clean=classification_descriptions_clean)
-
+
         if r is None:
             continue
-
+
         n_detections_flipped_this_sequence = r['n_detections_flipped_this_image']
         n_other_classifications_changed_this_sequence = \
             r['n_other_classifications_changed_this_image']
         n_taxonomic_changes_this_sequence = r['n_taxonomic_changes_this_image']
         n_within_family_changes_this_sequence = r['n_within_family_smoothing_changes']
-
+
         n_detections_flipped += n_detections_flipped_this_sequence
         n_other_classifications_changed += n_other_classifications_changed_this_sequence
         n_taxonomic_classification_changes += n_taxonomic_changes_this_sequence
         n_within_family_changes += n_within_family_changes_this_sequence
-
+
         if n_detections_flipped_this_sequence > 0:
             n_sequences_changed += 1
         if n_other_classifications_changed_this_sequence > 0:
@@ -941,40 +1010,468 @@ def smooth_classification_results_sequence_level(input_file,
             n_taxonomic_sequences_changed += 1
         if n_within_family_changes_this_sequence > 0:
             n_within_family_sequences_changed += 1
-
+
     # ...for each sequence
-
+
     print('Classification smoothing: changed {} detections in {} sequences'.format(
         n_detections_flipped,n_sequences_changed))
-
+
     print('"Other" smoothing: changed {} detections in {} sequences'.format(
         n_other_classifications_changed,n_other_sequences_changed))
-
+
     print('Taxonomic smoothing: changed {} detections in {} sequences'.format(
         n_taxonomic_classification_changes,n_taxonomic_sequences_changed))

     print('Within-family smoothing: changed {} detections in {} sequences'.format(
         n_within_family_changes,n_within_family_sequences_changed))
-
-
+
+
     ## Clean up debug information
-
+
     for im in d['images']:
         if 'detections' not in im or im['detections'] is None:
             continue
         for det in im['detections']:
             if 'image_filename' in det:
                 del det['image_filename']
-
+

     ## Write output
-
-    if output_file is not None:
+
+    if output_file is not None:
         print('Writing sequence-smoothed classification results to {}'.format(
-            output_file))
+            output_file))
         with open(output_file,'w') as f:
             json.dump(d,f,indent=1)
-
+
     return d

 # ...smooth_classification_results_sequence_level(...)
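+
+# Illustrative usage sketch, added here for clarity (not part of the package;
+# filenames are hypothetical). Assuming 'results.json' holds MD-formatted
+# classification results and 'cct.json' is a COCO Camera Traps file with a
+# 'seq_id' field on each image, a call might look like:
+#
+# smoothed = smooth_classification_results_sequence_level(
+#     input_file='results.json',
+#     cct_sequence_information='cct.json',
+#     output_file='results_seq_smoothed.json')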
+
+
+def restrict_to_taxa_list(taxa_list,
+                          speciesnet_taxonomy_file,
+                          input_file,
+                          output_file,
+                          allow_walk_down=False,
+                          add_pre_filtering_description=True):
+    """
+    Given a prediction file in MD .json format, typically one to which a geofence
+    has not yet been applied, restrict the predictions to a custom taxa list.
+
+    Args:
+        taxa_list (str or list): list of latin names, or a text file containing
+            a list of latin names. Optionally may contain a second (comma-delimited)
+            column containing common names, used only for debugging. Latin names
+            must exist in the SpeciesNet taxonomy.
+        speciesnet_taxonomy_file (str): taxonomy filename, in the same format used for
+            model release (with 7-token taxonomy entries)
+        input_file (str): .json file to read, in MD format. This can be None, in which
+            case this function just validates [taxa_list].
+        output_file (str): .json file to write, in MD format
+        allow_walk_down (bool, optional): should we walk down the taxonomy tree
+            when making mappings if a parent has only a single allowable child?
+            For example, if only a single felid species is allowed, should other
+            felid predictions be mapped to that species, as opposed to being mapped
+            to the family?
+        add_pre_filtering_description (bool, optional): should we add a new metadata
+            field that summarizes each image's classifications prior to taxonomic
+            restriction?
+    """
+
+    ##%% Read target taxa list
+
+    if isinstance(taxa_list,str):
+        assert os.path.isfile(taxa_list), \
+            'Could not find taxa list file {}'.format(taxa_list)
+        with open(taxa_list,'r') as f:
+            taxa_list = f.readlines()
+
+    taxa_list = [s.strip().lower() for s in taxa_list]
+    taxa_list = [s for s in taxa_list if len(s) > 0]
+
+    target_latin_to_common = {}
+    for s in taxa_list:
+        if s.strip().startswith('#'):
+            continue
+        tokens = s.split(',')
+        assert len(tokens) <= 2
+        binomial_name = tokens[0]
+        assert len(binomial_name.split(' ')) in (1,2,3), \
+            'Illegal taxon name in species list: {}'.format(binomial_name)
+        # The common-name column is optional; without this length check,
+        # single-column lines would trigger an IndexError below.
+        if len(tokens) > 1:
+            common_name = tokens[1].strip().lower()
+        else:
+            common_name = None
+        assert binomial_name not in target_latin_to_common
+        target_latin_to_common[binomial_name] = common_name
+
+
+    ##%% Read taxonomy file
+
+    with open(speciesnet_taxonomy_file,'r') as f:
+        speciesnet_taxonomy_list = f.readlines()
+    speciesnet_taxonomy_list = [s.strip() for s in \
+        speciesnet_taxonomy_list if len(s.strip()) > 0]
+
+    # Maps the latin name of every taxon to the corresponding full taxon string
+    #
+    # For species, the key is a binomial name
+    speciesnet_latin_name_to_taxon_string = {}
+    speciesnet_common_name_to_taxon_string = {}
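+
+    # Illustrative taxonomy entry (hypothetical GUID; real entries follow the
+    # 7-token format guid;class;order;family;genus;species;common_name):
+    #
+    #   fake-guid;mammalia;rodentia;sciuridae;sciurus;carolinensis;eastern gray squirrel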
+
+    def _insert_taxonomy_string(s):
+
+        tokens = s.split(';')
+        assert len(tokens) == 7
+
+        guid = tokens[0] # noqa
+        class_name = tokens[1]
+        order = tokens[2]
+        family = tokens[3]
+        genus = tokens[4]
+        species = tokens[5]
+        common_name = tokens[6]
+
+        if len(class_name) == 0:
+            assert common_name in ('animal','vehicle','blank')
+            return
+
+        if len(species) > 0:
+            assert all([len(s) > 0 for s in [genus,family,order]])
+            binomial_name = genus + ' ' + species
+            if binomial_name not in speciesnet_latin_name_to_taxon_string:
+                speciesnet_latin_name_to_taxon_string[binomial_name] = s
+        elif len(genus) > 0:
+            assert all([len(s) > 0 for s in [family,order]])
+            if genus not in speciesnet_latin_name_to_taxon_string:
+                speciesnet_latin_name_to_taxon_string[genus] = s
+        elif len(family) > 0:
+            assert len(order) > 0
+            if family not in speciesnet_latin_name_to_taxon_string:
+                speciesnet_latin_name_to_taxon_string[family] = s
+        elif len(order) > 0:
+            if order not in speciesnet_latin_name_to_taxon_string:
+                speciesnet_latin_name_to_taxon_string[order] = s
+        else:
+            if class_name not in speciesnet_latin_name_to_taxon_string:
+                speciesnet_latin_name_to_taxon_string[class_name] = s
+
+        if len(common_name) > 0:
+            if common_name not in speciesnet_common_name_to_taxon_string:
+                speciesnet_common_name_to_taxon_string[common_name] = s
+
+    for s in speciesnet_taxonomy_list:
+
+        _insert_taxonomy_string(s)
+
+
+    ##%% Make sure all parent taxa are represented in the taxonomy
+
+    # In theory any taxon that appears as the parent of another taxon should
+    # also be in the taxonomy, but this isn't always true, so we fix it here.
+
+    new_taxon_string_to_missing_tokens = defaultdict(list)
+
+    # latin_name = next(iter(speciesnet_latin_name_to_taxon_string.keys()))
+    for latin_name in speciesnet_latin_name_to_taxon_string.keys():
+
+        if 'no cv result' in latin_name:
+            continue
+
+        taxon_string = speciesnet_latin_name_to_taxon_string[latin_name]
+        tokens = taxon_string.split(';')
+
+        # Don't process GUID, species, or common name
+        # i_token = 6
+        for i_token in range(1,len(tokens)-2):
+
+            test_token = tokens[i_token]
+            if len(test_token) == 0:
+                continue
+
+            # Do we need to make up a taxon for this token?
+            if test_token not in speciesnet_latin_name_to_taxon_string:
+
+                new_tokens = [''] * 7
+                new_tokens[0] = 'fake_guid'
+                for i_copy_token in range(1,i_token+1):
+                    new_tokens[i_copy_token] = tokens[i_copy_token]
+                new_tokens[-1] = test_token + ' species'
+                assert new_tokens[-2] == ''
+                new_taxon_string = ';'.join(new_tokens)
+                # assert new_taxon_string not in new_taxon_strings
+                new_taxon_string_to_missing_tokens[new_taxon_string].append(test_token)
+
+        # ...for each token
+
+    # ...for each taxon
+
+    print('Found {} taxa that need to be inserted to make the taxonomy valid:\n'.format(
+        len(new_taxon_string_to_missing_tokens)))
+
+    new_taxon_string_to_missing_tokens = \
+        sort_dictionary_by_key(new_taxon_string_to_missing_tokens)
+    for taxon_string in new_taxon_string_to_missing_tokens:
+        missing_taxa = ','.join(new_taxon_string_to_missing_tokens[taxon_string])
+        print('{} ({})'.format(taxon_string,missing_taxa))
+
+    for new_taxon_string in new_taxon_string_to_missing_tokens:
+        _insert_taxonomy_string(new_taxon_string)
+
+
+    ##%% Make sure all species on the allow-list are in the taxonomy
+
+    n_failed_mappings = 0
+
+    for target_taxon_latin_name in target_latin_to_common.keys():
+        if target_taxon_latin_name not in speciesnet_latin_name_to_taxon_string:
+            common_name = target_latin_to_common[target_taxon_latin_name]
+            s = '{} ({}) not in speciesnet taxonomy'.format(
+                target_taxon_latin_name,common_name)
+            if common_name in speciesnet_common_name_to_taxon_string:
+                s += ' (common name maps to {})'.format(
+                    speciesnet_common_name_to_taxon_string[common_name])
+            print(s)
+            n_failed_mappings += 1
+
+    if n_failed_mappings > 0:
+        raise ValueError('Cannot continue with taxa list restriction')
+
+
+    ##%% For the allow-list, map each parent taxon to a set of allowable child taxa
+
+    # Maps parent names to all allowed child names, or None if this is the
+    # lowest-level allowable taxon on this path
+    allowed_parent_taxon_to_child_taxa = defaultdict(set)
+
+    # latin_name = next(iter(target_latin_to_common.keys()))
+    for latin_name in target_latin_to_common:
+
+        taxon_string = speciesnet_latin_name_to_taxon_string[latin_name]
+        tokens = taxon_string.split(';')
+        assert len(tokens) == 7
+
+        # Remove GUID and common name
+        #
+        # This is now always class/order/family/genus/species
+        tokens = tokens[1:-1]
+
+        child_taxon = None
+
+        # If this is a species
+        if len(tokens[-1]) > 0:
+            binomial_name = tokens[-2] + ' ' + tokens[-1]
+            assert binomial_name == latin_name
+            allowed_parent_taxon_to_child_taxa[binomial_name].add(None)
+            child_taxon = binomial_name
+
+        # The first candidate parent is the genus
+        parent_token_index = len(tokens) - 2
+
+        while(parent_token_index >= 0):
+
+            parent_taxon = tokens[parent_token_index]
+            # Skip empty levels (e.g. the genus slot for a family-level entry);
+            # otherwise empty strings end up as parent taxa.
+            if len(parent_taxon) > 0:
+                allowed_parent_taxon_to_child_taxa[parent_taxon].add(child_taxon)
+                child_taxon = parent_taxon
+            parent_token_index -= 1
+
+    # ...for each allowed latin name
+
+    allowed_parent_taxon_to_child_taxa = \
+        sort_dictionary_by_key(allowed_parent_taxon_to_child_taxa)
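+
+    # Illustrative result (for a hypothetical allow-list containing only
+    # 'puma concolor'), the mapping would be:
+    #
+    #   {'carnivora': {'felidae'},
+    #    'felidae': {'puma'},
+    #    'mammalia': {'carnivora'},
+    #    'puma': {'puma concolor'},
+    #    'puma concolor': {None}}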
+
+
+    ##%% If we were just validating the custom taxa file, we're done
+
+    if input_file is None:
+        print('Finished validating custom taxonomy list')
+        return
+
+
+    ##%% Map all predictions that exist in this dataset...
+
+    # ...to the prediction we should generate.
+
+    with open(input_file,'r') as f:
+        input_data = json.load(f)
+
+    input_category_id_to_common_name = input_data['classification_categories'] #noqa
+    input_category_id_to_taxonomy_string = \
+        input_data['classification_category_descriptions']
+
+    input_category_id_to_output_taxon_string = {}
+
+    # input_category_id = next(iter(input_category_id_to_taxonomy_string.keys()))
+    for input_category_id in input_category_id_to_taxonomy_string.keys():
+
+        input_taxon_string = input_category_id_to_taxonomy_string[input_category_id]
+        input_taxon_tokens = input_taxon_string.split(';')
+        assert len(input_taxon_tokens) == 7
+
+        # Don't mess with blank/no-cv-result/animal/human
+        if (input_taxon_string in non_taxonomic_prediction_strings) or \
+           (input_taxon_string == human_prediction_string):
+            input_category_id_to_output_taxon_string[input_category_id] = \
+                input_taxon_string
+            continue
+
+        # Remove GUID and common name
+
+        # This is now always class/order/family/genus/species
+        input_taxon_tokens = input_taxon_tokens[1:-1]
+
+        test_index = len(input_taxon_tokens) - 1
+        target_taxon = None
+
+        # Start at the species level, and see whether each taxon is allowed
+        while((test_index >= 0) and (target_taxon is None)):
+
+            # Species are represented as binomial names
+            if (test_index == (len(input_taxon_tokens) - 1)) and \
+               (len(input_taxon_tokens[-1]) > 0):
+                test_taxon_name = \
+                    input_taxon_tokens[-2] + ' ' + input_taxon_tokens[-1]
+            else:
+                test_taxon_name = input_taxon_tokens[test_index]
+
+            # If we haven't yet found the level at which this taxon is non-empty,
+            # keep going up
+            if len(test_taxon_name) == 0:
+                test_index -= 1
+                continue
+
+            assert test_taxon_name in speciesnet_latin_name_to_taxon_string
+
+            # Is this taxon allowed according to the custom species list?
+            if test_taxon_name in allowed_parent_taxon_to_child_taxa:
+
+                allowed_child_taxa = allowed_parent_taxon_to_child_taxa[test_taxon_name]
+                assert allowed_child_taxa is not None
+
+                # If this is the lowest-level allowable taxon or there is not a
+                # unique child, don't walk any further, even if walking down
+                # is enabled.
+                if (None in allowed_child_taxa):
+                    assert len(allowed_child_taxa) == 1
+
+                if (None in allowed_child_taxa) or (len(allowed_child_taxa) > 1):
+                    target_taxon = test_taxon_name
+                elif not allow_walk_down:
+                    target_taxon = test_taxon_name
+                else:
+                    # If there's a unique child, walk back *down* the allowable
+                    # taxa until we run out of unique children
+                    while ((next(iter(allowed_child_taxa)) is not None) and \
+                           (len(allowed_child_taxa) == 1)):
+                        candidate_taxon = next(iter(allowed_child_taxa))
+                        assert candidate_taxon in allowed_parent_taxon_to_child_taxa
+                        assert candidate_taxon in speciesnet_latin_name_to_taxon_string
+                        allowed_child_taxa = \
+                            allowed_parent_taxon_to_child_taxa[candidate_taxon]
+                        target_taxon = candidate_taxon
+
+            # ...if this is an allowed taxon
+
+            test_index -= 1
+
+        # ...for each token
+
+        if target_taxon is None:
+            output_taxon_string = animal_prediction_string
+        else:
+            output_taxon_string = speciesnet_latin_name_to_taxon_string[target_taxon]
+        input_category_id_to_output_taxon_string[input_category_id] = output_taxon_string
+
+    # ...for each category
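+
+    # Illustrative behavior (for a hypothetical allow-list containing only
+    # 'puma concolor'): a 'lynx rufus' prediction fails the species and genus
+    # tests, then hits 'felidae', which is an allowed parent. With
+    # allow_walk_down=False it maps to 'felidae'; with allow_walk_down=True
+    # the walk-down loop follows the unique children ('puma', then
+    # 'puma concolor'), so it maps to 'puma concolor'.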
+
+
+    ##%% Build the new tables
+
+    input_category_id_to_output_category_id = {}
+    output_taxon_string_to_category_id = {}
+    output_category_id_to_common_name = {}
+
+    for input_category_id in input_category_id_to_output_taxon_string:
+
+        original_common_name = \
+            input_category_id_to_common_name[input_category_id]
+        original_taxon_string = \
+            input_category_id_to_taxonomy_string[input_category_id]
+        output_taxon_string = \
+            input_category_id_to_output_taxon_string[input_category_id]
+
+        output_common_name = output_taxon_string.split(';')[-1]
+
+        # Do we need to create a new output category?
+        if output_taxon_string not in output_taxon_string_to_category_id:
+            output_category_id = str(len(output_taxon_string_to_category_id))
+            output_taxon_string_to_category_id[output_taxon_string] = \
+                output_category_id
+            output_category_id_to_common_name[output_category_id] = \
+                output_common_name
+        else:
+            output_category_id = \
+                output_taxon_string_to_category_id[output_taxon_string]
+
+        input_category_id_to_output_category_id[input_category_id] = \
+            output_category_id
+
+        if False:
+            print('Mapping {} ({}) to:\n{} ({})\n'.format(
+                original_common_name,original_taxon_string,
+                output_common_name,output_taxon_string))
+        if False:
+            print('Mapping {} to {}'.format(
+                original_common_name,output_common_name))
+
+    # ...for each category
+
+
+    ##%% Remap all category labels
+
+    assert len(set(output_taxon_string_to_category_id.keys())) == \
+           len(set(output_taxon_string_to_category_id.values()))
+
+    output_category_id_to_taxon_string = \
+        invert_dictionary(output_taxon_string_to_category_id)
+
+    with open(input_file,'r') as f:
+        output_data = json.load(f)
+
+    classification_descriptions = None
+    if 'classification_category_descriptions' in output_data:
+        classification_descriptions = output_data['classification_category_descriptions']
+
+    for im in tqdm(output_data['images']):
+
+        if 'detections' not in im or im['detections'] is None:
+            continue
+
+        # Possibly prepare a pre-filtering description
+        pre_filtering_description = None
+        if classification_descriptions is not None and add_pre_filtering_description:
+            category_to_count = count_detections_by_classification_category(im['detections'])
+            pre_filtering_description = \
+                get_classification_description_string(category_to_count,classification_descriptions)
+            im['pre_filtering_description'] = pre_filtering_description
+
+        for det in im['detections']:
+            if 'classifications' in det:
+                for classification in det['classifications']:
+                    classification[0] = \
+                        input_category_id_to_output_category_id[classification[0]]
+
+    # ...for each image
+
+    output_data['classification_categories'] = output_category_id_to_common_name
+    output_data['classification_category_descriptions'] = \
+        output_category_id_to_taxon_string
+
+
+    ##%% Write output
+
+    with open(output_file,'w') as f:
+        json.dump(output_data,f,indent=1)
+
+# ...def restrict_to_taxa_list(...)
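+
+
+# Illustrative usage sketch (not part of the package; filenames are hypothetical):
+#
+# restrict_to_taxa_list(taxa_list='allowed_taxa.txt',
+#                       speciesnet_taxonomy_file='speciesnet_taxonomy.txt',
+#                       input_file='predictions.json',
+#                       output_file='predictions_restricted.json')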