megadetector 5.0.28-py3-none-any.whl → 5.0.29-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (176)
  1. megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
  2. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
  3. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  7. megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
  8. megadetector/classification/aggregate_classifier_probs.py +3 -3
  9. megadetector/classification/analyze_failed_images.py +5 -5
  10. megadetector/classification/cache_batchapi_outputs.py +5 -5
  11. megadetector/classification/create_classification_dataset.py +11 -12
  12. megadetector/classification/crop_detections.py +10 -10
  13. megadetector/classification/csv_to_json.py +8 -8
  14. megadetector/classification/detect_and_crop.py +13 -15
  15. megadetector/classification/evaluate_model.py +7 -7
  16. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  17. megadetector/classification/json_to_azcopy_list.py +1 -1
  18. megadetector/classification/json_validator.py +29 -32
  19. megadetector/classification/map_classification_categories.py +9 -9
  20. megadetector/classification/merge_classification_detection_output.py +12 -9
  21. megadetector/classification/prepare_classification_script.py +19 -19
  22. megadetector/classification/prepare_classification_script_mc.py +23 -23
  23. megadetector/classification/run_classifier.py +4 -4
  24. megadetector/classification/save_mislabeled.py +6 -6
  25. megadetector/classification/train_classifier.py +1 -1
  26. megadetector/classification/train_classifier_tf.py +9 -9
  27. megadetector/classification/train_utils.py +10 -10
  28. megadetector/data_management/annotations/annotation_constants.py +1 -1
  29. megadetector/data_management/camtrap_dp_to_coco.py +45 -45
  30. megadetector/data_management/cct_json_utils.py +101 -101
  31. megadetector/data_management/cct_to_md.py +49 -49
  32. megadetector/data_management/cct_to_wi.py +33 -33
  33. megadetector/data_management/coco_to_labelme.py +75 -75
  34. megadetector/data_management/coco_to_yolo.py +189 -189
  35. megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
  36. megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
  37. megadetector/data_management/databases/integrity_check_json_db.py +202 -188
  38. megadetector/data_management/databases/subset_json_db.py +33 -33
  39. megadetector/data_management/generate_crops_from_cct.py +38 -38
  40. megadetector/data_management/get_image_sizes.py +54 -49
  41. megadetector/data_management/labelme_to_coco.py +130 -124
  42. megadetector/data_management/labelme_to_yolo.py +78 -72
  43. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  44. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  45. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  46. megadetector/data_management/lila/download_lila_subset.py +21 -24
  47. megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
  48. megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
  49. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  50. megadetector/data_management/lila/lila_common.py +70 -70
  51. megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
  52. megadetector/data_management/mewc_to_md.py +339 -340
  53. megadetector/data_management/ocr_tools.py +258 -252
  54. megadetector/data_management/read_exif.py +231 -224
  55. megadetector/data_management/remap_coco_categories.py +26 -26
  56. megadetector/data_management/remove_exif.py +31 -20
  57. megadetector/data_management/rename_images.py +187 -187
  58. megadetector/data_management/resize_coco_dataset.py +41 -41
  59. megadetector/data_management/speciesnet_to_md.py +41 -41
  60. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  61. megadetector/data_management/yolo_output_to_md_output.py +117 -120
  62. megadetector/data_management/yolo_to_coco.py +195 -188
  63. megadetector/detection/change_detection.py +831 -0
  64. megadetector/detection/process_video.py +340 -337
  65. megadetector/detection/pytorch_detector.py +304 -262
  66. megadetector/detection/run_detector.py +177 -164
  67. megadetector/detection/run_detector_batch.py +364 -363
  68. megadetector/detection/run_inference_with_yolov5_val.py +328 -325
  69. megadetector/detection/run_tiled_inference.py +256 -249
  70. megadetector/detection/tf_detector.py +24 -24
  71. megadetector/detection/video_utils.py +290 -282
  72. megadetector/postprocessing/add_max_conf.py +15 -11
  73. megadetector/postprocessing/categorize_detections_by_size.py +44 -44
  74. megadetector/postprocessing/classification_postprocessing.py +415 -415
  75. megadetector/postprocessing/combine_batch_outputs.py +20 -21
  76. megadetector/postprocessing/compare_batch_results.py +528 -517
  77. megadetector/postprocessing/convert_output_format.py +97 -97
  78. megadetector/postprocessing/create_crop_folder.py +219 -146
  79. megadetector/postprocessing/detector_calibration.py +173 -168
  80. megadetector/postprocessing/generate_csv_report.py +508 -499
  81. megadetector/postprocessing/load_api_results.py +23 -20
  82. megadetector/postprocessing/md_to_coco.py +129 -98
  83. megadetector/postprocessing/md_to_labelme.py +89 -83
  84. megadetector/postprocessing/md_to_wi.py +40 -40
  85. megadetector/postprocessing/merge_detections.py +87 -114
  86. megadetector/postprocessing/postprocess_batch_results.py +313 -298
  87. megadetector/postprocessing/remap_detection_categories.py +36 -36
  88. megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
  89. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  90. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  91. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
  92. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  93. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  94. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  95. megadetector/postprocessing/validate_batch_results.py +70 -70
  96. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  97. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
  98. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
  99. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -66
  100. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  101. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  102. megadetector/taxonomy_mapping/species_lookup.py +33 -33
  103. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  104. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  105. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  106. megadetector/utils/azure_utils.py +22 -22
  107. megadetector/utils/ct_utils.py +1018 -200
  108. megadetector/utils/directory_listing.py +21 -77
  109. megadetector/utils/gpu_test.py +22 -22
  110. megadetector/utils/md_tests.py +541 -518
  111. megadetector/utils/path_utils.py +1457 -398
  112. megadetector/utils/process_utils.py +41 -41
  113. megadetector/utils/sas_blob_utils.py +53 -49
  114. megadetector/utils/split_locations_into_train_val.py +61 -61
  115. megadetector/utils/string_utils.py +147 -26
  116. megadetector/utils/url_utils.py +463 -173
  117. megadetector/utils/wi_utils.py +2629 -2526
  118. megadetector/utils/write_html_image_list.py +137 -137
  119. megadetector/visualization/plot_utils.py +21 -21
  120. megadetector/visualization/render_images_with_thumbnails.py +37 -73
  121. megadetector/visualization/visualization_utils.py +401 -397
  122. megadetector/visualization/visualize_db.py +197 -190
  123. megadetector/visualization/visualize_detector_output.py +79 -73
  124. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/METADATA +135 -132
  125. megadetector-5.0.29.dist-info/RECORD +163 -0
  126. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
  127. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
  128. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
  129. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  130. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  131. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  132. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  133. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  134. megadetector/data_management/importers/awc_to_json.py +0 -191
  135. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  136. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  137. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  138. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  139. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  140. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  141. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  142. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  143. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  144. megadetector/data_management/importers/ena24_to_json.py +0 -276
  145. megadetector/data_management/importers/filenames_to_json.py +0 -386
  146. megadetector/data_management/importers/helena_to_cct.py +0 -283
  147. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  148. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  149. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  150. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  151. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  152. megadetector/data_management/importers/missouri_to_json.py +0 -490
  153. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  154. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  155. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  156. megadetector/data_management/importers/pc_to_json.py +0 -365
  157. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  158. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  159. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  160. megadetector/data_management/importers/rspb_to_json.py +0 -356
  161. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  162. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  163. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  164. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  165. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  166. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  167. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  168. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  169. megadetector/data_management/importers/ubc_to_json.py +0 -399
  170. megadetector/data_management/importers/umn_to_json.py +0 -507
  171. megadetector/data_management/importers/wellington_to_json.py +0 -263
  172. megadetector/data_management/importers/wi_to_json.py +0 -442
  173. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  174. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  175. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  176. megadetector-5.0.28.dist-info/RECORD +0 -209
megadetector/postprocessing/classification_postprocessing.py

@@ -3,12 +3,12 @@
 classification_postprocessing.py
 
 Functions for postprocessing species classification results, particularly:
-
+
 * Smoothing results within an image (an image with 700 cows and one deer is really just 701
   cows)
 * Smoothing results within a sequence (a sequence that looks like deer/deer/deer/elk/deer/deer
   is really just a deer)
-
+
 """
 
 #%% Constants and imports
@@ -32,7 +32,7 @@ from megadetector.utils.wi_utils import taxonomy_level_string_to_index
 from megadetector.utils.wi_utils import non_taxonomic_prediction_strings
 from megadetector.utils.wi_utils import human_prediction_string
 from megadetector.utils.wi_utils import animal_prediction_string
-from megadetector.utils.wi_utils import blank_prediction_string
+from megadetector.utils.wi_utils import blank_prediction_string # noqa
 
 
 #%% Options classes
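
The only substantive change in this hunk is the trailing `# noqa` marker; presumably this keeps blank_prediction_string importable from this module as a re-export while silencing unused-import warnings from flake8-style linters. A hypothetical downstream import that this preserves:

    # Downstream code can keep importing the name via this module
    from megadetector.postprocessing.classification_postprocessing import blank_prediction_string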
@@ -44,83 +44,83 @@ class ClassificationSmoothingOptions:
     """
 
     def __init__(self):
-
-        #: How many detections do we need in a dominant category to overwrite
-        #: non-dominant classifications? This is irrelevant if
+
+        #: How many detections do we need in a dominant category to overwrite
+        #: non-dominant classifications? This is irrelevant if
         #: max_detections_nondominant_class <= 1.
         self.min_detections_to_overwrite_secondary = 4
-
-        #: Even if we have a dominant class, if a non-dominant class has at least
+
+        #: Even if we have a dominant class, if a non-dominant class has at least
         #: this many classifications in an image, leave them alone.
         #:
         #: If this is <= 1, we won't replace non-dominant, non-other classes
         #: with the dominant class, even if there are 900 cows and 1 deer.
         self.max_detections_nondominant_class = 1
-
-        #: How many detections do we need in a dominant category to overwrite
-        #: non-dominant classifications in the same family? If this is <= 0,
-        #: we'll skip this step. This option doesn't mean anything if
+
+        #: How many detections do we need in a dominant category to overwrite
+        #: non-dominant classifications in the same family? If this is <= 0,
+        #: we'll skip this step. This option doesn't mean anything if
         #: max_detections_nondominant_class_same_family <= 1.
         self.min_detections_to_overwrite_secondary_same_family = 2
-
-        #: If we have this many classifications of a nondominant category,
+
+        #: If we have this many classifications of a nondominant category,
         #: we won't do same-family overwrites. <= 1 means "even if there are
         #: a million deer, if there are two million moose, call all the deer
-        #: moose". This option doesn't mean anything if
+        #: moose". This option doesn't mean anything if
         #: min_detections_to_overwrite_secondary_same_family <= 0.
         self.max_detections_nondominant_class_same_family = -1
-
-        #: If the dominant class has at least this many classifications, overwrite
+
+        #: If the dominant class has at least this many classifications, overwrite
         #: "other" classifications with the dominant class
         self.min_detections_to_overwrite_other = 2
-
+
         #: Names to treat as "other" categories; can't be None, but can be empty
         #:
         #: "Other" classifications will be changed to the dominant category, regardless
-        #: of confidence, as long as there are at least min_detections_to_overwrite_other
+        #: of confidence, as long as there are at least min_detections_to_overwrite_other
         #: examples of the dominant class. For example, cow/other will remain unchanged,
         #: but cow/cow/other will become cow/cow/cow.
         self.other_category_names = ['other','unknown','no cv result','animal','blank','mammal']
-
+
         #: We're not even going to mess around with classifications below this threshold.
         #:
         #: We won't count them, we won't over-write them, they don't exist during the
         #: within-image smoothing step.
         self.classification_confidence_threshold = 0.5
-
+
         #: We're not even going to mess around with detections below this threshold.
         #:
         #: We won't count them, we won't over-write them, they don't exist during the
         #: within-image smoothing step.
         self.detection_confidence_threshold = 0.15
-
+
         #: If classification descriptions are present and appear to represent taxonomic
-        #: information, should we propagate classifications when lower-level taxa are more
-        #: common in an image? For example, if we see "carnivore/fox/fox/deer", should
+        #: information, should we propagate classifications when lower-level taxa are more
+        #: common in an image? For example, if we see "carnivore/fox/fox/deer", should
         #: we make that "fox/fox/fox/deer"?
         self.propagate_classifications_through_taxonomy = True
-
-        #: When propagating classifications down through taxonomy levels, we have to
+
+        #: When propagating classifications down through taxonomy levels, we have to
         #: decide whether we prefer more frequent categories or more specific categories.
         #: taxonomy_propagation_level_weight and taxonomy_propagation_count_weight
         #: balance levels against counts in this process.
         self.taxonomy_propagation_level_weight = 1.0
-
-        #: When propagating classifications down through taxonomy levels, we have to
+
+        #: When propagating classifications down through taxonomy levels, we have to
         #: decide whether we prefer more frequent categories or more specific categories.
         #: taxonomy_propagation_level_weight and taxonomy_propagation_count_weight
         #: balance levels against counts in this process.
         #:
         #: With a very low default value, this just breaks ties.
         self.taxonomy_propagation_count_weight = 0.01
-
+
         #: Should we record information about the state of labels prior to smoothing?
         self.add_pre_smoothing_description = True
-
+
         #: When a dict (rather than a file) is passed to either smoothing function,
         #: if this is True, we'll make a copy of the input dict before modifying.
         self.modify_in_place = False
-
+
         #: Debug options
         self.break_at_image = None
 
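
As a point of reference (not part of the diff), here is a minimal sketch of constructing and adjusting these options before smoothing; the attribute names come from the class above, but the values shown are arbitrary:

    from megadetector.postprocessing.classification_postprocessing import \
        ClassificationSmoothingOptions

    options = ClassificationSmoothingOptions()

    # Require five dominant-class detections before overwriting secondary classes
    options.min_detections_to_overwrite_secondary = 5

    # Treat an additional (hypothetical) label as an "other" category
    options.other_category_names.append('unidentifiable')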
@@ -131,31 +131,31 @@ def _results_for_sequence(images_this_sequence,filename_to_results):
     """
     Fetch MD results for every image in this sequence, based on the 'file_name' field
     """
-
+
     results_this_sequence = []
     for im in images_this_sequence:
         fn = im['file_name']
         results_this_image = filename_to_results[fn]
         assert isinstance(results_this_image,dict)
         results_this_sequence.append(results_this_image)
-
+
     return results_this_sequence
-
-
+
+
 def _sort_images_by_time(images):
     """
     Returns a copy of [images], sorted by the 'datetime' field (ascending).
     """
-    return sorted(images, key = lambda im: im['datetime'])
+    return sorted(images, key = lambda im: im['datetime'])
 
 
 def count_detections_by_classification_category(detections,options=None):
     """
     Count the number of instances of each classification category in the detections list
-    [detections] that have an above-threshold detection. Sort results in descending
+    [detections] that have an above-threshold detection. Sort results in descending
     order by count. Returns a dict mapping category ID --> count. If no detections
     are above threshold, returns an empty dict.
-
+
     Only processes the top classification for each detection.
 
     Args:
@@ -165,26 +165,26 @@ def count_detections_by_classification_category(detections,options=None):
     Returns:
         dict mapping above-threshold category IDs to counts
     """
-
+
     if detections is None or len(detections) == 0:
         return {}
-
+
     if options is None:
         options = ClassificationSmoothingOptions()
 
     category_to_count = defaultdict(int)
-
+
     for det in detections:
         if ('classifications' in det) and (det['conf'] >= options.detection_confidence_threshold):
             # assert len(det['classifications']) == 1
             c = det['classifications'][0]
             if c[1] >= options.classification_confidence_threshold:
-                category_to_count[c[0]] += 1
-
+                category_to_count[c[0]] += 1
+
     category_to_count = {k: v for k, v in sorted(category_to_count.items(),
-                                                 key=lambda item: item[1],
+                                                 key=lambda item: item[1],
                                                  reverse=True)}
-
+
     return category_to_count
 
 
@@ -199,7 +199,7 @@ def get_classification_description_string(category_to_count,classification_descr
     Returns:
         string: a description of this image's content, e.g. "rabbit (4), human (1)"
     """
-
+
     category_strings = []
     # category_id = next(iter(category_to_count))
     for category_id in category_to_count:
@@ -212,29 +212,29 @@ def get_classification_description_string(category_to_count,classification_descr
         count = category_to_count[category_id]
         category_string = '{} ({})'.format(category_name,count)
         category_strings.append(category_string)
-
+
     return ', '.join(category_strings)
-
+
 
 def _print_counts_with_names(category_to_count,classification_descriptions):
     """
     Print a list of classification categories with counts, based in the name --> count
     dict [category_to_count]
     """
-
+
     for category_id in category_to_count:
         category_name = classification_descriptions[category_id]
         count = category_to_count[category_id]
         print('{}: {} ({})'.format(category_id,category_name,count))
-
-
+
+
 def _prepare_results_for_smoothing(input_file,options):
     """
-    Load results from [input_file] if necessary, prepare category descriptions
+    Load results from [input_file] if necessary, prepare category descriptions
     for smoothing. Adds pre-smoothing descriptions to every image if the options
     say we're supposed to do that.
     """
-
+
     if isinstance(input_file,str):
         with open(input_file,'r') as f:
             print('Loading results from:\n{}'.format(input_file))
@@ -249,71 +249,71 @@ def _prepare_results_for_smoothing(input_file,options):
 
 
     ## Category processing
-
+
     category_name_to_id = {d['classification_categories'][k]:k for k in d['classification_categories']}
     other_category_ids = []
     for s in options.other_category_names:
         if s in category_name_to_id:
             other_category_ids.append(category_name_to_id[s])
-
+
     # Before we do anything else, get rid of everything but the top classification
     # for each detection, and remove the 'classifications' field from detections with
     # no classifications.
     for im in tqdm(d['images']):
-
+
         if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
             continue
-
+
         detections = im['detections']
-
+
         for det in detections:
-
+
             if 'classifications' not in det:
                 continue
             if len(det['classifications']) == 0:
                 del det['classifications']
                 continue
-
+
             classification_confidence_values = [c[1] for c in det['classifications']]
             assert is_list_sorted(classification_confidence_values,reverse=True)
             det['classifications'] = [det['classifications'][0]]
-
+
         # ...for each detection in this image
-
+
     # ...for each image
-
-
+
+
     ## Clean up classification descriptions so we can test taxonomic relationships
     ## by substring testing.
-
+
     classification_descriptions_clean = None
     classification_descriptions = None
-
+
     if 'classification_category_descriptions' in d:
         classification_descriptions = d['classification_category_descriptions']
         classification_descriptions_clean = {}
         # category_id = next(iter(classification_descriptions))
-        for category_id in classification_descriptions:
+        for category_id in classification_descriptions:
             classification_descriptions_clean[category_id] = \
                 clean_taxonomy_string(classification_descriptions[category_id]).strip(';').lower()
-
-
+
+
     ## Optionally add pre-smoothing descriptions to every image
-
-    if options.add_pre_smoothing_description:
-
+
+    if options.add_pre_smoothing_description and (classification_descriptions is not None):
+
         for im in tqdm(d['images']):
-
+
             if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
                 continue
-
-            detections = im['detections']
+
+            detections = im['detections']
             category_to_count = count_detections_by_classification_category(detections, options)
-
+
             im['pre_smoothing_description'] = \
                 get_classification_description_string(category_to_count, classification_descriptions)
-
-
+
+
     return {
         'd':d,
         'other_category_ids':other_category_ids,
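
The one behavioral change in this hunk is the added None guard on line 303: in 5.0.28, asking for pre-smoothing descriptions on a results file with no 'classification_category_descriptions' key would reach get_classification_description_string with classification_descriptions=None and presumably fail on the first description lookup; in 5.0.29 the step is simply skipped. A sketch of the input that triggers the guard (hypothetical, minimal):

    d = {
        'images': [...],
        'classification_categories': {'10': 'cow'}
        # no 'classification_category_descriptions' key, so
        # classification_descriptions stays None and the
        # pre-smoothing-description step is now skipped
    }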
@@ -321,7 +321,7 @@ def _prepare_results_for_smoothing(input_file,options):
         'classification_descriptions':classification_descriptions
     }
 
-    # ...def _prepare_results_for_smoothing(...)
+    # ...def _prepare_results_for_smoothing(...)
 
 
 def _smooth_classifications_for_list_of_detections(detections,
@@ -332,142 +332,142 @@ def _smooth_classifications_for_list_of_detections(detections,
     """
     Smooth classifications for a list of detections, which may have come from a single
     image, or may represent an entire sequence.
-
+
     Returns None if no changes are made, else a dict.
-
-    classification_descriptions_clean should be semicolon-delimited taxonomic strings
+
+    classification_descriptions_clean should be semicolon-delimited taxonomic strings
     from which common names and GUIDs have already been removed.
-
+
     Assumes there is only one classification per detection, i.e. that non-top classifications
-    have already been remoevd.
+    have already been remoevd.
     """
-
+
     ## Count the number of instances of each category in this image
-
+
     category_to_count = count_detections_by_classification_category(detections, options)
     # _print_counts_with_names(category_to_count,classification_descriptions)
     # get_classification_description_string(category_to_count, classification_descriptions)
-
+
     if len(category_to_count) <= 1:
         return None
-
+
     keys = list(category_to_count.keys())
-
-    # Handle a quirky special case: if the most common category is "other" and
+
+    # Handle a quirky special case: if the most common category is "other" and
     # it's "tied" with the second-most-common category, swap them
     if (len(keys) > 1) and \
        (keys[0] in other_category_ids) and \
        (keys[1] not in other_category_ids) and \
        (category_to_count[keys[0]] == category_to_count[keys[1]]):
         keys[1], keys[0] = keys[0], keys[1]
-
-    max_count = category_to_count[keys[0]]
+
+    max_count = category_to_count[keys[0]]
     most_common_category = keys[0]
     del keys
-
-
+
+
     ## Debug tools
-
+
     verbose_debug_enabled = False
-
+
     if options.break_at_image is not None:
         for det in detections:
             if 'image_filename' in det and \
                det['image_filename'] == options.break_at_image:
                 verbose_debug_enabled = True
                 break
-
+
     if verbose_debug_enabled:
         _print_counts_with_names(category_to_count,classification_descriptions)
         from IPython import embed; embed()
-
-
+
+
     ## Possibly change "other" classifications to the most common category
-
+
     # ...if the dominant category is not an "other" category.
-
+
     n_other_classifications_changed_this_image = 0
-
+
     # If we have at least *min_detections_to_overwrite_other* in a category that isn't
     # "other", change all "other" classifications to that category
     if (max_count >= options.min_detections_to_overwrite_other) and \
        (most_common_category not in other_category_ids):
-
+
         for det in detections:
-
+
             if ('classifications' not in det) or \
                (det['conf'] < options.detection_confidence_threshold):
                 continue
-
+
             assert len(det['classifications']) == 1
             c = det['classifications'][0]
-
+
             if (c[1] >= options.classification_confidence_threshold) and \
                (c[0] in other_category_ids):
-
+
                 if verbose_debug_enabled:
                     print('Replacing {} with {}'.format(
                         classification_descriptions[c[0]],
                         classification_descriptions[c[1]]))
-
+
                 n_other_classifications_changed_this_image += 1
                 c[0] = most_common_category
-
+
             # ...if there are classifications for this detection
-
+
         # ...for each detection
-
+
     # ...if we should overwrite all "other" classifications
 
     if verbose_debug_enabled:
         print('Made {} other changes'.format(n_other_classifications_changed_this_image))
-
-
+
+
     ## Re-count
-
+
     category_to_count = count_detections_by_classification_category(detections, options)
-    # _print_counts_with_names(category_to_count,classification_descriptions)
+    # _print_counts_with_names(category_to_count,classification_descriptions)
     keys = list(category_to_count.keys())
-    max_count = category_to_count[keys[0]]
+    max_count = category_to_count[keys[0]]
     most_common_category = keys[0]
     del keys
-
-
+
+
     ## Possibly change some non-dominant classifications to the dominant category
-
+
     process_taxonomic_rules = \
         (classification_descriptions_clean is not None) and \
         (len(classification_descriptions_clean) > 0) and \
         (len(category_to_count) > 1)
-
+
     n_detections_flipped_this_image = 0
-
-    # Don't do this if the most common category is an "other" category, or
+
+    # Don't do this if the most common category is an "other" category, or
     # if we don't have enough of the most common category
     if (most_common_category not in other_category_ids) and \
       (max_count >= options.min_detections_to_overwrite_secondary):
-
+
         # i_det = 0; det = detections[i_det]
         for i_det,det in enumerate(detections):
-
+
             if ('classifications' not in det) or \
                (det['conf'] < options.detection_confidence_threshold):
                 continue
-
+
             assert len(det['classifications']) == 1
             c = det['classifications'][0]
-
+
             # Don't over-write the most common category with itself
             if c[0] == most_common_category:
                 continue
-
+
             # Don't bother with below-threshold classifications
             if c[1] < options.classification_confidence_threshold:
                 continue
-
+
             # If we're doing taxonomic processing, at this stage, don't turn children
             # into parents; we'll likely turn parents into children in the next stage.
-
+
             if process_taxonomic_rules:
 
                 most_common_category_description = \
@@ -475,180 +475,180 @@ def _smooth_classifications_for_list_of_detections(detections,
 
                 category_id_this_classification = c[0]
                 assert category_id_this_classification in category_to_count
-
+
                 category_description_this_classification = \
                     classification_descriptions_clean[category_id_this_classification]
-
-                # An empty description corresponds to the "animal" category. We don't handle
-                # "animal" here as a parent category, that would be handled in the "other smoothing"
+
+                # An empty description corresponds to the "animal" category. We don't handle
+                # "animal" here as a parent category, that would be handled in the "other smoothing"
                 # step above.
                 if len(category_description_this_classification) == 0:
                     continue
-
+
                 most_common_category_is_parent_of_this_category = \
                     most_common_category_description in category_description_this_classification
-
+
                 if most_common_category_is_parent_of_this_category:
                     continue
-
+
             # If we have fewer of this category than the most common category,
             # but not *too* many, flip it to the most common category.
             if (max_count > category_to_count[c[0]]) and \
                (category_to_count[c[0]] <= options.max_detections_nondominant_class):
-
+
                 c[0] = most_common_category
-                n_detections_flipped_this_image += 1
-
+                n_detections_flipped_this_image += 1
+
         # ...for each detection
 
-    # ...if the dominant category is legit
-
+    # ...if the dominant category is legit
+
     if verbose_debug_enabled:
         print('Made {} non-dominant --> dominant changes'.format(
             n_detections_flipped_this_image))
 
-
+
     ## Re-count
-
+
     category_to_count = count_detections_by_classification_category(detections, options)
-    # _print_counts_with_names(category_to_count,classification_descriptions)
+    # _print_counts_with_names(category_to_count,classification_descriptions)
     keys = list(category_to_count.keys())
-    max_count = category_to_count[keys[0]]
+    max_count = category_to_count[keys[0]]
     most_common_category = keys[0]
     del keys
-
-
+
+
     ## Possibly collapse higher-level taxonomic predictions down to lower levels
-
+
     n_taxonomic_changes_this_image = 0
-
+
     process_taxonomic_rules = \
         (classification_descriptions_clean is not None) and \
         (len(classification_descriptions_clean) > 0) and \
         (len(category_to_count) > 1)
-
+
     if process_taxonomic_rules and options.propagate_classifications_through_taxonomy:
-
+
         # det = detections[3]
         for det in detections:
-
+
            if ('classifications' not in det) or \
               (det['conf'] < options.detection_confidence_threshold):
                continue
-
+
            assert len(det['classifications']) == 1
            c = det['classifications'][0]
-
+
            # Don't bother with any classifications below the confidence threshold
            if c[1] < options.classification_confidence_threshold:
                continue
 
            category_id_this_classification = c[0]
            assert category_id_this_classification in category_to_count
-
+
            category_description_this_classification = \
                classification_descriptions_clean[category_id_this_classification]
-
-           # An empty description corresponds to the "animal" category. We don't handle
-           # "animal" here as a parent category, that would be handled in the "other smoothing"
+
+           # An empty description corresponds to the "animal" category. We don't handle
+           # "animal" here as a parent category, that would be handled in the "other smoothing"
            # step above.
            if len(category_description_this_classification) == 0:
                continue
-
+
            # We may have multiple child categories to choose from; this keeps track of
            # the "best" we've seen so far. "Best" is based on the level (species is better
            # than genus) and number.
            child_category_to_score = defaultdict(float)
-
+
            for category_id_of_candidate_child in category_to_count.keys():
-
+
                # A category is never its own child
                if category_id_of_candidate_child == category_id_this_classification:
                    continue
-
+
                # Is this candidate a child of the current classification?
                category_description_candidate_child = \
                    classification_descriptions_clean[category_id_of_candidate_child]
-
+
                # An empty description corresponds to "animal", which can never
                # be a child of another category.
                if len(category_description_candidate_child) == 0:
                    continue
-
-               # As long as we're using "clean" descriptions, parent/child taxonomic
+
+               # As long as we're using "clean" descriptions, parent/child taxonomic
                # relationships are defined by a substring relationship
                is_child = category_description_this_classification in \
                    category_description_candidate_child
                if not is_child:
                    continue
-
+
                # How many instances of this child category are there?
                child_category_count = category_to_count[category_id_of_candidate_child]
-
+
                # What taxonomy level is this child category defined at?
               child_category_level = taxonomy_level_index(
                   classification_descriptions[category_id_of_candidate_child])
-
+
                child_category_to_score[category_id_of_candidate_child] = \
                    child_category_level * options.taxonomy_propagation_level_weight + \
                    child_category_count * options.taxonomy_propagation_count_weight
-
+
            # ...for each category we are considering reducing this classification to
-
+
            # Did we find a category we want to change this classification to?
            if len(child_category_to_score) > 0:
-
+
                # Find the child category with the highest score
                child_category_to_score = sort_dictionary_by_value(
                    child_category_to_score,reverse=True)
                best_child_category = next(iter(child_category_to_score.keys()))
-
+
                if verbose_debug_enabled:
                    old_category_name = \
                        classification_descriptions_clean[c[0]]
                    new_category_name = \
                        classification_descriptions_clean[best_child_category]
                    print('Replacing {} with {}'.format(
-                       old_category_name,new_category_name))
-
+                       old_category_name,new_category_name))
+
                c[0] = best_child_category
-               n_taxonomic_changes_this_image += 1
-
+               n_taxonomic_changes_this_image += 1
+
        # ...for each detection
-
-    # ...if we have taxonomic information available
-
-
+
+    # ...if we have taxonomic information available
+
+
     ## Re-count
-
+
     category_to_count = count_detections_by_classification_category(detections, options)
-    # _print_counts_with_names(category_to_count,classification_descriptions)
+    # _print_counts_with_names(category_to_count,classification_descriptions)
     keys = list(category_to_count.keys())
-    max_count = category_to_count[keys[0]]
+    max_count = category_to_count[keys[0]]
     most_common_category = keys[0]
     del keys
-
-
+
+
     ## Possibly do within-family smoothing
-
+
     n_within_family_smoothing_changes = 0
-
+
     # min_detections_to_overwrite_secondary_same_family = -1
     # max_detections_nondominant_class_same_family = 1
     family_level = taxonomy_level_string_to_index('family')
-
+
     if process_taxonomic_rules:
-
+
         category_description_most_common_category = \
             classification_descriptions[most_common_category]
         most_common_category_taxonomic_level = \
-            taxonomy_level_index(category_description_most_common_category)
+            taxonomy_level_index(category_description_most_common_category)
         n_most_common_category = category_to_count[most_common_category]
         tokens = category_description_most_common_category.split(';')
         assert len(tokens) == 7
         most_common_category_family = tokens[3]
         most_common_category_genus = tokens[4]
-
+
     # Only consider remapping to genus or species level, and only when we have
     # a high enough count in the most common category
     if process_taxonomic_rules and \
@@ -656,36 +656,36 @@ def _smooth_classifications_for_list_of_detections(detections,
       (most_common_category not in other_category_ids) and \
       (most_common_category_taxonomic_level > family_level) and \
       (n_most_common_category >= options.min_detections_to_overwrite_secondary_same_family):
-
+
         # det = detections[0]
         for det in detections:
-
+
             if ('classifications' not in det) or \
                (det['conf'] < options.detection_confidence_threshold):
                 continue
-
+
             assert len(det['classifications']) == 1
             c = det['classifications'][0]
-
+
             # Don't over-write the most common category with itself
             if c[0] == most_common_category:
                 continue
-
+
             # Don't bother with below-threshold classifications
             if c[1] < options.classification_confidence_threshold:
-                continue
-
+                continue
+
             n_candidate_flip_category = category_to_count[c[0]]
-
+
             # Do we have too many of the non-dominant category to do this kind of swap?
             if n_candidate_flip_category > \
                options.max_detections_nondominant_class_same_family:
                 continue
 
-            # Don't flip classes when it's a tie
+            # Don't flip classes when it's a tie
             if n_candidate_flip_category == n_most_common_category:
                 continue
-
+
             category_description_candidate_flip = \
                 classification_descriptions[c[0]]
             tokens = category_description_candidate_flip.split(';')
@@ -693,34 +693,34 @@ def _smooth_classifications_for_list_of_detections(detections,
             candidate_flip_category_family = tokens[3]
             candidate_flip_category_genus = tokens[4]
             candidate_flip_category_taxonomic_level = \
-                taxonomy_level_index(category_description_candidate_flip)
-
+                taxonomy_level_index(category_description_candidate_flip)
+
             # Only proceed if we have valid family strings
             if (len(candidate_flip_category_family) == 0) or \
                (len(most_common_category_family) == 0):
                 continue
-
-            # Only proceed if the candidate and the most common category are in the same family
+
+            # Only proceed if the candidate and the most common category are in the same family
             if candidate_flip_category_family != most_common_category_family:
                 continue
-
+
             # Don't flip from a species to the genus level in the same genus
             if (candidate_flip_category_genus == most_common_category_genus) and \
                (candidate_flip_category_taxonomic_level > \
                most_common_category_taxonomic_level):
                 continue
-
+
             old_category_name = classification_descriptions_clean[c[0]]
             new_category_name = classification_descriptions_clean[most_common_category]
-
+
             c[0] = most_common_category
-            n_within_family_smoothing_changes += 1
-
+            n_within_family_smoothing_changes += 1
+
         # ...for each detection
-
+
     # ...if the dominant category is legit and we have taxonomic information available
-
-
+
+
     return {'n_other_classifications_changed_this_image':n_other_classifications_changed_this_image,
             'n_detections_flipped_this_image':n_detections_flipped_this_image,
             'n_taxonomic_changes_this_image':n_taxonomic_changes_this_image,
@@ -737,33 +737,33 @@ def _smooth_single_image(im,
     """
     Smooth classifications for a single image. Returns None if no changes are made,
     else a dict.
-
-    classification_descriptions_clean should be semicolon-delimited taxonomic strings
+
+    classification_descriptions_clean should be semicolon-delimited taxonomic strings
     from which common names and GUIDs have already been removed.
-
+
     Assumes there is only one classification per detection, i.e. that non-top classifications
     have already been remoevd.
     """
-
+
     if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
         return
-
+
     detections = im['detections']
-
+
     # Simplify debugging
     for det in detections:
         det['image_filename'] = im['file']
-
-    to_return = _smooth_classifications_for_list_of_detections(detections,
-                                                               options=options,
+
+    to_return = _smooth_classifications_for_list_of_detections(detections,
+                                                               options=options,
                                                                other_category_ids=other_category_ids,
-                                                               classification_descriptions=classification_descriptions,
+                                                               classification_descriptions=classification_descriptions,
                                                                classification_descriptions_clean=classification_descriptions_clean)
 
     # Clean out debug information
     for det in detections:
         del det['image_filename']
-
+
     return to_return
 
 # ...def smooth_single_image
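
For reference, the dict returned by the smoothing helpers above (None when nothing changed) carries per-image or per-sequence change counts. Three keys are visible in the return statement earlier; a fourth, n_within_family_smoothing_changes, is read by the sequence-level caller below. Counts here are invented:

    {
        'n_other_classifications_changed_this_image': 1,
        'n_detections_flipped_this_image': 2,
        'n_taxonomic_changes_this_image': 0,
        'n_within_family_smoothing_changes': 0
    }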
@@ -775,104 +775,104 @@ def smooth_classification_results_image_level(input_file,output_file=None,option
     """
     Smooth classifications at the image level for all results in the MD-formatted results
     file [input_file], optionally writing a new set of results to [output_file].
-
-    This function generally expresses the notion that an image with 700 cows and one deer
+
+    This function generally expresses the notion that an image with 700 cows and one deer
     is really just 701 cows.
-
+
     Only count detections with a classification confidence threshold above
     [options.classification_confidence_threshold], which in practice means we're only
     looking at one category per detection.
-
+
     If an image has at least [options.min_detections_to_overwrite_secondary] such detections
     in the most common category, and no more than [options.max_detections_nondominant_class]
     in the second-most-common category, flip all detections to the most common
     category.
-
-    Optionally treat some classes as particularly unreliable, typically used to overwrite an
+
+    Optionally treat some classes as particularly unreliable, typically used to overwrite an
     "other" class.
-
+
     This function also removes everything but the non-dominant classification for each detection.
-
+
     Args:
         input_file (str): MegaDetector-formatted classification results file to smooth. Can
             also be an already-loaded results dict.
         output_file (str, optional): .json file to write smoothed results
-        options (ClassificationSmoothingOptions, optional): see
+        options (ClassificationSmoothingOptions, optional): see
            ClassificationSmoothingOptions for details.
-
+
     Returns:
         dict: MegaDetector-results-formatted dict, identical to what's written to
         [output_file] if [output_file] is not None.
     """
-
+
     ## Input validation
-
+
     if options is None:
         options = ClassificationSmoothingOptions()
-
+
     r = _prepare_results_for_smoothing(input_file, options)
     d = r['d']
     other_category_ids = r['other_category_ids']
     classification_descriptions_clean = r['classification_descriptions_clean']
     classification_descriptions = r['classification_descriptions']
-
-
+
+
     ## Smoothing
-
+
     n_other_classifications_changed = 0
     n_other_images_changed = 0
     n_taxonomic_images_changed = 0
-
+
     n_detections_flipped = 0
     n_images_changed = 0
-    n_taxonomic_classification_changes = 0
-
-    # im = d['images'][0]
+    n_taxonomic_classification_changes = 0
+
+    # im = d['images'][0]
     for im in tqdm(d['images']):
-
+
         r = _smooth_single_image(im,
                                  options,
                                  other_category_ids,
                                  classification_descriptions=classification_descriptions,
                                  classification_descriptions_clean=classification_descriptions_clean)
-
+
         if r is None:
             continue
-
+
         n_detections_flipped_this_image = r['n_detections_flipped_this_image']
         n_other_classifications_changed_this_image = \
             r['n_other_classifications_changed_this_image']
         n_taxonomic_changes_this_image = r['n_taxonomic_changes_this_image']
-
+
         n_detections_flipped += n_detections_flipped_this_image
         n_other_classifications_changed += n_other_classifications_changed_this_image
         n_taxonomic_classification_changes += n_taxonomic_changes_this_image
-
+
         if n_detections_flipped_this_image > 0:
             n_images_changed += 1
         if n_other_classifications_changed_this_image > 0:
             n_other_images_changed += 1
         if n_taxonomic_changes_this_image > 0:
             n_taxonomic_images_changed += 1
-
-    # ...for each image
-
+
+    # ...for each image
+
     print('Classification smoothing: changed {} detections on {} images'.format(
         n_detections_flipped,n_images_changed))
-
+
     print('"Other" smoothing: changed {} detections on {} images'.format(
         n_other_classifications_changed,n_other_images_changed))
-
+
     print('Taxonomic smoothing: changed {} detections on {} images'.format(
         n_taxonomic_classification_changes,n_taxonomic_images_changed))
-
-
+
+
     ## Write output
-
-    if output_file is not None:
+
+    if output_file is not None:
         print('Writing results after image-level smoothing to:\n{}'.format(output_file))
         with open(output_file,'w') as f:
-            json.dump(d,f,indent=1)
+            json.dump(d,f,indent=1)
 
     return d
 
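
A minimal usage sketch for image-level smoothing (file names hypothetical):

    from megadetector.postprocessing.classification_postprocessing import (
        ClassificationSmoothingOptions, smooth_classification_results_image_level)

    options = ClassificationSmoothingOptions()

    # Reads MD-formatted classifier output, smooths within each image, and
    # writes the smoothed copy alongside the original
    smoothed_results = smooth_classification_results_image_level(
        'classifier_output.json',
        output_file='classifier_output_image_smoothed.json',
        options=options)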
@@ -880,7 +880,7 @@ def smooth_classification_results_image_level(input_file,output_file=None,option
 
 
 #%% Sequence-level smoothing
-
+
 def smooth_classification_results_sequence_level(input_file,
                                                  cct_sequence_information,
                                                  output_file=None,
@@ -888,10 +888,10 @@ def smooth_classification_results_sequence_level(input_file,
     """
     Smooth classifications at the sequence level for all results in the MD-formatted results
     file [md_results_file], optionally writing a new set of results to [output_file].
-
+
     This function generally expresses the notion that a sequence that looks like
     deer/deer/deer/elk/deer/deer/deer/deer is really just a deer.
-
+
     Args:
         input_file (str or dict): MegaDetector-formatted classification results file to smooth
             (or already-loaded results). If you supply a dict, it's modified in place by default, but
@@ -899,28 +899,28 @@ def smooth_classification_results_sequence_level(input_file,
         cct_sequence_information (str, dict, or list): COCO Camera Traps file containing sequence IDs for
             each image (or an already-loaded CCT-formatted dict, or just the 'images' list from a CCT dict).
         output_file (str, optional): .json file to write smoothed results
-        options (ClassificationSmoothingOptions, optional): see
+        options (ClassificationSmoothingOptions, optional): see
             ClassificationSmoothingOptions for details.
-
+
     Returns:
         dict: MegaDetector-results-formatted dict, identical to what's written to
         [output_file] if [output_file] is not None.
     """
-
+
     ## Input validation
-
+
     if options is None:
         options = ClassificationSmoothingOptions()
-
+
     r = _prepare_results_for_smoothing(input_file, options)
     d = r['d']
     other_category_ids = r['other_category_ids']
     classification_descriptions_clean = r['classification_descriptions_clean']
     classification_descriptions = r['classification_descriptions']
-
-
+
+
     ## Make a list of images appearing in each sequence
-
+
     if isinstance(cct_sequence_information,list):
         image_info = cct_sequence_information
     elif isinstance(cct_sequence_information,str):
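
A sketch of a typical call (file names hypothetical), assuming a COCO Camera Traps file whose 'images' entries carry the 'file_name' and 'seq_id' fields used below:

    from megadetector.postprocessing.classification_postprocessing import \
        smooth_classification_results_sequence_level

    # cct_sequence_information can also be an already-loaded CCT dict,
    # or just the 'images' list from one
    smoothed_results = smooth_classification_results_sequence_level(
        'classifier_output.json',
        cct_sequence_information='sequences_cct.json',
        output_file='classifier_output_sequence_smoothed.json')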
@@ -931,77 +931,77 @@ def smooth_classification_results_sequence_level(input_file,
     else:
         assert isinstance(cct_sequence_information,dict)
         image_info = cct_sequence_information['images']
-
+
     sequence_to_image_filenames = defaultdict(list)
-
+
     # im = image_info[0]
     for im in tqdm(image_info):
-        sequence_to_image_filenames[im['seq_id']].append(im['file_name'])
+        sequence_to_image_filenames[im['seq_id']].append(im['file_name'])
     del image_info
-
+
     image_fn_to_classification_results = {}
     for im in d['images']:
         fn = im['file']
         assert fn not in image_fn_to_classification_results
         image_fn_to_classification_results[fn] = im
-
-
+
+
     ## Smoothing
-
+
     n_other_classifications_changed = 0
     n_other_sequences_changed = 0
     n_taxonomic_sequences_changed = 0
     n_within_family_sequences_changed = 0
-
+
     n_detections_flipped = 0
     n_sequences_changed = 0
-    n_taxonomic_classification_changes = 0
-    n_within_family_changes = 0
-
+    n_taxonomic_classification_changes = 0
+    n_within_family_changes = 0
+
     # sequence_id = list(sequence_to_image_filenames.keys())[0]
     for sequence_id in sequence_to_image_filenames.keys():
 
         image_filenames_this_sequence = sequence_to_image_filenames[sequence_id]
-
+
         # if 'file' in image_filenames_this_sequence:
         #     from IPython import embed; embed()
-
+
         detections_this_sequence = []
         for image_filename in image_filenames_this_sequence:
             im = image_fn_to_classification_results[image_filename]
             if 'detections' not in im or im['detections'] is None:
                 continue
             detections_this_sequence.extend(im['detections'])
-
+
             # Temporarily add image filenames to every detection,
             # for debugging
             for det in im['detections']:
                 det['image_filename'] = im['file']
-
+
         if len(detections_this_sequence) == 0:
             continue
-
+
         r = _smooth_classifications_for_list_of_detections(
-            detections=detections_this_sequence,
-            options=options,
+            detections=detections_this_sequence,
+            options=options,
             other_category_ids=other_category_ids,
-            classification_descriptions=classification_descriptions,
+            classification_descriptions=classification_descriptions,
             classification_descriptions_clean=classification_descriptions_clean)
-
+
         if r is None:
             continue
-
+
         n_detections_flipped_this_sequence = r['n_detections_flipped_this_image']
         n_other_classifications_changed_this_sequence = \
             r['n_other_classifications_changed_this_image']
         n_taxonomic_changes_this_sequence = r['n_taxonomic_changes_this_image']
         n_within_family_changes_this_sequence = r['n_within_family_smoothing_changes']
-
+
         n_detections_flipped += n_detections_flipped_this_sequence
         n_other_classifications_changed += n_other_classifications_changed_this_sequence
         n_taxonomic_classification_changes += n_taxonomic_changes_this_sequence
         n_within_family_changes += n_within_family_changes_this_sequence
-
+
         if n_detections_flipped_this_sequence > 0:
             n_sequences_changed += 1
         if n_other_classifications_changed_this_sequence > 0:
@@ -1010,40 +1010,40 @@ def smooth_classification_results_sequence_level(input_file,
1010
1010
  n_taxonomic_sequences_changed += 1
1011
1011
  if n_within_family_changes_this_sequence > 0:
1012
1012
  n_within_family_sequences_changed += 1
1013
-
1013
+
1014
1014
  # ...for each sequence
1015
-
1015
+
1016
1016
  print('Classification smoothing: changed {} detections in {} sequences'.format(
1017
1017
  n_detections_flipped,n_sequences_changed))
1018
-
1018
+
1019
1019
  print('"Other" smoothing: changed {} detections in {} sequences'.format(
1020
1020
  n_other_classifications_changed,n_other_sequences_changed))
1021
-
1021
+
1022
1022
  print('Taxonomic smoothing: changed {} detections in {} sequences'.format(
1023
1023
  n_taxonomic_classification_changes,n_taxonomic_sequences_changed))
1024
1024
 
1025
1025
  print('Within-family smoothing: changed {} detections in {} sequences'.format(
1026
1026
  n_within_family_changes,n_within_family_sequences_changed))
1027
-
1028
-
1027
+
1028
+
1029
1029
  ## Clean up debug information
1030
-
1030
+
1031
1031
  for im in d['images']:
1032
1032
  if 'detections' not in im or im['detections'] is None:
1033
1033
  continue
1034
1034
  for det in im['detections']:
1035
1035
  if 'image_filename' in det:
1036
1036
  del det['image_filename']
1037
-
1037
+
1038
1038
 
1039
1039
  ## Write output
1040
-
1041
- if output_file is not None:
1040
+
1041
+ if output_file is not None:
1042
1042
  print('Writing sequence-smoothed classification results to {}'.format(
1043
- output_file))
1043
+ output_file))
1044
1044
  with open(output_file,'w') as f:
1045
1045
  json.dump(d,f,indent=1)
1046
-
1046
+
1047
1047
  return d
1048
1048
 
1049
1049
  # ...smooth_classification_results_sequence_level(...)
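
For readers skimming this diff, a minimal usage sketch of smooth_classification_results_sequence_level, following the docstring shown above; the module path and the filenames below are assumptions, not taken from this diff.

# Minimal usage sketch based on the docstring above. The module path and
# the filenames are assumptions, not part of this diff.
from megadetector.postprocessing.classification_postprocessing import (
    ClassificationSmoothingOptions,
    smooth_classification_results_sequence_level)

options = ClassificationSmoothingOptions()

# The CCT file supplies the 'seq_id' and 'file_name' fields used to group
# images into sequences before smoothing
smoothed_results = smooth_classification_results_sequence_level(
    input_file='md_results_with_classifications.json',
    cct_sequence_information='sequences_cct.json',
    output_file='md_results_sequence_smoothed.json',
    options=options)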
@@ -1058,14 +1058,14 @@ def restrict_to_taxa_list(taxa_list,
  """
  Given a prediction file in MD .json format, likely without having had
  a geofence applied, apply a custom taxa list.
-
+
  Args:
  taxa_list (str or list): list of latin names, or a text file containing
  a list of latin names. Optionally may contain a second (comma-delimited)
  column containing common names, used only for debugging. Latin names
  must exist in the SpeciesNet taxonomy.
- speciesnet_taxonomy_file (str): taxonomy filename, in the same format used for
- model release (with 7-token taxonomy entries)
+ speciesnet_taxonomy_file (str): taxonomy filename, in the same format used for
+ model release (with 7-token taxonomy entries)
  input_file (str): .json file to read, in MD format. This can be None, in which
  case this function just validates [taxa_list].
  output_file (str): .json file to write, in MD format
@@ -1075,21 +1075,21 @@ def restrict_to_taxa_list(taxa_list,
  felid predictions be mapped to that species, as opposed to being mapped
  to the family?
  add_pre_restriction_description (bool, optional): should we add a new metadata
- field that summarizes each image's classifications prior to taxonomic
+ field that summarizes each image's classifications prior to taxonomic
  restriction?
  """

  ##%% Read target taxa list
-
+
  if isinstance(taxa_list,str):
  assert os.path.isfile(taxa_list), \
  'Could not find taxa list file {}'.format(taxa_list)
  with open(taxa_list,'r') as f:
  taxa_list = f.readlines()
-
+
  taxa_list = [s.strip().lower() for s in taxa_list]
  taxa_list = [s for s in taxa_list if len(s) > 0]
-
+
  target_latin_to_common = {}
  for s in taxa_list:
  if s.strip().startswith('#'):
@@ -1105,38 +1105,38 @@ def restrict_to_taxa_list(taxa_list,
  common_name = None
  assert binomial_name not in target_latin_to_common
  target_latin_to_common[binomial_name] = common_name
-
+

  ##%% Read taxonomy file
-
+
  with open(speciesnet_taxonomy_file,'r') as f:
  speciesnet_taxonomy_list = f.readlines()
  speciesnet_taxonomy_list = [s.strip() for s in \
  speciesnet_taxonomy_list if len(s.strip()) > 0]
-
+
  # Maps the latin name of every taxon to the corresponding full taxon string
  #
  # For species, the key is a binomial name
  speciesnet_latin_name_to_taxon_string = {}
  speciesnet_common_name_to_taxon_string = {}
-
+
  def _insert_taxonomy_string(s):
-
+
  tokens = s.split(';')
  assert len(tokens) == 7
-
+
  guid = tokens[0] # noqa
  class_name = tokens[1]
  order = tokens[2]
  family = tokens[3]
  genus = tokens[4]
- species = tokens[5]
+ species = tokens[5]
  common_name = tokens[6]
-
+
  if len(class_name) == 0:
  assert common_name in ('animal','vehicle','blank')
  return
-
+
  if len(species) > 0:
  assert all([len(s) > 0 for s in [genus,family,order]])
  binomial_name = genus + ' ' + species
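
As an aside, the 7-token entry format that _insert_taxonomy_string parses in the hunk above can be illustrated as follows; the GUID and taxon values here are invented for illustration, not taken from the real taxonomy file.

# Illustration of the 7-token taxonomy entry format parsed above; the GUID
# and taxon values are made up
example_entry = 'fake-guid-0000;mammalia;carnivora;felidae;lynx;rufus;bobcat'

tokens = example_entry.split(';')
assert len(tokens) == 7
guid, class_name, order, family, genus, species, common_name = tokens

# For species-level entries, the dictionary key is the binomial name
binomial_name = genus + ' ' + species  # 'lynx rufus'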
@@ -1156,43 +1156,43 @@ def restrict_to_taxa_list(taxa_list,
  else:
  if class_name not in speciesnet_latin_name_to_taxon_string:
  speciesnet_latin_name_to_taxon_string[class_name] = s
-
+
  if len(common_name) > 0:
  if common_name not in speciesnet_common_name_to_taxon_string:
  speciesnet_common_name_to_taxon_string[common_name] = s
-
+
  for s in speciesnet_taxonomy_list:
-
+
  _insert_taxonomy_string(s)
-
-
+
+
  ##%% Make sure all parent taxa are represented in the taxonomy
-
+
  # In theory any taxon that appears as the parent of another taxon should
  # also be in the taxonomy, but this isn't always true, so we fix it here.
-
+
  new_taxon_string_to_missing_tokens = defaultdict(list)
-
+
  # latin_name = next(iter(speciesnet_latin_name_to_taxon_string.keys()))
  for latin_name in speciesnet_latin_name_to_taxon_string.keys():
-
+
  if 'no cv result' in latin_name:
  continue
-
+
  taxon_string = speciesnet_latin_name_to_taxon_string[latin_name]
  tokens = taxon_string.split(';')
-
+
  # Don't process GUID, species, or common name
  # i_token = 6
  for i_token in range(1,len(tokens)-2):
-
- test_token = tokens[i_token]
+
+ test_token = tokens[i_token]
  if len(test_token) == 0:
  continue
-
+
  # Do we need to make up a taxon for this token?
  if test_token not in speciesnet_latin_name_to_taxon_string:
-
+
  new_tokens = [''] * 7
  new_tokens[0] = 'fake_guid'
  for i_copy_token in range(1,i_token+1):
@@ -1202,28 +1202,28 @@ def restrict_to_taxa_list(taxa_list,
  new_taxon_string = ';'.join(new_tokens)
  # assert new_taxon_string not in new_taxon_strings
  new_taxon_string_to_missing_tokens[new_taxon_string].append(test_token)
-
+
  # ...for each token
-
+
  # ...for each taxon
-
+
  print('Found {} taxa that need to be inserted to make the taxonomy valid:\n'.format(
  len(new_taxon_string_to_missing_tokens)))
-
+
  new_taxon_string_to_missing_tokens = \
  sort_dictionary_by_key(new_taxon_string_to_missing_tokens)
  for taxon_string in new_taxon_string_to_missing_tokens:
  missing_taxa = ','.join(new_taxon_string_to_missing_tokens[taxon_string])
  print('{} ({})'.format(taxon_string,missing_taxa))
-
+
  for new_taxon_string in new_taxon_string_to_missing_tokens:
  _insert_taxonomy_string(new_taxon_string)
-
-
+
+
  ##%% Make sure all species on the allow-list are in the taxonomy
-
+
  n_failed_mappings = 0
-
+
  for target_taxon_latin_name in target_latin_to_common.keys():
  if target_taxon_latin_name not in speciesnet_latin_name_to_taxon_string:
  common_name = target_latin_to_common[target_taxon_latin_name]
@@ -1234,99 +1234,99 @@ def restrict_to_taxa_list(taxa_list,
  speciesnet_common_name_to_taxon_string[common_name])
  print(s)
  n_failed_mappings += 1
-
+
  if n_failed_mappings > 0:
  raise ValueError('Cannot continue with geofence generation')
-
-
+
+
  ##%% For the allow-list, map each parent taxon to a set of allowable child taxa
-
- # Maps parent names to all allowed child names, or None if this is the
+
+ # Maps parent names to all allowed child names, or None if this is the
  # lowest-level allowable taxon on this path
  allowed_parent_taxon_to_child_taxa = defaultdict(set)
-
+
  # latin_name = next(iter(target_latin_to_common.keys()))
  for latin_name in target_latin_to_common:
-
+
  taxon_string = speciesnet_latin_name_to_taxon_string[latin_name]
  tokens = taxon_string.split(';')
  assert len(tokens) == 7
-
+
  # Remove GUID and common name
  #
  # This is now always class/order/family/genus/species
  tokens = tokens[1:-1]
-
+
  child_taxon = None
-
+
  # If this is a species
  if len(tokens[-1]) > 0:
  binomial_name = tokens[-2] + ' ' + tokens[-1]
  assert binomial_name == latin_name
  allowed_parent_taxon_to_child_taxa[binomial_name].add(None)
  child_taxon = binomial_name
-
- # The first candidate parent is the genus
+
+ # The first candidate parent is the genus
  parent_token_index = len(tokens) - 2

  while(parent_token_index >= 0):
-
+
  parent_taxon = tokens[parent_token_index]
  allowed_parent_taxon_to_child_taxa[parent_taxon].add(child_taxon)
  child_taxon = parent_taxon
- parent_token_index -= 1
-
+ parent_token_index -= 1
+
  # ...for each allowed latin name
-
+
  allowed_parent_taxon_to_child_taxa = \
  sort_dictionary_by_key(allowed_parent_taxon_to_child_taxa)
-
-
+
+
  ##%% If we were just validating the custom taxa file, we're done

  if input_file is None:
  print('Finished validating custom taxonomy list')
  return
-
+

  ##%% Map all predictions that exist in this dataset...
-
+
  # ...to the prediction we should generate.
-
+
  with open(input_file,'r') as f:
  input_data = json.load(f)
-
+
  input_category_id_to_common_name = input_data['classification_categories'] #noqa
  input_category_id_to_taxonomy_string = \
  input_data['classification_category_descriptions']
-
+
  input_category_id_to_output_taxon_string = {}
-
+
  # input_category_id = next(iter(input_category_id_to_taxonomy_string.keys()))
  for input_category_id in input_category_id_to_taxonomy_string.keys():
-
+
  input_taxon_string = input_category_id_to_taxonomy_string[input_category_id]
  input_taxon_tokens = input_taxon_string.split(';')
  assert len(input_taxon_tokens) == 7
-
+
  # Don't mess with blank/no-cv-result/animal/human
  if (input_taxon_string in non_taxonomic_prediction_strings) or \
  (input_taxon_string == human_prediction_string):
  input_category_id_to_output_taxon_string[input_category_id] = \
  input_taxon_string
  continue
-
+
  # Remove GUID and common name
-
+
  # This is now always class/order/family/genus/species
  input_taxon_tokens = input_taxon_tokens[1:-1]
-
+
  test_index = len(input_taxon_tokens) - 1
  target_taxon = None
-
+
  # Start at the species level, and see whether each taxon is allowed
  while((test_index >= 0) and (target_taxon is None)):
-
+
  # Species are represented as binomial names
  if (test_index == (len(input_taxon_tokens) - 1)) and \
  (len(input_taxon_tokens[-1]) > 0):
@@ -1334,27 +1334,27 @@ def restrict_to_taxa_list(taxa_list,
  input_taxon_tokens[-2] + ' ' + input_taxon_tokens[-1]
  else:
  test_taxon_name = input_taxon_tokens[test_index]
-
+
  # If we haven't yet found the level at which this taxon is non-empty,
  # keep going up
- if len(test_taxon_name) == 0:
+ if len(test_taxon_name) == 0:
  test_index -= 1
  continue
-
+
  assert test_taxon_name in speciesnet_latin_name_to_taxon_string
-
+
  # Is this taxon allowed according to the custom species list?
  if test_taxon_name in allowed_parent_taxon_to_child_taxa:
-
+
  allowed_child_taxa = allowed_parent_taxon_to_child_taxa[test_taxon_name]
  assert allowed_child_taxa is not None
-
- # If this is the lowest-level allowable token or there is not a
+
+ # If this is the lowest-level allowable token or there is not a
  # unique child, don't walk any further, even if walking down
  # is enabled.
  if (None in allowed_child_taxa):
  assert len(allowed_child_taxa) == 1
-
+
  if (None in allowed_child_taxa) or (len(allowed_child_taxa) > 1):
  target_taxon = test_taxon_name
  elif not allow_walk_down:
@@ -1370,72 +1370,72 @@ def restrict_to_taxa_list(taxa_list,
  allowed_child_taxa = \
  allowed_parent_taxon_to_child_taxa[candidate_taxon]
  target_taxon = candidate_taxon
-
+
  # ...if this is an allowed taxon
-
+
  test_index -= 1
-
+
  # ...for each token
-
+
  if target_taxon is None:
- output_taxon_string = animal_prediction_string
+ output_taxon_string = animal_prediction_string
  else:
  output_taxon_string = speciesnet_latin_name_to_taxon_string[target_taxon]
- input_category_id_to_output_taxon_string[input_category_id] = output_taxon_string
-
+ input_category_id_to_output_taxon_string[input_category_id] = output_taxon_string
+
  # ...for each category
-
-
+
+
  ##%% Build the new tables
-
+
  input_category_id_to_output_category_id = {}
  output_taxon_string_to_category_id = {}
  output_category_id_to_common_name = {}
-
+
  for input_category_id in input_category_id_to_output_taxon_string:
-
+
  original_common_name = \
  input_category_id_to_common_name[input_category_id]
  original_taxon_string = \
  input_category_id_to_taxonomy_string[input_category_id]
  output_taxon_string = \
  input_category_id_to_output_taxon_string[input_category_id]
-
+
  output_common_name = output_taxon_string.split(';')[-1]
-
+
  # Do we need to create a new output category?
  if output_taxon_string not in output_taxon_string_to_category_id:
  output_category_id = str(len(output_taxon_string_to_category_id))
  output_taxon_string_to_category_id[output_taxon_string] = \
  output_category_id
  output_category_id_to_common_name[output_category_id] = \
- output_common_name
+ output_common_name
  else:
  output_category_id = \
  output_taxon_string_to_category_id[output_taxon_string]
-
+
  input_category_id_to_output_category_id[input_category_id] = \
  output_category_id
-
+
  if False:
  print('Mapping {} ({}) to:\n{} ({})\n'.format(
  original_common_name,original_taxon_string,
  output_common_name,output_taxon_string))
- if False:
+ if False:
  print('Mapping {} to {}'.format(
  original_common_name,output_common_name,))
-
+
  # ...for each category
-
-
+
+
  ##%% Remap all category labels
-
+
  assert len(set(output_taxon_string_to_category_id.keys())) == \
  len(set(output_taxon_string_to_category_id.values()))
-
+
  output_category_id_to_taxon_string = \
  invert_dictionary(output_taxon_string_to_category_id)
-
+
  with open(input_file,'r') as f:
  output_data = json.load(f)

@@ -1447,7 +1447,7 @@ def restrict_to_taxa_list(taxa_list,

  if 'detections' not in im or im['detections'] is None:
  continue
-
+
  # Possibly prepare a pre-filtering description
  pre_filtering_description = None
  if classification_descriptions is not None and add_pre_filtering_description:
@@ -1462,16 +1462,16 @@ def restrict_to_taxa_list(taxa_list,
  classification[0] = \
  input_category_id_to_output_category_id[classification[0]]

- # ...for each image
-
+ # ...for each image
+
  output_data['classification_categories'] = output_category_id_to_common_name
  output_data['classification_category_descriptions'] = \
  output_category_id_to_taxon_string
-
-
+
+
  ##%% Write output
-
+
  with open(output_file,'w') as f:
  json.dump(output_data,f,indent=1)
-
+
  # ...def restrict_to_taxa_list(...)
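
To close out this file's changes, a minimal usage sketch of restrict_to_taxa_list, following the docstring above; the module path and filenames are assumptions, not taken from this diff.

# Minimal usage sketch following the docstring above. The module path and
# filenames are assumptions, not part of this diff.
from megadetector.postprocessing.classification_postprocessing import \
    restrict_to_taxa_list

# With input_file=None, the function only validates the taxa list against
# the SpeciesNet taxonomy
restrict_to_taxa_list(
    taxa_list='project_taxa.txt',
    speciesnet_taxonomy_file='speciesnet_taxonomy.txt',
    input_file=None,
    output_file=None)

# With an input file, predictions not on the list are mapped to the nearest
# allowed ancestor, falling back to the generic 'animal' category
restrict_to_taxa_list(
    taxa_list='project_taxa.txt',
    speciesnet_taxonomy_file='speciesnet_taxonomy.txt',
    input_file='classification_results.json',
    output_file='classification_results_restricted.json')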