megadetector 5.0.8-py3-none-any.whl → 5.0.10-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.

Note: this release of megadetector has been flagged as potentially problematic.

Files changed (190)
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +65 -65
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
  20. api/batch_processing/postprocessing/compare_batch_results.py +113 -43
  21. api/batch_processing/postprocessing/convert_output_format.py +41 -16
  22. api/batch_processing/postprocessing/load_api_results.py +16 -17
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +52 -22
  25. api/batch_processing/postprocessing/merge_detections.py +14 -14
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
  27. api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +102 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -263
  71. data_management/coco_to_yolo.py +79 -58
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +62 -24
  76. data_management/databases/subset_json_db.py +24 -15
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -162
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -158
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +7 -7
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +65 -24
  120. data_management/labelme_to_yolo.py +8 -8
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +13 -13
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +44 -110
  128. data_management/lila/generate_lila_per_image_labels.py +55 -42
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +96 -33
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +110 -97
  135. data_management/remap_coco_categories.py +83 -83
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +30 -23
  138. data_management/wi_download_csv_to_coco.py +246 -239
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +300 -60
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +179 -113
  147. detection/run_inference_with_yolov5_val.py +108 -48
  148. detection/run_tiled_inference.py +111 -40
  149. detection/tf_detector.py +51 -29
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +228 -68
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -871
  157. md_utils/path_utils.py +460 -134
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +176 -60
  163. md_utils/write_html_image_list.py +40 -33
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +597 -291
  168. md_visualization/visualize_db.py +76 -48
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/METADATA +13 -7
  171. megadetector-5.0.10.dist-info/RECORD +224 -0
  172. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/top_level.txt +1 -0
  173. taxonomy_mapping/__init__.py +0 -0
  174. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  175. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  176. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  177. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  178. taxonomy_mapping/retrieve_sample_image.py +12 -12
  179. taxonomy_mapping/simple_image_download.py +11 -11
  180. taxonomy_mapping/species_lookup.py +10 -10
  181. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  182. taxonomy_mapping/taxonomy_graph.py +47 -47
  183. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  184. data_management/cct_json_to_filename_json.py +0 -89
  185. data_management/cct_to_csv.py +0 -140
  186. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  187. detection/detector_training/copy_checkpoints.py +0 -43
  188. megadetector-5.0.8.dist-info/RECORD +0 -205
  189. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/LICENSE +0 -0
  190. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/WHEEL +0 -0
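The remainder of this page is the diff for api/batch_processing/postprocessing/postprocess_batch_results.py, the module that renders HTML previews of MegaDetector results. As context for the renamed options it introduces, here is a minimal sketch (not taken from the package documentation) of how the updated entry point might be driven. The import path mirrors the file layout in this wheel, and the option names (md_results_file, confidence_threshold, include_almost_detections) are the 5.0.10 attribute names visible in the diff below, so verify them against your installed version before relying on this.

import os

# Import path assumed from the file layout shown in the file list above
from api.batch_processing.postprocessing.postprocess_batch_results import (
    PostProcessingOptions, process_batch_results)

# Hypothetical paths; point these at your own MegaDetector output and image folder
base_dir = '/path/to/camera-trap-job'

options = PostProcessingOptions()
options.md_results_file = os.path.join(base_dir, 'results.json')  # renamed from api_output_file in 5.0.10
options.image_base_dir = base_dir
options.output_dir = os.path.join(base_dir, 'preview')

# Per-category thresholds (with a 'default' fallback) are supported when no ground truth is supplied
options.confidence_threshold = {'animal': 0.2, 'person': 0.2, 'vehicle': 0.2, 'default': 0.2}
options.include_almost_detections = True
options.almost_detection_confidence_threshold = 0.05

ppresults = process_batch_results(options)
print('Wrote preview to {}'.format(ppresults.output_html_file))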
@@ -1,20 +1,21 @@
- ########
- #
- # postprocess_batch_results.py
- #
- # Given a .json or .csv file representing the output from the batch detection API,
- # do one or more of the following:
- #
- # * Evaluate detector precision/recall, optionally rendering results (requires
- # ground truth)
- # * Sample true/false positives/negatives and render to HTML (requires ground
- # truth)
- # * Sample detections/non-detections and render to HTML (when ground truth isn't
- # available)
- #
- # Ground truth, if available, must be in the COCO Camera Traps format.
- #
- ########
+ """
+
+ postprocess_batch_results.py
+
+ Given a .json or .csv file containing MD results, do one or more of the following:
+
+ * Sample detections/non-detections and render to HTML (when ground truth isn't
+ available) (this is 99.9% of what this module is for)
+ * Evaluate detector precision/recall, optionally rendering results (requires
+ ground truth)
+ * Sample true/false positives/negatives and render to HTML (requires ground
+ truth)
+
+ Ground truth, if available, must be in COCO Camera Traps format:
+
+ https://github.com/agentmorris/MegaDetector/blob/main/data_management/README.md#coco-camera-traps-format
+
+ """

  #%% Constants and imports

@@ -30,7 +31,6 @@ import uuid
  import warnings
  import random

- from typing import Any, Dict, Iterable, Optional, Tuple
  from enum import IntEnum
  from multiprocessing.pool import ThreadPool
  from multiprocessing.pool import Pool
@@ -51,7 +51,7 @@ from md_utils.write_html_image_list import write_html_image_list
  from md_utils import path_utils
  from data_management.cct_json_utils import (CameraTrapJsonUtils, IndexedJsonDb)
  from api.batch_processing.postprocessing.load_api_results import load_api_results
- from md_utils.ct_utils import args_to_object
+ from md_utils.ct_utils import args_to_object, sets_overlap

  from detection.run_detector import get_typical_confidence_threshold_from_results

@@ -63,136 +63,163 @@ warnings.filterwarnings('ignore', '(Possibly )?corrupt EXIF data', UserWarning)
  DEFAULT_NEGATIVE_CLASSES = ['empty']
  DEFAULT_UNKNOWN_CLASSES = ['unknown', 'unlabeled', 'ambiguous']

-
- def has_overlap(set1: Iterable, set2: Iterable) -> bool:
- """
- Check whether two sets overlap.
- """
-
- return not set(set1).isdisjoint(set(set2))
-
-
  # Make sure there is no overlap between the two sets, because this will cause
  # issues in the code
- assert not has_overlap(DEFAULT_NEGATIVE_CLASSES, DEFAULT_UNKNOWN_CLASSES), (
+ assert not sets_overlap(DEFAULT_NEGATIVE_CLASSES, DEFAULT_UNKNOWN_CLASSES), (
  'Default negative and unknown classes cannot overlap.')


  class PostProcessingOptions:
-
+ """
+ Options used to parameterize process_batch_results().
+ """
+
  ### Required inputs

- api_output_file = ''
+ #: MD results .json file to process
+ md_results_file = ''
+
+ #: Folder to which we should write HTML output
  output_dir = ''

  ### Options

- # Can be a folder or a SAS URL
+ #: Folder where images live (filenames in [md_results_file] should be relative to this folder)
  image_base_dir = '.'

- ground_truth_json_file = ''
-
  ## These apply only when we're doing ground-truth comparisons

- # Classes we'll treat as negative
- #
- # Include the token "#NO_LABELS#" to indicate that an image with no annotations
- # should be considered empty.
+ #: Optional .json file containing ground truth information
+ ground_truth_json_file = ''
+
+ #: Classes we'll treat as negative
+ #:
+ #: Include the token "#NO_LABELS#" to indicate that an image with no annotations
+ #: should be considered empty.
  negative_classes = DEFAULT_NEGATIVE_CLASSES

- # Classes we'll treat as neither positive nor negative
+ #: Classes we'll treat as neither positive nor negative
  unlabeled_classes = DEFAULT_UNKNOWN_CLASSES

- # A list of output sets that we should count, but not render images for.
- #
- # Typically used to preview sets with lots of empties, where you don't want to
- # subset but also don't want to render 100,000 empty images.
- #
- # detections, non_detections
- # detections_animal, detections_person, detections_vehicle
+ #: A list of output sets that we should count, but not render images for.
+ #:
+ #: Typically used to preview sets with lots of empties, where you don't want to
+ #: subset but also don't want to render 100,000 empty images.
+ #:
+ #: detections, non_detections
+ #: detections_animal, detections_person, detections_vehicle
  rendering_bypass_sets = []

- # If this is None, choose a confidence threshold based on the detector version.
- #
- # This can either be a float or a dictionary mapping category names (not IDs) to
- # thresholds. The category "default" can be used to specify thresholds for
- # other categories. Currently the use of a dict here is not supported when
- # ground truth is supplied.
+ #: If this is None, choose a confidence threshold based on the detector version.
+ #:
+ #: This can either be a float or a dictionary mapping category names (not IDs) to
+ #: thresholds. The category "default" can be used to specify thresholds for
+ #: other categories. Currently the use of a dict here is not supported when
+ #: ground truth is supplied.
  confidence_threshold = None

- # Confidence threshold to apply to classification (not detection) results
- #
- # Only a float is supported here (unlike the "confidence_threshold" parameter, which
- # can be a dict).
+ #: Confidence threshold to apply to classification (not detection) results
+ #:
+ #: Only a float is supported here (unlike the "confidence_threshold" parameter, which
+ #: can be a dict).
  classification_confidence_threshold = 0.5

- # Used for summary statistics only
+ #: Used for summary statistics only
  target_recall = 0.9

- # Number of images to sample, -1 for "all images"
+ #: Number of images to sample, -1 for "all images"
  num_images_to_sample = 500

- # Random seed for sampling, or None
- sample_seed: Optional[int] = 0 # None
+ #: Random seed for sampling, or None
+ sample_seed = 0 # None

+ #: Image width for images in the HTML output
  viz_target_width = 800

+ #: Line width (in pixels) for rendering detections
  line_thickness = 4
+
+ #: Box expansion (in pixels) for rendering detections
  box_expansion = 0

+ #: Job name to include in big letters in the output HTML
  job_name_string = None
+
+ #: Model version string to include in the output HTML
  model_version_string = None

- # Sort order for the output, should be one of "filename", "confidence", or "random"
+ #: Sort order for the output, should be one of "filename", "confidence", or "random"
  html_sort_order = 'filename'

+ #: If True, images in the output HTML will be links back to the original images
  link_images_to_originals = True

- # Optionally separate detections into categories (animal/vehicle/human)
- #
- # Currently only supported when ground truth is unavailable
+ #: Optionally separate detections into categories (animal/vehicle/human)
+ #:
+ #: Currently only supported when ground truth is unavailable
  separate_detections_by_category = True

- # Optionally replace one or more strings in filenames with other strings;
- # useful for taking a set of results generated for one folder structure
- # and applying them to a slightly different folder structure.
+ #: Optionally replace one or more strings in filenames with other strings;
+ #: useful for taking a set of results generated for one folder structure
+ #: and applying them to a slightly different folder structure.
  api_output_filename_replacements = {}
+
+ #: Optionally replace one or more strings in filenames with other strings;
+ #: useful for taking a set of results generated for one folder structure
+ #: and applying them to a slightly different folder structure.
  ground_truth_filename_replacements = {}

- # Allow bypassing API output loading when operating on previously-loaded
- # results
- api_detection_results: Optional[pd.DataFrame] = None
- api_other_fields: Optional[Dict[str, Any]] = None
-
- # Should we also split out a separate report about the detections that were
- # just below our main confidence threshold?
- #
- # Currently only supported when ground truth is unavailable
+ #: Allow bypassing API output loading when operating on previously-loaded
+ #: results. If present, this is a Pandas DataFrame. Almost never useful.
+ api_detection_results = None
+
+ #: Allow bypassing API output loading when operating on previously-loaded
+ #: results. If present, this is a str --> obj dict. Almost never useful.
+ api_other_fields = None
+
+ #: Should we also split out a separate report about the detections that were
+ #: just below our main confidence threshold?
+ #:
+ #: Currently only supported when ground truth is unavailable.
  include_almost_detections = False

- # Only a float is supported here (unlike the "confidence_threshold" parameter, which
- # can be a dict).
+ #: Only a float is supported here (unlike the "confidence_threshold" parameter, which
+ #: can be a dict).
  almost_detection_confidence_threshold = None

- # Control rendering parallelization
- parallelize_rendering_n_cores: Optional[int] = 100
- parallelize_rendering_with_threads = True
+ #: Enable/disable rendering parallelization
  parallelize_rendering = False

+ #: Number of threads/processes to use for rendering parallelization
+ parallelize_rendering_n_cores = 25
+
+ #: Whether to use threads (True) or processes (False) for rendering parallelization
+ parallelize_rendering_with_threads = True
+
+ #: When classification results are present, should be sort alphabetically by class name (False)
+ #: or in descending order by frequency (True)?
  sort_classification_results_by_count = False

- # Should we split individual pages up into smaller pages if there are more than
- # N images?
+ #: Should we split individual pages up into smaller pages if there are more than
+ #: N images?
  max_figures_per_html_file = None

  # ...PostProcessingOptions


  class PostProcessingResults:
-
+ """
+ Return format from process_batch_results
+ """
+
+ #: HTML file to which preview information was written
  output_html_file = ''
- api_detection_results: Optional[pd.DataFrame] = None
- api_other_fields: Optional[Dict[str, Any]] = None
+
+ #: Pandas Dataframe containing detection results
+ api_detection_results = None
+
+ #: str --> obj dictionary containing other information loaded from the results file
+ api_other_fields = None


  ##%% Helper classes and functions
@@ -201,6 +228,8 @@ class DetectionStatus(IntEnum):
  """
  Flags used to mark images as positive or negative for P/R analysis
  (according to ground truth and/or detector output)
+
+ :meta private:
  """

  DS_NEGATIVE = 0
@@ -223,11 +252,9 @@ class DetectionStatus(IntEnum):
  DS_ALMOST = 5


- def mark_detection_status(
- indexed_db: IndexedJsonDb,
- negative_classes: Iterable[str] = DEFAULT_NEGATIVE_CLASSES,
- unknown_classes: Iterable[str] = DEFAULT_UNKNOWN_CLASSES
- ) -> Tuple[int, int, int, int]:
+ def _mark_detection_status(indexed_db,
+ negative_classes=DEFAULT_NEGATIVE_CLASSES,
+ unknown_classes=DEFAULT_UNKNOWN_CLASSES):
  """
  For each image in indexed_db.db['images'], add a '_detection_status' field
  to indicate whether to treat this image as positive, negative, ambiguous,
@@ -259,8 +286,8 @@ def mark_detection_status(
  # - unknown / unassigned-type labels
  # - negative-type labels
  # - positive labels (i.e., labels that are neither unknown nor negative)
- has_unknown_labels = has_overlap(category_names, unknown_classes)
- has_negative_labels = has_overlap(category_names, negative_classes)
+ has_unknown_labels = sets_overlap(category_names, unknown_classes)
+ has_negative_labels = sets_overlap(category_names, negative_classes)
  has_positive_labels = 0 < len(category_names - (unknown_classes | negative_classes))
  # assert has_unknown_labels is False, '{} has unknown labels'.format(annotations)

@@ -315,23 +342,27 @@ def mark_detection_status(

  return n_negative, n_positive, n_unknown, n_ambiguous

- # ...mark_detection_status()
+ # ..._mark_detection_status()


- def is_sas_url(s: str) -> bool:
+ def is_sas_url(s) -> bool:
  """
  Placeholder for a more robust way to verify that a link is a SAS URL.
  99.999% of the time this will suffice for what we're using it for right now.
+
+ :meta private:
  """

  return (s.startswith(('http://', 'https://')) and ('core.windows.net' in s)
  and ('?' in s))


- def relative_sas_url(folder_url: str, relative_path: str) -> Optional[str]:
+ def relative_sas_url(folder_url, relative_path):
  """
  Given a container-level or folder-level SAS URL, create a SAS URL to the
  specified relative path.
+
+ :meta private:
  """

  relative_path = relative_path.replace('%','%25')
@@ -349,7 +380,7 @@ def relative_sas_url(folder_url: str, relative_path: str) -> Optional[str]:
  return tokens[0] + relative_path + '?' + tokens[1]


- def render_bounding_boxes(
+ def _render_bounding_boxes(
  image_base_dir,
  image_relative_path,
  display_name,
@@ -361,6 +392,9 @@ def render_bounding_boxes(
  options=None):
  """
  Renders detection bounding boxes on a single image.
+
+ This is an internal function; if you want tools for rendering boxes on images, see
+ md_visualization.visualization_utils.

  The source image is:

@@ -379,6 +413,8 @@ def render_bounding_boxes(

  Returns the html info struct for this image in the format that's used for
  write_html_image_list.
+
+ :meta private:
  """

  if options is None:
@@ -448,7 +484,7 @@ def render_bounding_boxes(
  rendering_confidence_threshold = {}
  for category_id in category_ids:
  rendering_confidence_threshold[category_id] = \
- get_threshold_for_category_id(category_id, options, detection_categories)
+ _get_threshold_for_category_id(category_id, options, detection_categories)
  vis_utils.render_detection_bounding_boxes(
  detections, image,

@@ -493,10 +529,10 @@ def render_bounding_boxes(

  return info

- # ...render_bounding_boxes
+ # ..._render_bounding_boxes


- def prepare_html_subpages(images_html, output_dir, options=None):
+ def _prepare_html_subpages(images_html, output_dir, options=None):
  """
  Write out a series of html image lists, e.g. the "detections" or "non-detections"
  pages.
@@ -562,11 +598,13 @@ def prepare_html_subpages(images_html, output_dir, options=None):

  return image_counts

- # ...prepare_html_subpages()
+ # ..._prepare_html_subpages()


- # Determine the confidence threshold we should use for a specific category name
- def get_threshold_for_category_name(category_name,options):
+ def _get_threshold_for_category_name(category_name,options):
+ """
+ Determines the confidence threshold we should use for a specific category name.
+ """

  if isinstance(options.confidence_threshold,float):
  return options.confidence_threshold
@@ -585,10 +623,12 @@ def get_threshold_for_category_name(category_name,options):
  return options.confidence_threshold['default']


- # Determine the confidence threshold we should use for a specific category ID
- #
- # detection_categories is a dict mapping category IDs to names.
- def get_threshold_for_category_id(category_id,options,detection_categories):
+ def _get_threshold_for_category_id(category_id,options,detection_categories):
+ """
+ Determines the confidence threshold we should use for a specific category ID.
+
+ [detection_categories] is a dict mapping category IDs to names.
+ """

  if isinstance(options.confidence_threshold,float):
  return options.confidence_threshold
@@ -598,66 +638,73 @@ def get_threshold_for_category_id(category_id,options,detection_categories):

  category_name = detection_categories[category_id]

- return get_threshold_for_category_name(category_name,options)
+ return _get_threshold_for_category_name(category_name,options)


- # Get a sorted list of unique categories (as string IDs) above the threshold for this image
- #
- # "detection_categories" is a dict mapping category IDs to names.
- def get_positive_categories(detections,options,detection_categories):
+ def _get_positive_categories(detections,options,detection_categories):
+ """
+ Gets a sorted list of unique categories (as string IDs) above the threshold for this image
+
+ [detection_categories] is a dict mapping category IDs to names.
+ """
+
  positive_categories = set()
  for d in detections:
- threshold = get_threshold_for_category_id(d['category'], options, detection_categories)
+ threshold = _get_threshold_for_category_id(d['category'], options, detection_categories)
  if d['conf'] >= threshold:
  positive_categories.add(d['category'])
  return sorted(positive_categories)


- # Determine whether any positive detections are present in the detection list
- # [detections].
- def has_positive_detection(detections,options,detection_categories):
+ def _has_positive_detection(detections,options,detection_categories):
+ """
+ Determines whether any positive detections are present in the detection list
+ [detections].
+ """

  found_positive_detection = False
  for d in detections:
- threshold = get_threshold_for_category_id(d['category'], options, detection_categories)
+ threshold = _get_threshold_for_category_id(d['category'], options, detection_categories)
  if d['conf'] >= threshold:
  found_positive_detection = True
  break
  return found_positive_detection


- # Render an image (with no ground truth information)
- #
- # Returns a list of rendering structs, where the first item is a category (e.g. "detections_animal"),
- # and the second is a dict of information needed for rendering. E.g.:
- #
- # [['detections_animal',
- # {
- # 'filename': 'detections_animal/detections_animal_blah~01060415.JPG',
- # 'title': '<b>Result type</b>: detections_animal,
- # <b>Image</b>: blah\\01060415.JPG,
- # <b>Max conf</b>: 0.897',
- # 'textStyle': 'font-family:verdana,arial,calibri;font-size:80%;text-align:left;margin-top:20;margin-bottom:5',
- # 'linkTarget': 'full_path_to_%5C01060415.JPG'
- # }]]
- #
- # When no classification data is present, this list will always be length-1. When
- # classification data is present, an image may appear in multiple categories.
- #
- # Populates the 'max_conf' field of the first element of the list.
- #
- # Returns None if there are any errors.
- def render_image_no_gt(file_info,detection_categories_to_results_name,
+ def _render_image_no_gt(file_info,detection_categories_to_results_name,
  detection_categories,classification_categories,
  options):
-
+ """
+ Renders an image (with no ground truth information)
+
+ Returns a list of rendering structs, where the first item is a category (e.g. "detections_animal"),
+ and the second is a dict of information needed for rendering. E.g.:
+
+ [['detections_animal',
+ {
+ 'filename': 'detections_animal/detections_animal_blah~01060415.JPG',
+ 'title': '<b>Result type</b>: detections_animal,
+ <b>Image</b>: blah\\01060415.JPG,
+ <b>Max conf</b>: 0.897',
+ 'textStyle': 'font-family:verdana,arial,calibri;font-size:80%;text-align:left;margin-top:20;margin-bottom:5',
+ 'linkTarget': 'full_path_to_%5C01060415.JPG'
+ }]]
+
+ When no classification data is present, this list will always be length-1. When
+ classification data is present, an image may appear in multiple categories.
+
+ Populates the 'max_conf' field of the first element of the list.
+
+ Returns None if there are any errors.
+ """
+
  image_relative_path = file_info[0]
  max_conf = file_info[1]
  detections = file_info[2]

  # Determine whether any positive detections are present (using a threshold that
  # may vary by category)
- found_positive_detection = has_positive_detection(detections,options,detection_categories)
+ found_positive_detection = _has_positive_detection(detections,options,detection_categories)

  detection_status = DetectionStatus.DS_UNASSIGNED
  if found_positive_detection:
@@ -673,7 +720,7 @@ def render_image_no_gt(file_info,detection_categories_to_results_name,

  if detection_status == DetectionStatus.DS_POSITIVE:
  if options.separate_detections_by_category:
- positive_categories = tuple(get_positive_categories(detections,options,detection_categories))
+ positive_categories = tuple(_get_positive_categories(detections,options,detection_categories))
  if positive_categories not in detection_categories_to_results_name:
  raise ValueError('Error: {} not in category mapping (file {})'.format(
  str(positive_categories),image_relative_path))
@@ -695,7 +742,7 @@ def render_image_no_gt(file_info,detection_categories_to_results_name,
  rendering_options.confidence_threshold = \
  rendering_options.almost_detection_confidence_threshold

- rendered_image_html_info = render_bounding_boxes(
+ rendered_image_html_info = _render_bounding_boxes(
  image_base_dir=options.image_base_dir,
  image_relative_path=image_relative_path,
  display_name=display_name,
@@ -743,18 +790,20 @@ def render_image_no_gt(file_info,detection_categories_to_results_name,

  image_result[0][1]['max_conf'] = max_conf

- # ...if we got valid rendering info back from render_bounding_boxes()
+ # ...if we got valid rendering info back from _render_bounding_boxes()

  return image_result

- # ...def render_image_no_gt()
+ # ...def _render_image_no_gt()


- # Render an image with ground truth information. See render_image_no_gt for return
- # data format.
- def render_image_with_gt(file_info,ground_truth_indexed_db,
+ def _render_image_with_gt(file_info,ground_truth_indexed_db,
  detection_categories,classification_categories,options):
-
+ """
+ Render an image with ground truth information. See _render_image_no_gt for return
+ data format.
+ """
+
  image_relative_path = file_info[0]
  max_conf = file_info[1]
  detections = file_info[2]
@@ -780,7 +829,7 @@ def render_image_with_gt(file_info,ground_truth_indexed_db,

  gt_presence = bool(gt_status)

- gt_classes = CameraTrapJsonUtils.annotations_to_classnames(
+ gt_classes = CameraTrapJsonUtils.annotations_to_class_names(
  annotations, ground_truth_indexed_db.cat_id_to_name)
  gt_class_summary = ','.join(gt_classes)

@@ -789,7 +838,7 @@ def render_image_with_gt(file_info,ground_truth_indexed_db,
  f'ground truth status (status: {gt_status}, classes: {gt_class_summary})')
  return None

- detected = has_positive_detection(detections, options, detection_categories)
+ detected = _has_positive_detection(detections, options, detection_categories)

  if gt_presence and detected:
  if '_classification_accuracy' not in image.keys():
@@ -809,7 +858,7 @@ def render_image_with_gt(file_info,ground_truth_indexed_db,
  res.upper(), str(gt_presence), gt_class_summary,
  max_conf * 100, image_relative_path)

- rendered_image_html_info = render_bounding_boxes(
+ rendered_image_html_info = _render_bounding_boxes(
  image_base_dir=options.image_base_dir,
  image_relative_path=image_relative_path,
  display_name=display_name,
@@ -828,14 +877,35 @@ def render_image_with_gt(file_info,ground_truth_indexed_db,

  return image_result

- # ...def render_image_with_gt()
+ # ...def _render_image_with_gt()


  #%% Main function

- def process_batch_results(options: PostProcessingOptions
- ) -> PostProcessingResults:
+ def process_batch_results(options):
+
+ """
+ Given a .json or .csv file containing MD results, do one or more of the following:

+ * Sample detections/non-detections and render to HTML (when ground truth isn't
+ available) (this is 99.9% of what this module is for)
+ * Evaluate detector precision/recall, optionally rendering results (requires
+ ground truth)
+ * Sample true/false positives/negatives and render to HTML (requires ground
+ truth)
+
+ Ground truth, if available, must be in COCO Camera Traps format:
+
+ https://github.com/agentmorris/MegaDetector/blob/main/data_management/README.md#coco-camera-traps-format
+
+ Args:
+ options (PostProcessingOptions): everything we need to render a preview/analysis for
+ this set of results; see the PostProcessingOptions class for details.
+
+ Returns:
+ PostProcessingResults: information about the results/preview, most importantly the HTML filename
+ of the output. See the PostProcessingResults class for details.
+ """
  ppresults = PostProcessingResults()

  ##%% Expand some options for convenience
@@ -852,7 +922,7 @@ def process_batch_results(options: PostProcessingOptions

  ground_truth_indexed_db = None

- if (options.ground_truth_json_file is not None):
+ if (options.ground_truth_json_file is not None) and (len(options.ground_truth_json_file) > 0):
  assert (options.confidence_threshold is None) or (isinstance(options.confidence_threshold,float)), \
  'Variable confidence thresholds are not supported when supplying ground truth'

@@ -868,7 +938,7 @@ def process_batch_results(options: PostProcessingOptions
  filename_replacements=options.ground_truth_filename_replacements)

  # Mark images in the ground truth as positive or negative
- n_negative, n_positive, n_unknown, n_ambiguous = mark_detection_status(
+ n_negative, n_positive, n_unknown, n_ambiguous = _mark_detection_status(
  ground_truth_indexed_db, negative_classes=options.negative_classes,
  unknown_classes=options.unlabeled_classes)
  print(f'Finished loading and indexing ground truth: {n_negative} '
@@ -900,7 +970,10 @@ def process_batch_results(options: PostProcessingOptions
  print('Choosing default confidence threshold of {} based on MD version'.format(
  options.confidence_threshold))

- if options.almost_detection_confidence_threshold is None:
+ if options.almost_detection_confidence_threshold is None and options.include_almost_detections:
+ assert isinstance(options.confidence_threshold,float), \
+ 'If you are using a dictionary of confidence thresholds and almost-detections are enabled, ' + \
+ 'you need to supply a threshold for almost detections.'
  options.almost_detection_confidence_threshold = options.confidence_threshold - 0.05
  if options.almost_detection_confidence_threshold < 0:
  options.almost_detection_confidence_threshold = 0
@@ -934,7 +1007,7 @@ def process_batch_results(options: PostProcessingOptions

  detections = row['detections']
  max_conf = row['max_detection_conf']
- if has_positive_detection(detections, options, detection_categories):
+ if _has_positive_detection(detections, options, detection_categories):
  n_positives += 1
  elif (options.almost_detection_confidence_threshold is not None) and \
  (max_conf >= options.almost_detection_confidence_threshold):
@@ -1284,7 +1357,7 @@ def process_batch_results(options: PostProcessingOptions
  worker_string))

  rendering_results = list(tqdm(pool.imap(
- partial(render_image_with_gt,
+ partial(_render_image_with_gt,
  ground_truth_indexed_db=ground_truth_indexed_db,
  detection_categories=detection_categories,
  classification_categories=classification_categories,
@@ -1292,7 +1365,7 @@ def process_batch_results(options: PostProcessingOptions
  files_to_render), total=len(files_to_render)))
  else:
  for file_info in tqdm(files_to_render):
- rendering_results.append(render_image_with_gt(
+ rendering_results.append(_render_image_with_gt(
  file_info,ground_truth_indexed_db,
  detection_categories,classification_categories,
  options=options))
@@ -1309,7 +1382,7 @@ def process_batch_results(options: PostProcessingOptions
  images_html[assignment[0]].append(assignment[1])

  # Prepare the individual html image files
- image_counts = prepare_html_subpages(images_html, output_dir, options)
+ image_counts = _prepare_html_subpages(images_html, output_dir, options)

  print('{} images rendered (of {})'.format(image_rendered_count,image_count))

@@ -1469,7 +1542,7 @@ def process_batch_results(options: PostProcessingOptions
  detections_this_row = row['detections']
  above_threshold_category_ids_this_row = set()
  for detection in detections_this_row:
- threshold = get_threshold_for_category_id(detection['category'], options, detection_categories)
+ threshold = _get_threshold_for_category_id(detection['category'], options, detection_categories)
  if detection['conf'] >= threshold:
  above_threshold_category_ids_this_row.add(detection['category'])
  if len(above_threshold_category_ids_this_row) == 0:
@@ -1532,11 +1605,11 @@ def process_batch_results(options: PostProcessingOptions
  print('Rendering images with {} {}'.format(options.parallelize_rendering_n_cores,
  worker_string))

- # render_image_no_gt(file_info,detection_categories_to_results_name,
+ # _render_image_no_gt(file_info,detection_categories_to_results_name,
  # detection_categories,classification_categories)

  rendering_results = list(tqdm(pool.imap(
- partial(render_image_no_gt,
+ partial(_render_image_no_gt,
  detection_categories_to_results_name=detection_categories_to_results_name,
  detection_categories=detection_categories,
  classification_categories=classification_categories,
@@ -1544,7 +1617,7 @@ def process_batch_results(options: PostProcessingOptions
  files_to_render), total=len(files_to_render)))
  else:
  for file_info in tqdm(files_to_render):
- rendering_results.append(render_image_no_gt(file_info,
+ rendering_results.append(_render_image_no_gt(file_info,
  detection_categories_to_results_name,
  detection_categories,
  classification_categories,
@@ -1568,7 +1641,7 @@ def process_batch_results(options: PostProcessingOptions
  images_html[assignment[0]].append(assignment[1])

  # Prepare the individual html image files
- image_counts = prepare_html_subpages(images_html, output_dir, options)
+ image_counts = _prepare_html_subpages(images_html, output_dir, options)

  if image_rendered_count == 0:
  seconds_per_image = 0.0
@@ -1723,18 +1796,17 @@ if False:

  #%%

- base_dir = r'G:\temp\md'
+ base_dir = r'g:\temp'
  options = PostProcessingOptions()
  options.image_base_dir = base_dir
- options.output_dir = os.path.join(base_dir, 'postprocessing')
- options.api_output_filename_replacements = {} # {'20190430cameratraps\\':''}
- options.ground_truth_filename_replacements = {} # {'\\data\\blob\\':''}
+ options.output_dir = os.path.join(base_dir, 'preview')
  options.api_output_file = os.path.join(base_dir, 'results.json')
- options.ground_truth_json_file = os.path.join(base_dir, 'gt.json')
- # options.unlabeled_classes = ['human']
+ options.confidence_threshold = {'person':0.5,'animal':0.5,'vehicle':0.01}
+ options.include_almost_detections = True
+ options.almost_detection_confidence_threshold = 0.001

  ppresults = process_batch_results(options)
- # os.start(ppresults.output_html_file)
+ # from md_utils.path_utils import open_file; open_file(ppresults.output_html_file)


  #%% Command-line driver