megadetector 5.0.7-py3-none-any.whl → 5.0.9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (191)
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +93 -79
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
  20. api/batch_processing/postprocessing/compare_batch_results.py +114 -44
  21. api/batch_processing/postprocessing/convert_output_format.py +62 -19
  22. api/batch_processing/postprocessing/load_api_results.py +17 -20
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +165 -68
  25. api/batch_processing/postprocessing/merge_detections.py +40 -15
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
  27. api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +107 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -0
  71. data_management/coco_to_yolo.py +86 -62
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +130 -83
  76. data_management/databases/subset_json_db.py +25 -16
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -144
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -160
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +8 -8
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +309 -159
  120. data_management/labelme_to_yolo.py +103 -60
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +114 -31
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +92 -90
  128. data_management/lila/generate_lila_per_image_labels.py +56 -43
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +103 -70
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +161 -99
  135. data_management/remap_coco_categories.py +84 -0
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +32 -44
  138. data_management/wi_download_csv_to_coco.py +246 -0
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +535 -95
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +189 -114
  147. detection/run_inference_with_yolov5_val.py +118 -51
  148. detection/run_tiled_inference.py +113 -42
  149. detection/tf_detector.py +51 -28
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +249 -70
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -862
  157. md_utils/path_utils.py +655 -155
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +208 -27
  163. md_utils/write_html_image_list.py +51 -35
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +908 -311
  168. md_visualization/visualize_db.py +109 -58
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
  173. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  174. taxonomy_mapping/__init__.py +0 -0
  175. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  176. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  177. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  178. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  179. taxonomy_mapping/retrieve_sample_image.py +12 -12
  180. taxonomy_mapping/simple_image_download.py +11 -11
  181. taxonomy_mapping/species_lookup.py +10 -10
  182. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  183. taxonomy_mapping/taxonomy_graph.py +47 -47
  184. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  185. data_management/cct_json_to_filename_json.py +0 -89
  186. data_management/cct_to_csv.py +0 -140
  187. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  188. detection/detector_training/copy_checkpoints.py +0 -43
  189. md_visualization/visualize_megadb.py +0 -183
  190. megadetector-5.0.7.dist-info/RECORD +0 -202
  191. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
@@ -1,10 +1,15 @@
- ########
- #
- # repeat_detections_core.py
- #
- # Core utilities shared by find_repeat_detections and remove_repeat_detections.
- #
- ########
+ """
+
+ repeat_detections_core.py
+
+ Core utilities shared by find_repeat_detections and remove_repeat_detections.
+
+ Nothing in this file (in fact nothing in this subpackage) will make sense until you read
+ the RDE user's guide:
+
+ https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing/postprocessing/repeat_detection_elimination
+
+ """

  #%% Imports and environment

@@ -62,161 +67,214 @@ class RepeatDetectionOptions:
  Options that control the behavior of repeat detection elimination
  """

- # Relevant for rendering the folder of images for filtering
- #
- # imageBase can also be a SAS URL, in which case some error-checking is
- # disabled.
+ #: Folder where images live; filenames in the MD results .json file should
+ #: be relative to this folder.
+ #:
+ #: imageBase can also be a SAS URL, in which case some error-checking is
+ #: disabled.
  imageBase = ''
+
+ #: Folder where we should write temporary output.
  outputBase = ''

- # Don't consider detections with confidence lower than this as suspicious
+ #: Don't consider detections with confidence lower than this as suspicious
  confidenceMin = 0.1

- # Don't consider detections with confidence higher than this as suspicious
+ #: Don't consider detections with confidence higher than this as suspicious
  confidenceMax = 1.0

- # What's the IOU threshold for considering two boxes the same?
+ #: What's the IOU threshold for considering two boxes the same?
  iouThreshold = 0.9

- # How many occurrences of a single location (as defined by the IOU threshold)
- # are required before we declare it suspicious?
+ #: How many occurrences of a single location (as defined by the IOU threshold)
+ #: are required before we declare it suspicious?
  occurrenceThreshold = 20

- # Ignore "suspicious" detections smaller than some size
+ #: Ignore "suspicious" detections smaller than some size
  minSuspiciousDetectionSize = 0.0

- # Ignore "suspicious" detections larger than some size; these are often animals
- # taking up the whole image. This is expressed as a fraction of the image size.
+ #: Ignore "suspicious" detections larger than some size; these are often animals
+ #: taking up the whole image. This is expressed as a fraction of the image size.
  maxSuspiciousDetectionSize = 0.2

- # Ignore folders with more than this many images in them
+ #: Ignore folders with more than this many images in them
  maxImagesPerFolder = None

- # A list of classes we don't want to treat as suspicious. Each element is an int.
- excludeClasses = [] # [annotation_constants.detector_bbox_category_name_to_id['person']]
-
- # For very large sets of results, passing chunks of results to and from workers as
- # parameters ('memory') can be memory-intensive, so we can serialize to intermediate
- # files instead ('file').
- #
- # The use of 'file' here is still experimental.
+ #: A list of category IDs (ints) that we don't want consider as candidate repeat detections.
+ #:
+ #: Typically used to say, e.g., "don't bother analyzing people or vehicles for repeat
+ #: detections", which you could do by saying excludeClasses = [2,3].
+ excludeClasses = []
+
+ #: For very large sets of results, passing chunks of results to and from workers as
+ #: parameters ('memory') can be memory-intensive, so we can serialize to intermediate
+ #: files instead ('file').
+ #:
+ #: The use of 'file' here is still experimental.
  pass_detections_to_processes_method = 'memory'

+ #: Number of workers to use for parallel operations
  nWorkers = 10

- # Should we use threads or processes for parallelization?
+ #: Should we use threads (True) or processes (False) for parallelization?
+ #:
+ #: Not relevant if nWorkers <= 1, or if bParallelizeComparisons and
+ #: bParallelizeRendering are both False.
  parallelizationUsesThreads = True

- # Load detections from a filter file rather than finding them from the detector output
-
- # .json file containing detections, generally this is the detectionIndex.json file in
- # the filtering_* folder produced in the first pass
+ #: If this is not empty, we'll load detections from a filter file rather than finding them
+ #: from the detector output. This should be a .json file containing detections, generally this
+ #: is the detectionIndex.json file in the filtering_* folder produced by find_repeat_detections().
  filterFileToLoad = ''

- # (optional) List of filenames remaining after deletion of identified
- # repeated detections that are actually animals. This should be a flat
- # text file, one relative filename per line. See enumerate_images().
- #
- # This is a pretty esoteric code path and a candidate for removal.
- #
- # The scenario where I see it being most useful is the very hypothetical one
- # where we use an external tool for image handling that allows us to do something
- # smarter and less destructive than deleting images to mark them as non-false-positives.
+ #: (optional) List of filenames remaining after deletion of identified
+ #: repeated detections that are actually animals. This should be a flat
+ #: text file, one relative filename per line.
+ #:
+ #: This is a pretty esoteric code path and a candidate for removal.
+ #:
+ #: The scenario where I see it being most useful is the very hypothetical one
+ #: where we use an external tool for image handling that allows us to do something
+ #: smarter and less destructive than deleting images to mark them as non-false-positives.
  filteredFileListToLoad = None

- # Turn on/off optional outputs
+ #: Should we write the folder of images used to manually review repeat detections?
  bWriteFilteringFolder = True

+ #: For debugging: limit comparisons to a specific number of folders
  debugMaxDir = -1
+
+ #: For debugging: limit rendering to a specific number of folders
  debugMaxRenderDir = -1
+
+ #: For debugging: limit comparisons to a specific number of detections
  debugMaxRenderDetection = -1
+
+ #: For debugging: limit comparisons to a specific number of instances
  debugMaxRenderInstance = -1
+
+ #: Should we parallelize (across cameras) comparisons to find repeat detections?
  bParallelizeComparisons = True
+
+ #: Should we parallelize image rendering?
  bParallelizeRendering = True

- # If this is False (default), a detection from class A is not considered to be "the same"
- # as a detection from class B, even if they're at the same location.
+ #: If this is False (default), a detection from class A is *not* considered to be "the same"
+ #: as a detection from class B, even if they're at the same location.
  categoryAgnosticComparisons = False

- # Determines whether bounding-box rendering errors (typically network errors) should
- # be treated as failures
+ #: Determines whether bounding-box rendering errors (typically network errors) should
+ #: be treated as failures
  bFailOnRenderError = False

+ #: Should we print a warning if images referred to in the MD results file are missing?
  bPrintMissingImageWarnings = True
+
+ #: If bPrintMissingImageWarnings is True, should we print a warning about missing images
+ #: just once ('once') or every time ('all')?
  missingImageWarningType = 'once' # 'all'

- # This does *not* include the tile image grid
+ #: Image width for rendered images (it's called "max" because we don't resize smaller images).
+ #:
+ #: Original size is preserved if this is None.
+ #:
+ #: This does *not* include the tile image grid.
  maxOutputImageWidth = None

- # Box rendering options
+ #: Line thickness (in pixels) for box rendering
  lineThickness = 10
+
+ #: Box expansion (in pixels)
  boxExpansion = 2

- # State variables
+ #: Progress bar used during comparisons and rendering. Do not set externally.
+ #:
+ #: :meta private:
  pbar = None

- # Replace filename tokens after reading, useful when the directory structure
- # has changed relative to the structure the detector saw
+ #: Replace filename tokens after reading, useful when the directory structure
+ #: has changed relative to the structure the detector saw.
  filenameReplacements = {}

- # How many folders up from the leaf nodes should we be going to aggregate images?
+ #: How many folders up from the leaf nodes should we be going to aggregate images into
+ #: cameras?
+ #:
+ #: If this is zero, each leaf folder is treated as a camera.
  nDirLevelsFromLeaf = 0

- # An optional function that takes a string (an image file name) and returns
- # a string (the corresponding folder ID), typically used when multiple folders
- # actually correspond to the same camera in a manufacturer-specific way (e.g.
- # a/b/c/RECONYX100 and a/b/c/RECONYX101 may really be the same camera).
+ #: An optional function that takes a string (an image file name) and returns
+ #: a string (the corresponding folder ID), typically used when multiple folders
+ #: actually correspond to the same camera in a manufacturer-specific way (e.g.
+ #: a/b/c/RECONYX100 and a/b/c/RECONYX101 may really be the same camera).
+ #:
+ #: See ct_utils for a common replacement function that handles most common
+ #: manufacturer folder names.
  customDirNameFunction = None

- # Include/exclude specific folders... only one of these may be
- # specified; "including" folders includes *only* those folders.
+ #: Include only specific folders, mutually exclusive with [excludeFolders]
  includeFolders = None
+
+ #: Exclude specific folders, mutually exclusive with [includeFolders]
  excludeFolders = None

- # Optionally show *other* detections (i.e., detections other than the
- # one the user is evaluating) in a light gray
+ #: Optionally show *other* detections (i.e., detections other than the
+ #: one the user is evaluating), typically in a light gray.
  bRenderOtherDetections = False
+
+ #: Threshold to use for *other* detections
  otherDetectionsThreshold = 0.2
+
+ #: Line width (in pixels) for *other* detections
  otherDetectionsLineWidth = 1

- # Optionally show a grid that includes a sample image for the detection, plus
- # the top N additional detections
- bRenderDetectionTiles = False
+ #: Optionally show a grid that includes a sample image for the detection, plus
+ #: the top N additional detections
+ bRenderDetectionTiles = True

- # If this is None, we'll render at the width of the original image
+ #: Width of the original image (within the larger output image) when bRenderDetectionTiles
+ #: is True.
+ #:
+ #: If this is None, we'll render the original image in the detection tile image
+ #: at its original width.
  detectionTilesPrimaryImageWidth = None

- # Can be a width in pixels, or a number from 0 to 1 representing a fraction
- # of the primary image width.
- #
- # If you want to render the grid at exactly 1 pixel wide, I guess you're out
- # of luck.
+ #: Width to use for the grid of detection instances.
+ #:
+ #: Can be a width in pixels, or a number from 0 to 1 representing a fraction
+ #: of the primary image width.
+ #:
+ #: If you want to render the grid at exactly 1 pixel wide, I guess you're out
+ #: of luck.
  detectionTilesCroppedGridWidth = 0.6
- detectionTilesPrimaryImageLocation='right'
- detectionTilesMaxCrops = None

- # If bRenderOtherDetections is True, what color should we use to render the
- # (hopefully pretty subtle) non-target detections?
- #
- # In theory I'd like these "other detection" rectangles to be partially
- # transparent, but this is not straightforward, and the alpha is ignored
- # here. But maybe if I leave it here and wish hard enough, someday it
- # will work.
- #
- # otherDetectionsColors = ['dimgray']
+ #: Location of the primary image within the mosaic ('right' or 'left)
+ detectionTilesPrimaryImageLocation = 'right'
+
+ #: Maximum number of individual detection instances to include in the mosaic
+ detectionTilesMaxCrops = 250
+
+ #: If bRenderOtherDetections is True, what color should we use to render the
+ #: (hopefully pretty subtle) non-target detections?
+ #:
+ #: In theory I'd like these "other detection" rectangles to be partially
+ #: transparent, but this is not straightforward, and the alpha is ignored
+ #: here. But maybe if I leave it here and wish hard enough, someday it
+ #: will work.
+ #:
+ #: otherDetectionsColors = ['dimgray']
  otherDetectionsColors = [(105,105,105,100)]

- # Sort detections within a directory so nearby detections are adjacent
- # in the list, for faster review.
- #
- # Can be None, 'xsort', or 'clustersort'
- #
- # * None sorts detections chronologically by first occurrence
- # * 'xsort' sorts detections from left to right
- # * 'clustersort' clusters detections and sorts by cluster
+ #: Sort detections within a directory so nearby detections are adjacent
+ #: in the list, for faster review.
+ #:
+ #: Can be None, 'xsort', or 'clustersort'
+ #:
+ #: * None sorts detections chronologically by first occurrence
+ #: * 'xsort' sorts detections from left to right
+ #: * 'clustersort' clusters detections and sorts by cluster
  smartSort = 'xsort'

- # Only relevant if smartSort == 'clustersort'
+ #: Only relevant if smartSort == 'clustersort'
  smartSortDistanceThreshold = 0.1


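The fields documented above are plain class attributes, so a first RDE pass typically just instantiates the class and overrides a handful of values. A minimal sketch follows; the paths are placeholders, and the import path is assumed from the api package layout shown in the file list above, so adjust both to your installation:

    from api.batch_processing.postprocessing.repeat_detection_elimination import repeat_detections_core

    options = repeat_detections_core.RepeatDetectionOptions()
    options.imageBase = '/data/camera_traps'    # placeholder: folder the MD results filenames are relative to
    options.outputBase = '/data/rde_scratch'    # placeholder: where the filtering_* folder gets written
    options.confidenceMin = 0.1                 # ignore detections below this confidence
    options.iouThreshold = 0.9                  # IOU at which two boxes count as the same location
    options.occurrenceThreshold = 20            # repeats required before a location is "suspicious"
    options.excludeClasses = [2, 3]             # e.g. skip people/vehicles, per the excludeClasses comment above
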
@@ -225,26 +283,28 @@ class RepeatDetectionResults:
  The results of an entire repeat detection analysis
  """

- # The data table (Pandas DataFrame), as loaded from the input json file via
- # load_api_results(). Has columns ['file', 'detections','failure'].
+ #: The data table (Pandas DataFrame), as loaded from the input json file via
+ #: load_api_results(). Has columns ['file', 'detections','failure'].
  detectionResults = None

- # The other fields in the input json file, loaded via load_api_results()
+ #: The other fields in the input json file, loaded via load_api_results()
  otherFields = None

- # The data table after modification
+ #: The data table after modification
  detectionResultsFiltered = None

- # dict mapping folder names to whole rows from the data table
+ #: dict mapping folder names to whole rows from the data table
  rowsByDirectory = None

- # dict mapping filenames to rows in the master table
+ #: dict mapping filenames to rows in the master table
  filenameToRow = None

- # An array of length nDirs, where each element is a list of DetectionLocation
- # objects for that directory that have been flagged as suspicious
+ #: An array of length nDirs, where each element is a list of DetectionLocation
+ #: objects for that directory that have been flagged as suspicious
  suspiciousDetections = None

+ #: The location of the .json file written with information about the RDE
+ #: review images (typically detectionIndex.json)
  filterFile = None


@@ -254,21 +314,25 @@ class IndexedDetection:
  """

  def __init__(self, iDetection=-1, filename='', bbox=[], confidence=-1, category='unknown'):
- """
- Args:
- iDetection: order in API output file
- filename: path to the image of this detection
- bbox: [x_min, y_min, width_of_box, height_of_box]
- """
+
  assert isinstance(iDetection,int)
  assert isinstance(filename,str)
  assert isinstance(bbox,list)
  assert isinstance(category,str)

+ #: index of this detection within all detections for this filename
  self.iDetection = iDetection
+
+ #: path to the image corresponding to this detection
  self.filename = filename
+
+ #: [x_min, y_min, width_of_box, height_of_box]
  self.bbox = bbox
+
+ #: confidence value of this detection
  self.confidence = confidence
+
+ #: category ID (not name) of this detection
  self.category = category

  def __repr__(self):
@@ -280,7 +344,7 @@ class DetectionLocation:
  """
  A unique-ish detection location, meaningful in the context of one
  directory. All detections within an IoU threshold of self.bbox
- will be stored in "instances".
+ will be stored in IndexedDetection objects.
  """

  def __init__(self, instance, detection, relativeDir, category, id=None):
@@ -290,15 +354,28 @@ class DetectionLocation:
  assert isinstance(relativeDir,str)
  assert isinstance(category,str)

- self.instances = [instance] # list of IndexedDetections
+ #: list of IndexedDetections that match this detection
+ self.instances = [instance]
+
+ #: category ID (not name) for this detection
  self.category = category
+
+ #: bbox as x,y,w,h
  self.bbox = detection['bbox']
+
+ #: relative folder (i.e., camera name) in which this detectin was found
  self.relativeDir = relativeDir
+
+ #: relative path to the canonical image representing this detection
  self.sampleImageRelativeFileName = ''
+
+ #: list of detections on that canonical image that match this detection
  self.sampleImageDetections = None

- # This ID is only guaranteed to be unique within a directory
+ #: ID for this detection; this ID is only guaranteed to be unique within a directory
  self.id = id
+
+ #: only used when doing cluster-based sorting
  self.clusterLabel = None

  def __repr__(self):
@@ -307,8 +384,11 @@ class DetectionLocation:

  def to_api_detection(self):
  """
- Converts to a 'detection' dictionary, making the semi-arbitrary assumption that
- the first instance is representative of confidence.
+ Converts this detection to a 'detection' dictionary, making the semi-arbitrary
+ assumption that the first instance is representative of confidence.
+
+ Returns:
+ dict: dictionary in the format used to store detections in MD results
  """

  # This is a bit of a hack right now, but for future-proofing, I don't want to call this
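For reference, the 'detection' dictionary the new docstring refers to is the standard MD results format: a category ID stored as a string, a confidence value, and a bbox stored as [x_min, y_min, width_of_box, height_of_box] in normalized image coordinates. A hypothetical example with made-up values:

    detection = {
        'category': '1',                     # category ID (not name), as a string
        'conf': 0.92,                        # taken from the first (representative) instance
        'bbox': [0.31, 0.42, 0.18, 0.27]     # [x_min, y_min, width_of_box, height_of_box]
    }
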
@@ -328,30 +408,13 @@

  #%% Support functions

- def enumerate_images(dirName,outputFileName=None):
+ def _render_bounding_box(detection, inputFileName, outputFileName, lineWidth=5,
+ expansion=0):
  """
- Non-recursively enumerates all image files in *dirName* to the text file
- *outputFileName*, as relative paths. This is used to produce a file list
- after removing true positives from the image directory.
-
- Not used directly in this module, but provides a consistent way to enumerate
- files in the format expected by this module.
+ Rendering the detection [detection] on the image [inputFileName], writing the result
+ to [outputFileName].
  """

- imageList = path_utils.find_images(dirName)
- imageList = [os.path.basename(fn) for fn in imageList]
-
- if outputFileName is not None:
- with open(outputFileName,'w') as f:
- for s in imageList:
- f.write(s + '\n')
-
- return imageList
-
-
- def render_bounding_box(detection, inputFileName, outputFileName, lineWidth=5,
- expansion=0):
-
  im = open_image(inputFileName)
  d = detection.to_api_detection()
  render_detection_bounding_boxes([d],im,thickness=lineWidth,expansion=expansion,
@@ -359,8 +422,12 @@ def render_bounding_box(detection, inputFileName, outputFileName, lineWidth=5,
  im.save(outputFileName)


- def detection_rect_to_rtree_rect(detection_rect):
- # We store detections as x/y/w/h, rtree and pyqtree use l/b/r/t
+ def _detection_rect_to_rtree_rect(detection_rect):
+ """
+ We store detections as x/y/w/h, rtree and pyqtree use l/b/r/t. Convert from
+ our representation to rtree's.
+ """
+
  l = detection_rect[0]
  b = detection_rect[1]
  r = detection_rect[0] + detection_rect[2]
@@ -368,8 +435,12 @@ def detection_rect_to_rtree_rect(detection_rect):
  return (l,b,r,t)


- def rtree_rect_to_detection_rect(rtree_rect):
- # We store detections as x/y/w/h, rtree and pyqtree use l/b/r/t
+ def _rtree_rect_to_detection_rect(rtree_rect):
+ """
+ We store detections as x/y/w/h, rtree and pyqtree use l/b/r/t. Convert from
+ rtree's representation to ours.
+ """
+
  x = rtree_rect[0]
  y = rtree_rect[1]
  w = rtree_rect[2] - rtree_rect[0]
@@ -377,7 +448,7 @@ def rtree_rect_to_detection_rect(rtree_rect):
  return (x,y,w,h)


- def sort_detections_for_directory(candidateDetections,options):
+ def _sort_detections_for_directory(candidateDetections,options):
  """
  candidateDetections is a list of DetectionLocation objects. Sorts them to
  put nearby detections next to each other, for easier visual review. Returns
@@ -474,14 +545,15 @@ def sort_detections_for_directory(candidateDetections,options):
  raise ValueError('Unrecognized sort method {}'.format(
  options.smartSort))

- # ...def sort_detections_for_directory(...)
+ # ...def _sort_detections_for_directory(...)


- def find_matches_in_directory(dirNameAndRows, options):
+ def _find_matches_in_directory(dirNameAndRows, options):
  """
  dirNameAndRows is a tuple of (name,rows).

- "name" is a location name, typically a folder name.
+ "name" is a location name, typically a folder name, though this may be an arbitrary
+ location identifier.
  "rows" is a Pandas dataframe with one row per image in this location, with columns:


@@ -548,7 +620,7 @@ def find_matches_in_directory(dirNameAndRows, options):

  i_iteration += 1
  filename = row['file']
- if not ct_utils.is_image_file(filename):
+ if not path_utils.is_image_file(filename):
  continue

  if 'max_detection_conf' not in row or 'detections' not in row or \
@@ -643,7 +715,7 @@ def find_matches_in_directory(dirNameAndRows, options):

  bFoundSimilarDetection = False

- rtree_rect = detection_rect_to_rtree_rect(bbox)
+ rtree_rect = _detection_rect_to_rtree_rect(bbox)

  # This will return candidates of all classes
  overlappingCandidateDetections =\
@@ -723,10 +795,10 @@ def find_matches_in_directory(dirNameAndRows, options):
  else:
  return candidateDetections

- # ...def find_matches_in_directory(...)
+ # ...def _find_matches_in_directory(...)


- def update_detection_table(repeatDetectionResults, options, outputFilename=None):
+ def _update_detection_table(repeatDetectionResults, options, outputFilename=None):
  """
  Changes confidence values in repeatDetectionResults.detectionResults so that detections
  deemed to be possible false positives are given negative confidence values.
@@ -870,10 +942,10 @@ def update_detection_table(repeatDetectionResults, options, outputFilename=None)

  return detectionResults

- # ...def update_detection_table(...)
+ # ...def _update_detection_table(...)


- def render_sample_image_for_detection(detection,filteringDir,options):
+ def _render_sample_image_for_detection(detection,filteringDir,options):
  """
  Render a sample image for one unique detection, possibly containing lightly-colored
  high-confidence detections from elsewhere in the sample image.
@@ -954,7 +1026,7 @@ def render_sample_image_for_detection(detection,filteringDir,options):

  else:

- render_bounding_box(detection, inputFullPath, outputFullPath,
+ _render_bounding_box(detection, inputFullPath, outputFullPath,
  lineWidth=options.lineThickness, expansion=options.boxExpansion)

  # ...if we are/aren't rendering other bounding boxes
@@ -1003,11 +1075,7 @@ def render_sample_image_for_detection(detection,filteringDir,options):
  cropped_grid_width=croppedGridWidth,
  output_image_filename=outputFullPath,
  primary_image_location=options.detectionTilesPrimaryImageLocation)
-
- # bDetectionTilesPrimaryImageWidth = None
- # bDetectionTilesCroppedGridWidth = 0.6
- # bDetectionTilesPrimaryImageLocation='right'
-
+
  # ...if we are/aren't rendering detection tiles

  except Exception as e:
@@ -1018,12 +1086,28 @@ def render_sample_image_for_detection(detection,filteringDir,options):
  if options.bFailOnRenderError:
  raise

- # ...def render_sample_image_for_detection(...)
+ # ...def _render_sample_image_for_detection(...)


  #%% Main entry point

  def find_repeat_detections(inputFilename, outputFilename=None, options=None):
+ """
+ Find detections in a MD results file that occur repeatedly and are likely to be
+ rocks/sticks.
+
+ Args:
+ inputFilename (str): the MD results .json file to analyze
+ outputFilename (str, optional): the filename to which we should write results
+ with repeat detections removed, typically set to None during the first
+ part of the RDE process.
+ options (RepeatDetectionOptions): all the interesting options controlling this
+ process; see RepeatDetectionOptions for details.
+
+ Returns:
+ RepeatDetectionResults: results of the RDE process; see RepeatDetectionResults
+ for details.
+ """

  ##%% Input handling

@@ -1203,7 +1287,7 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):
  assert dirNameAndRow[0] == dirName
  print('Processing dir {} of {}: {}'.format(iDir,len(dirsToSearch),dirName))
  allCandidateDetections[iDir] = \
- find_matches_in_directory(dirNameAndRow, options)
+ _find_matches_in_directory(dirNameAndRow, options)

  else:

@@ -1271,7 +1355,7 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):

  options.pbar = None
  allCandidateDetectionFiles = list(pool.imap(
- partial(find_matches_in_directory,options=options), dirNameAndIntermediateFile))
+ partial(_find_matches_in_directory,options=options), dirNameAndIntermediateFile))


  ##%% Load into a combined list of candidate detections
@@ -1298,11 +1382,11 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):
  if options.parallelizationUsesThreads:
  options.pbar = tqdm(total=len(dirNameAndRows))
  allCandidateDetections = list(pool.imap(
- partial(find_matches_in_directory,options=options), dirNameAndRows))
+ partial(_find_matches_in_directory,options=options), dirNameAndRows))
  else:
  options.pbar = None
  allCandidateDetections = list(tqdm(pool.imap(
- partial(find_matches_in_directory,options=options), dirNameAndRows)))
+ partial(_find_matches_in_directory,options=options), dirNameAndRows)))

  print('\nFinished looking for similar detections')

@@ -1342,7 +1426,7 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):

  # Sort the above-threshold detections for easier review
  if options.smartSort is not None:
- suspiciousDetections[iDir] = sort_detections_for_directory(
+ suspiciousDetections[iDir] = _sort_detections_for_directory(
  suspiciousDetections[iDir],options)

  print('Found {} suspicious detections in directory {} ({})'.format(
@@ -1427,7 +1511,7 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):

  toReturn.suspiciousDetections = suspiciousDetections

- toReturn.allRowsFiltered = update_detection_table(toReturn, options, outputFilename)
+ toReturn.allRowsFiltered = _update_detection_table(toReturn, options, outputFilename)


  ##%% Create filtering directory
@@ -1501,19 +1585,19 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):
  if options.parallelizationUsesThreads:
  options.pbar = tqdm(total=len(allSuspiciousDetections))
  allCandidateDetections = list(pool.imap(
- partial(render_sample_image_for_detection,filteringDir=filteringDir,
+ partial(_render_sample_image_for_detection,filteringDir=filteringDir,
  options=options), allSuspiciousDetections))
  else:
  options.pbar = None
  allCandidateDetections = list(tqdm(pool.imap(
- partial(render_sample_image_for_detection,filteringDir=filteringDir,
+ partial(_render_sample_image_for_detection,filteringDir=filteringDir,
  options=options), allSuspiciousDetections)))

  else:

  # Serial loop over detections
  for detection in allSuspiciousDetections:
- render_sample_image_for_detection(detection,filteringDir,options)
+ _render_sample_image_for_detection(detection,filteringDir,options)

  # Delete (large) temporary data from the list of suspicious detections
  for detection in allSuspiciousDetections:
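
Tying the pieces together, the find_repeat_detections() docstring added above implies a first-pass invocation roughly like the following sketch, continuing the options example earlier in this diff (the filename is a placeholder; the companion remove_repeat_detections step runs only after the filtering folder has been manually reviewed):

    results = repeat_detections_core.find_repeat_detections(
        inputFilename='md_results.json',   # placeholder: MD results file to analyze
        outputFilename=None,               # typically None during the first RDE pass
        options=options)

    # results.filterFile points at the detectionIndex.json written alongside the
    # review images; after deleting the review images whose detections are actually
    # animals, remove_repeat_detections uses that file to produce the filtered results.
    print(results.filterFile)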