megadetector 5.0.27-py3-none-any.whl → 5.0.29-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (176)
  1. megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
  2. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
  3. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  7. megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
  8. megadetector/classification/aggregate_classifier_probs.py +3 -3
  9. megadetector/classification/analyze_failed_images.py +5 -5
  10. megadetector/classification/cache_batchapi_outputs.py +5 -5
  11. megadetector/classification/create_classification_dataset.py +11 -12
  12. megadetector/classification/crop_detections.py +10 -10
  13. megadetector/classification/csv_to_json.py +8 -8
  14. megadetector/classification/detect_and_crop.py +13 -15
  15. megadetector/classification/evaluate_model.py +7 -7
  16. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  17. megadetector/classification/json_to_azcopy_list.py +1 -1
  18. megadetector/classification/json_validator.py +29 -32
  19. megadetector/classification/map_classification_categories.py +9 -9
  20. megadetector/classification/merge_classification_detection_output.py +12 -9
  21. megadetector/classification/prepare_classification_script.py +19 -19
  22. megadetector/classification/prepare_classification_script_mc.py +23 -23
  23. megadetector/classification/run_classifier.py +4 -4
  24. megadetector/classification/save_mislabeled.py +6 -6
  25. megadetector/classification/train_classifier.py +1 -1
  26. megadetector/classification/train_classifier_tf.py +9 -9
  27. megadetector/classification/train_utils.py +10 -10
  28. megadetector/data_management/annotations/annotation_constants.py +1 -1
  29. megadetector/data_management/camtrap_dp_to_coco.py +45 -45
  30. megadetector/data_management/cct_json_utils.py +101 -101
  31. megadetector/data_management/cct_to_md.py +49 -49
  32. megadetector/data_management/cct_to_wi.py +33 -33
  33. megadetector/data_management/coco_to_labelme.py +75 -75
  34. megadetector/data_management/coco_to_yolo.py +189 -189
  35. megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
  36. megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
  37. megadetector/data_management/databases/integrity_check_json_db.py +202 -188
  38. megadetector/data_management/databases/subset_json_db.py +33 -33
  39. megadetector/data_management/generate_crops_from_cct.py +38 -38
  40. megadetector/data_management/get_image_sizes.py +54 -49
  41. megadetector/data_management/labelme_to_coco.py +130 -124
  42. megadetector/data_management/labelme_to_yolo.py +78 -72
  43. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  44. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  45. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  46. megadetector/data_management/lila/download_lila_subset.py +21 -24
  47. megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
  48. megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
  49. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  50. megadetector/data_management/lila/lila_common.py +70 -70
  51. megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
  52. megadetector/data_management/mewc_to_md.py +339 -340
  53. megadetector/data_management/ocr_tools.py +258 -252
  54. megadetector/data_management/read_exif.py +232 -223
  55. megadetector/data_management/remap_coco_categories.py +26 -26
  56. megadetector/data_management/remove_exif.py +31 -20
  57. megadetector/data_management/rename_images.py +187 -187
  58. megadetector/data_management/resize_coco_dataset.py +41 -41
  59. megadetector/data_management/speciesnet_to_md.py +41 -41
  60. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  61. megadetector/data_management/yolo_output_to_md_output.py +117 -120
  62. megadetector/data_management/yolo_to_coco.py +195 -188
  63. megadetector/detection/change_detection.py +831 -0
  64. megadetector/detection/process_video.py +341 -338
  65. megadetector/detection/pytorch_detector.py +308 -266
  66. megadetector/detection/run_detector.py +186 -166
  67. megadetector/detection/run_detector_batch.py +366 -364
  68. megadetector/detection/run_inference_with_yolov5_val.py +328 -325
  69. megadetector/detection/run_tiled_inference.py +312 -253
  70. megadetector/detection/tf_detector.py +24 -24
  71. megadetector/detection/video_utils.py +291 -283
  72. megadetector/postprocessing/add_max_conf.py +15 -11
  73. megadetector/postprocessing/categorize_detections_by_size.py +44 -44
  74. megadetector/postprocessing/classification_postprocessing.py +808 -311
  75. megadetector/postprocessing/combine_batch_outputs.py +20 -21
  76. megadetector/postprocessing/compare_batch_results.py +528 -517
  77. megadetector/postprocessing/convert_output_format.py +97 -97
  78. megadetector/postprocessing/create_crop_folder.py +220 -147
  79. megadetector/postprocessing/detector_calibration.py +173 -168
  80. megadetector/postprocessing/generate_csv_report.py +508 -0
  81. megadetector/postprocessing/load_api_results.py +25 -22
  82. megadetector/postprocessing/md_to_coco.py +129 -98
  83. megadetector/postprocessing/md_to_labelme.py +89 -83
  84. megadetector/postprocessing/md_to_wi.py +40 -40
  85. megadetector/postprocessing/merge_detections.py +87 -114
  86. megadetector/postprocessing/postprocess_batch_results.py +319 -302
  87. megadetector/postprocessing/remap_detection_categories.py +36 -36
  88. megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
  89. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  90. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  91. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
  92. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  93. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  94. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  95. megadetector/postprocessing/validate_batch_results.py +70 -70
  96. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  97. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
  98. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
  99. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -69
  100. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  101. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  102. megadetector/taxonomy_mapping/species_lookup.py +33 -33
  103. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  104. megadetector/taxonomy_mapping/taxonomy_graph.py +11 -11
  105. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  106. megadetector/utils/azure_utils.py +22 -22
  107. megadetector/utils/ct_utils.py +1019 -200
  108. megadetector/utils/directory_listing.py +21 -77
  109. megadetector/utils/gpu_test.py +22 -22
  110. megadetector/utils/md_tests.py +541 -518
  111. megadetector/utils/path_utils.py +1511 -406
  112. megadetector/utils/process_utils.py +41 -41
  113. megadetector/utils/sas_blob_utils.py +53 -49
  114. megadetector/utils/split_locations_into_train_val.py +73 -60
  115. megadetector/utils/string_utils.py +147 -26
  116. megadetector/utils/url_utils.py +463 -173
  117. megadetector/utils/wi_utils.py +2629 -2868
  118. megadetector/utils/write_html_image_list.py +137 -137
  119. megadetector/visualization/plot_utils.py +21 -21
  120. megadetector/visualization/render_images_with_thumbnails.py +37 -73
  121. megadetector/visualization/visualization_utils.py +424 -404
  122. megadetector/visualization/visualize_db.py +197 -190
  123. megadetector/visualization/visualize_detector_output.py +126 -98
  124. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/METADATA +6 -3
  125. megadetector-5.0.29.dist-info/RECORD +163 -0
  126. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
  127. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  128. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  129. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  130. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  131. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  132. megadetector/data_management/importers/awc_to_json.py +0 -191
  133. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  134. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  135. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  136. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  137. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  138. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  139. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  140. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  141. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  142. megadetector/data_management/importers/ena24_to_json.py +0 -276
  143. megadetector/data_management/importers/filenames_to_json.py +0 -386
  144. megadetector/data_management/importers/helena_to_cct.py +0 -283
  145. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  146. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  147. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  148. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  149. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  150. megadetector/data_management/importers/missouri_to_json.py +0 -490
  151. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  152. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  153. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  154. megadetector/data_management/importers/pc_to_json.py +0 -365
  155. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  156. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  157. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  158. megadetector/data_management/importers/rspb_to_json.py +0 -356
  159. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  160. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  161. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  162. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  163. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  164. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  165. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  166. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  167. megadetector/data_management/importers/ubc_to_json.py +0 -399
  168. megadetector/data_management/importers/umn_to_json.py +0 -507
  169. megadetector/data_management/importers/wellington_to_json.py +0 -263
  170. megadetector/data_management/importers/wi_to_json.py +0 -442
  171. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  172. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  173. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  174. megadetector-5.0.27.dist-info/RECORD +0 -208
  175. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
  176. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
@@ -6,7 +6,7 @@ Core utilities shared by find_repeat_detections and remove_repeat_detections.
 
 Nothing in this file (in fact nothing in this subpackage) will make sense until you read
 the RDE user's guide:
-
+
 https://github.com/agentmorris/MegaDetector/tree/main/megadetector/postprocessing/repeat_detection_elimination
 
 """
@@ -68,68 +68,68 @@ class RepeatDetectionOptions:
     """
 
     def __init__(self):
-
+
         #: Folder where images live; filenames in the MD results .json file should
         #: be relative to this folder.
         #:
         #: imageBase can also be a SAS URL, in which case some error-checking is
         #: disabled.
         self.imageBase = ''
-
+
         #: Folder where we should write temporary output.
         self.outputBase = ''
-
+
         #: Don't consider detections with confidence lower than this as suspicious
         self.confidenceMin = 0.1
-
+
         #: Don't consider detections with confidence higher than this as suspicious
         self.confidenceMax = 1.0
-
+
         #: What's the IOU threshold for considering two boxes the same?
         self.iouThreshold = 0.9
-
+
         #: How many occurrences of a single location (as defined by the IOU threshold)
         #: are required before we declare it suspicious?
         self.occurrenceThreshold = 20
-
+
         #: Ignore "suspicious" detections smaller than some size
         self.minSuspiciousDetectionSize = 0.0
-
+
         #: Ignore "suspicious" detections larger than some size; these are often animals
         #: taking up the whole image. This is expressed as a fraction of the image size.
         self.maxSuspiciousDetectionSize = 0.2
-
+
         #: Ignore folders with more than this many images in them
         self.maxImagesPerFolder = None
-
+
         #: A list of category IDs (ints) that we don't want consider as candidate repeat detections.
         #:
-        #: Typically used to say, e.g., "don't bother analyzing people or vehicles for repeat
+        #: Typically used to say, e.g., "don't bother analyzing people or vehicles for repeat
         #: detections", which you could do by saying excludeClasses = [2,3].
         self.excludeClasses = []
-
-        #: For very large sets of results, passing chunks of results to and from workers as
+
+        #: For very large sets of results, passing chunks of results to and from workers as
         #: parameters ('memory') can be memory-intensive, so we can serialize to intermediate
         #: files instead ('file').
         #:
         #: The use of 'file' here is still experimental.
         self.pass_detections_to_processes_method = 'memory'
-
+
         #: Number of workers to use for parallel operations
         self.nWorkers = 10
-
+
         #: Should we use threads (True) or processes (False) for parallelization?
         #:
-        #: Not relevant if nWorkers <= 1, or if bParallelizeComparisons and
+        #: Not relevant if nWorkers <= 1, or if bParallelizeComparisons and
         #: bParallelizeRendering are both False.
         self.parallelizationUsesThreads = True
-
-        #: If this is not empty, we'll load detections from a filter file rather than finding them
-        #: from the detector output. This should be a .json file containing detections, generally this
+
+        #: If this is not empty, we'll load detections from a filter file rather than finding them
+        #: from the detector output. This should be a .json file containing detections, generally this
         #: is the detectionIndex.json file in the filtering_* folder produced by find_repeat_detections().
         self.filterFileToLoad = ''
-
-        #: (optional) List of filenames remaining after deletion of identified
+
+        #: (optional) List of filenames remaining after deletion of identified
         #: repeated detections that are actually animals. This should be a flat
         #: text file, one relative filename per line.
         #:
@@ -139,71 +139,71 @@ class RepeatDetectionOptions:
         #: where we use an external tool for image handling that allows us to do something
         #: smarter and less destructive than deleting images to mark them as non-false-positives.
         self.filteredFileListToLoad = None
-
+
         #: Should we write the folder of images used to manually review repeat detections?
         self.bWriteFilteringFolder = True
-
+
         #: For debugging: limit comparisons to a specific number of folders
         self.debugMaxDir = -1
-
+
         #: For debugging: limit rendering to a specific number of folders
         self.debugMaxRenderDir = -1
-
+
         #: For debugging: limit comparisons to a specific number of detections
         self.debugMaxRenderDetection = -1
-
+
         #: For debugging: limit comparisons to a specific number of instances
         self.debugMaxRenderInstance = -1
-
+
         #: Should we parallelize (across cameras) comparisons to find repeat detections?
         self.bParallelizeComparisons = True
-
+
         #: Should we parallelize image rendering?
         self.bParallelizeRendering = True
-
+
         #: If this is False (default), a detection from class A is *not* considered to be "the same"
         #: as a detection from class B, even if they're at the same location.
         self.categoryAgnosticComparisons = False
-
+
         #: Determines whether bounding-box rendering errors (typically network errors) should
-        #: be treated as failures
+        #: be treated as failures
         self.bFailOnRenderError = False
-
+
         #: Should we print a warning if images referred to in the MD results file are missing?
         self.bPrintMissingImageWarnings = True
-
+
         #: If bPrintMissingImageWarnings is True, should we print a warning about missing images
         #: just once ('once') or every time ('all')?
         self.missingImageWarningType = 'once' # 'all'
-
+
         #: Image width for rendered images (it's called "max" because we don't resize smaller images).
         #:
         #: Original size is preserved if this is None.
         #:
         #: This does *not* include the tile image grid.
         self.maxOutputImageWidth = 2000
-
+
         #: Line thickness (in pixels) for box rendering
         self.lineThickness = 10
-
+
         #: Box expansion (in pixels)
         self.boxExpansion = 2
-
+
         #: Progress bar used during comparisons and rendering. Do not set externally.
         #:
         #: :meta private:
         self.pbar = None
-
+
         #: Replace filename tokens after reading, useful when the directory structure
         #: has changed relative to the structure the detector saw.
         self.filenameReplacements = {}
-
-        #: How many folders up from the leaf nodes should we be going to aggregate images into
+
+        #: How many folders up from the leaf nodes should we be going to aggregate images into
         #: cameras?
         #:
         #: If this is zero, each leaf folder is treated as a camera.
         self.nDirLevelsFromLeaf = 0
-
+
         #: An optional function that takes a string (an image file name) and returns
         #: a string (the corresponding folder ID), typically used when multiple folders
         #: actually correspond to the same camera in a manufacturer-specific way (e.g.
@@ -215,60 +215,60 @@ class RepeatDetectionOptions:
         #: from megadetector.utils import ct_utils
         #: self.customDirNameFunction = ct_utils.image_file_to_camera_folder
         self.customDirNameFunction = None
-
+
         #: Include only specific folders, mutually exclusive with [excludeFolders]
         self.includeFolders = None
-
+
         #: Exclude specific folders, mutually exclusive with [includeFolders]
         self.excludeFolders = None
-
+
         #: Optionally show *other* detections (i.e., detections other than the
         #: one the user is evaluating), typically in a light gray.
         self.bRenderOtherDetections = False
-
+
         #: Threshold to use for *other* detections
-        self.otherDetectionsThreshold = 0.2
-
+        self.otherDetectionsThreshold = 0.2
+
         #: Line width (in pixels) for *other* detections
         self.otherDetectionsLineWidth = 1
-
+
         #: Optionally show a grid that includes a sample image for the detection, plus
         #: the top N additional detections
         self.bRenderDetectionTiles = True
-
+
         #: Width of the original image (within the larger output image) when bRenderDetectionTiles
         #: is True.
         #:
         #: If this is None, we'll render the original image in the detection tile image
         #: at its original width.
         self.detectionTilesPrimaryImageWidth = None
-
+
         #: Width to use for the grid of detection instances.
         #:
         #: Can be a width in pixels, or a number from 0 to 1 representing a fraction
         #: of the primary image width.
         #:
         #: If you want to render the grid at exactly 1 pixel wide, I guess you're out
-        #: of luck.
+        #: of luck.
         self.detectionTilesCroppedGridWidth = 0.6
-
+
         #: Location of the primary image within the mosaic ('right' or 'left)
         self.detectionTilesPrimaryImageLocation = 'right'
-
+
         #: Maximum number of individual detection instances to include in the mosaic
         self.detectionTilesMaxCrops = 150
-
+
         #: If bRenderOtherDetections is True, what color should we use to render the
         #: (hopefully pretty subtle) non-target detections?
-        #:
-        #: In theory I'd like these "other detection" rectangles to be partially
+        #:
+        #: In theory I'd like these "other detection" rectangles to be partially
         #: transparent, but this is not straightforward, and the alpha is ignored
-        #: here. But maybe if I leave it here and wish hard enough, someday it
+        #: here. But maybe if I leave it here and wish hard enough, someday it
         #: will work.
         #:
         #: otherDetectionsColors = ['dimgray']
         self.otherDetectionsColors = [(105,105,105,100)]
-
+
         #: Sort detections within a directory so nearby detections are adjacent
         #: in the list, for faster review.
         #:
@@ -278,68 +278,70 @@ class RepeatDetectionOptions:
         #: * 'xsort' sorts detections from left to right
         #: * 'clustersort' clusters detections and sorts by cluster
         self.smartSort = 'xsort'
-
+
         #: Only relevant if smartSort == 'clustersort'
         self.smartSortDistanceThreshold = 0.1
-
-
+
+
 class RepeatDetectionResults:
     """
     The results of an entire repeat detection analysis
     """
 
     def __init__(self):
-
-        #: The data table (Pandas DataFrame), as loaded from the input json file via
+
+        #: The data table (Pandas DataFrame), as loaded from the input json file via
         #: load_api_results(). Has columns ['file', 'detections','failure'].
         self.detectionResults = None
-
+
         #: The other fields in the input json file, loaded via load_api_results()
         self.otherFields = None
-
+
         #: The data table after modification
         self.detectionResultsFiltered = None
-
+
         #: dict mapping folder names to whole rows from the data table
-        self.rowsByDirectory = None
-
+        self.rows_by_directory = None
+
         #: dict mapping filenames to rows in the master table
-        self.filenameToRow = None
-
-        #: An array of length nDirs, where each element is a list of DetectionLocation
+        self.filename_to_row = None
+
+        #: An array of length nDirs, where each element is a list of DetectionLocation
         #: objects for that directory that have been flagged as suspicious
-        self.suspiciousDetections = None
-
+        self.suspicious_detections = None
+
         #: The location of the .json file written with information about the RDE
         #: review images (typically detectionIndex.json)
         self.filterFile = None
-
+
 
 class IndexedDetection:
     """
     A single detection event on a single image
     """
 
-    def __init__(self, iDetection=-1, filename='', bbox=[], confidence=-1, category='unknown'):
-
-        assert isinstance(iDetection,int)
+    def __init__(self, i_detection=-1, filename='', bbox=None, confidence=-1, category='unknown'):
+
+        if bbox is None:
+            bbox = []
+        assert isinstance(i_detection,int)
         assert isinstance(filename,str)
         assert isinstance(bbox,list)
         assert isinstance(category,str)
-
+
         #: index of this detection within all detections for this filename
-        self.iDetection = iDetection
-
+        self.i_detection = i_detection
+
         #: path to the image corresponding to this detection
         self.filename = filename
-
+
         #: [x_min, y_min, width_of_box, height_of_box]
         self.bbox = bbox
-
+
         #: confidence value of this detection
         self.confidence = confidence
-
-        #: category ID (not name) of this detection
+
+        #: category ID (not name) of this detection
         self.category = category
 
     def __repr__(self):
@@ -354,57 +356,57 @@ class DetectionLocation:
     will be stored in IndexedDetection objects.
     """
 
-    def __init__(self, instance, detection, relativeDir, category, id=None):
-
+    def __init__(self, instance, detection, relative_dir, category, id=None):
+
         assert isinstance(detection,dict)
         assert isinstance(instance,IndexedDetection)
-        assert isinstance(relativeDir,str)
+        assert isinstance(relative_dir,str)
         assert isinstance(category,str)
-
+
         #: list of IndexedDetections that match this detection
         self.instances = [instance]
-
+
         #: category ID (not name) for this detection
         self.category = category
-
+
         #: bbox as x,y,w,h
         self.bbox = detection['bbox']
-
+
         #: relative folder (i.e., camera name) in which this detectin was found
-        self.relativeDir = relativeDir
-
+        self.relativeDir = relative_dir
+
         #: relative path to the canonical image representing this detection
-        self.sampleImageRelativeFileName = ''
-
+        self.sampleImageRelativeFileName = ''
+
         #: list of detections on that canonical image that match this detection
         self.sampleImageDetections = None
-
+
         #: ID for this detection; this ID is only guaranteed to be unique within a directory
         self.id = id
-
+
         #: only used when doing cluster-based sorting
         self.clusterLabel = None
 
     def __repr__(self):
         s = ct_utils.pretty_print_object(self, False)
         return s
-
+
     def to_api_detection(self):
         """
-        Converts this detection to a 'detection' dictionary, making the semi-arbitrary
+        Converts this detection to a 'detection' dictionary, making the semi-arbitrary
         assumption that the first instance is representative of confidence.
-
+
         Returns:
             dict: dictionary in the format used to store detections in MD results
         """
-
+
         # This is a bit of a hack right now, but for future-proofing, I don't want to call this
-        # to retrieve anything other than the highest-confidence detection, and I'm assuming this
+        # to retrieve anything other than the highest-confidence detection, and I'm assuming this
         # is already sorted, so assert() that.
         confidences = [i.confidence for i in self.instances]
         assert confidences[0] == max(confidences), \
             'Cannot convert an unsorted DetectionLocation to an API detection'
-
+
         # It's not clear whether it's better to use instances[0].bbox or self.bbox
         # here... they should be very similar, unless iouThreshold is very low.
         # self.bbox is a better representation of the overall DetectionLocation.
@@ -415,18 +417,21 @@ class DetectionLocation:
 
 #%% Support functions
 
-def _render_bounding_box(detection, inputFileName, outputFileName, lineWidth=5,
-                         expansion=0):
+def _render_bounding_box(detection,
+                         input_file_name,
+                         output_file_name,
+                         line_width=5,
+                         expansion=0):
     """
-    Rendering the detection [detection] on the image [inputFileName], writing the result
-    to [outputFileName].
+    Rendering the detection [detection] on the image [input_file_name], writing the result
+    to [output_file_name].
     """
-
-    im = open_image(inputFileName)
+
+    im = open_image(input_file_name)
     d = detection.to_api_detection()
-    render_detection_bounding_boxes([d],im,thickness=lineWidth,expansion=expansion,
+    render_detection_bounding_boxes([d],im,thickness=line_width,expansion=expansion,
                                     confidence_threshold=-10)
-    im.save(outputFileName)
+    im.save(output_file_name)
 
 
 def _detection_rect_to_rtree_rect(detection_rect):
@@ -434,12 +439,12 @@ def _detection_rect_to_rtree_rect(detection_rect):
     We store detections as x/y/w/h, rtree and pyqtree use l/b/r/t. Convert from
     our representation to rtree's.
     """
-
-    l = detection_rect[0]
-    b = detection_rect[1]
-    r = detection_rect[0] + detection_rect[2]
-    t = detection_rect[1] + detection_rect[3]
-    return (l,b,r,t)
+
+    left = detection_rect[0]
+    bottom = detection_rect[1]
+    right = detection_rect[0] + detection_rect[2]
+    top = detection_rect[1] + detection_rect[3]
+    return (left,bottom,right,top)
 
 
 def _rtree_rect_to_detection_rect(rtree_rect):
@@ -447,183 +452,183 @@ def _rtree_rect_to_detection_rect(rtree_rect):
     We store detections as x/y/w/h, rtree and pyqtree use l/b/r/t. Convert from
     rtree's representation to ours.
     """
-
+
     x = rtree_rect[0]
     y = rtree_rect[1]
     w = rtree_rect[2] - rtree_rect[0]
     h = rtree_rect[3] - rtree_rect[1]
     return (x,y,w,h)
-
 
-def _sort_detections_for_directory(candidateDetections,options):
+
+def _sort_detections_for_directory(candidate_detections,options):
     """
-    candidateDetections is a list of DetectionLocation objects. Sorts them to
+    candidate_detections is a list of DetectionLocation objects. Sorts them to
     put nearby detections next to each other, for easier visual review. Returns
-    a sorted copy of candidateDetections, does not sort in-place.
+    a sorted copy of candidate_detections, does not sort in-place.
     """
-
-    if len(candidateDetections) <= 1 or options.smartSort is None:
-        return candidateDetections
-
+
+    if len(candidate_detections) <= 1 or options.smartSort is None:
+        return candidate_detections
+
     # Just sort by the X location of each box
     if options.smartSort == 'xsort':
-        candidateDetectionsSorted = sorted(candidateDetections,
+        candidate_detections_sorted = sorted(candidate_detections,
                                            key=lambda x: (
                                                (x.bbox[0]) + (x.bbox[2]/2.0)
                                                ))
-        return candidateDetectionsSorted
-
+        return candidate_detections_sorted
+
     elif options.smartSort == 'clustersort':
-
+
         cluster = sklearn.cluster.AgglomerativeClustering(
             n_clusters=None,
             distance_threshold=options.smartSortDistanceThreshold,
             linkage='complete')
-
-        # Prepare a list of points to represent each box,
+
+        # Prepare a list of points to represent each box,
         # that's what we'll use for clustering
         points = []
-        for det in candidateDetections:
+        for det in candidate_detections:
             # To use the upper-left of the box as the clustering point
             # points.append([det.bbox[0],det.bbox[1]])
-
+
             # To use the center of the box as the clustering point
            points.append([det.bbox[0]+det.bbox[2]/2.0,
                           det.bbox[1]+det.bbox[3]/2.0])
-        X = np.array(points)
-
-        labels = cluster.fit_predict(X)
+        points_array = np.array(points)
+
+        labels = cluster.fit_predict(points_array)
         unique_labels = np.unique(labels)
-
+
         # Labels *could* be any unique labels according to the docs, but in practice
         # they are unique integers from 0:nClusters.
         #
         # Make sure the labels are unique incrementing integers.
         for i_label in range(1,len(unique_labels)):
             assert unique_labels[i_label] == 1 + unique_labels[i_label-1]
-
-        assert len(labels) == len(candidateDetections)
-
+
+        assert len(labels) == len(candidate_detections)
+
         # Store the label assigned to each cluster
         for i_label,label in enumerate(labels):
-            candidateDetections[i_label].clusterLabel = label
-
+            candidate_detections[i_label].clusterLabel = label
+
         # Now sort the clusters by their x coordinate, and re-assign labels
         # so the labels are sortable
         label_x_means = []
-
+
         for label in unique_labels:
-            detections_this_label = [d for d in candidateDetections if (
+            detections_this_label = [d for d in candidate_detections if (
                 d.clusterLabel == label)]
            points_this_label = [ [d.bbox[0],d.bbox[1]] for d in detections_this_label]
            x = [p[0] for p in points_this_label]
-            y = [p[1] for p in points_this_label]
-
+            y = [p[1] for p in points_this_label]
+
            # Compute the centroid for debugging, but we're only going to use the x
            # coordinate. This is the centroid of points used to represent detections,
            # which may be box centers or box corners.
            centroid = [ sum(x) / len(points_this_label), sum(y) / len(points_this_label) ]
            label_xval = centroid[0]
            label_x_means.append(label_xval)
-
-        old_cluster_label_to_new_cluster_label = {}
+
+        old_cluster_label_to_new_cluster_label = {}
         new_cluster_labels = np.argsort(label_x_means)
         assert len(new_cluster_labels) == len(np.unique(new_cluster_labels))
         for old_cluster_label in unique_labels:
             old_cluster_label_to_new_cluster_label[old_cluster_label] =\
                 np.where(new_cluster_labels==old_cluster_label)[0][0]
-
+
         for i_cluster in range(0,len(unique_labels)):
             old_label = unique_labels[i_cluster]
             assert i_cluster == old_label
             new_label = old_cluster_label_to_new_cluster_label[old_label]
-
-        for i_det,det in enumerate(candidateDetections):
+
+        for i_det,det in enumerate(candidate_detections):
             old_label = det.clusterLabel
             new_label = old_cluster_label_to_new_cluster_label[old_label]
             det.clusterLabel = new_label
-
-        candidateDetectionsSorted = sorted(candidateDetections,
+
+        candidate_detections_sorted = sorted(candidate_detections,
                                            key=lambda x: (x.clusterLabel,x.id))
-
-        return candidateDetectionsSorted
-
+
+        return candidate_detections_sorted
+
     else:
         raise ValueError('Unrecognized sort method {}'.format(
             options.smartSort))
-
+
 # ...def _sort_detections_for_directory(...)
 
 
-def _find_matches_in_directory(dirNameAndRows, options):
+def _find_matches_in_directory(dir_name_and_rows, options):
     """
-    dirNameAndRows is a tuple of (name,rows).
-
+    dir_name_and_rows is a tuple of (name,rows).
+
     "name" is a location name, typically a folder name, though this may be an arbitrary
     location identifier.
-
+
     "rows" is a Pandas dataframe with one row per image in this location, with columns:
-
+
     * 'file': relative file name
     * 'detections': a list of MD detection objects, i.e. dicts with keys ['category','conf','bbox']
     * 'max_detection_conf': maximum confidence of any detection, in any category
-
+
     "rows" can also point to a .csv file, in which case the detection table will be read from that
     .csv file, and results will be written to a .csv file rather than being returned.
-
+
     Find all unique detections in this directory.
-
+
     Returns a list of DetectionLocation objects.
     """
-
+
     if options.pbar is not None:
         options.pbar.update()
 
     # Create a tree to store candidate detections
-    candidateDetectionsIndex = pyqtree.Index(bbox=(-0.1,-0.1,1.1,1.1))
+    candidate_detections_index = pyqtree.Index(bbox=(-0.1,-0.1,1.1,1.1))
+
+    assert len(dir_name_and_rows) == 2, 'find_matches_in_directory: invalid input'
+    assert isinstance(dir_name_and_rows[0],str), 'find_matches_in_directory: invalid location name'
+    dir_name = dir_name_and_rows[0]
+    rows = dir_name_and_rows[1]
 
-    assert len(dirNameAndRows) == 2, 'find_matches_in_directory: invalid input'
-    assert isinstance(dirNameAndRows[0],str), 'find_matches_in_directory: invalid location name'
-    dirName = dirNameAndRows[0]
-    rows = dirNameAndRows[1]
-
     detections_loaded_from_csv_file = None
-
+
     if isinstance(rows,str):
         detections_loaded_from_csv_file = rows
         print('Loading results for location {} from {}'.format(
-            dirName,detections_loaded_from_csv_file))
+            dir_name,detections_loaded_from_csv_file))
         rows = pd.read_csv(detections_loaded_from_csv_file)
         # Pandas writes out detections out as strings, convert them back to lists
         rows['detections'] = rows['detections'].apply(lambda s: json.loads(s.replace('\'','"')))
-
+
     if options.maxImagesPerFolder is not None and len(rows) > options.maxImagesPerFolder:
         print('Ignoring directory {} because it has {} images (limit set to {})'.format(
-            dirName,len(rows),options.maxImagesPerFolder))
+            dir_name,len(rows),options.maxImagesPerFolder))
         return []
-
+
     if options.includeFolders is not None:
         assert options.excludeFolders is None, 'Cannot specify include and exclude folder lists'
-        if dirName not in options.includeFolders:
-            print('Ignoring folder {}, not in inclusion list'.format(dirName))
+        if dir_name not in options.includeFolders:
+            print('Ignoring folder {}, not in inclusion list'.format(dir_name))
             return []
-
+
     if options.excludeFolders is not None:
         assert options.includeFolders is None, 'Cannot specify include and exclude folder lists'
-        if dirName in options.excludeFolders:
-            print('Ignoring folder {}, on exclusion list'.format(dirName))
+        if dir_name in options.excludeFolders:
+            print('Ignoring folder {}, on exclusion list'.format(dir_name))
             return []
-
+
     # For each image in this directory
     #
-    # iDirectoryRow = 0; row = rows.iloc[iDirectoryRow]
+    # i_directory_row = 0; row = rows.iloc[i_directory_row]
     #
-    # iDirectoryRow is a pandas index, so it may not start from zero;
+    # i_directory_row is a pandas index, so it may not start from zero;
     # for debugging, we maintain i_iteration as a loop index.
     i_iteration = -1
     n_boxes_evaluated = 0
-
-    for iDirectoryRow, row in rows.iterrows():
+
+    for i_directory_row, row in rows.iterrows():
 
         i_iteration += 1
         filename = row['file']
@@ -632,12 +637,12 @@ def _find_matches_in_directory(dirNameAndRows, options):
 
         if 'max_detection_conf' not in row or 'detections' not in row or \
             row['detections'] is None:
-            print('Skipping row {}'.format(iDirectoryRow))
+            print('Skipping row {}'.format(i_directory_row))
             continue
 
         # Don't bother checking images with no detections above threshold
-        maxP = float(row['max_detection_conf'])
-        if maxP < options.confidenceMin:
+        max_p = float(row['max_detection_conf'])
+        if max_p < options.confidenceMin:
             continue
 
         # Array of dicts, where each element is
@@ -646,24 +651,24 @@ def _find_matches_in_directory(dirNameAndRows, options):
         # 'conf': 0.926, # confidence of this detections
         #
        # (x_min, y_min) is upper-left, all in relative coordinates
-        # 'bbox': [x_min, y_min, width_of_box, height_of_box]
-        #
+        # 'bbox': [x_min, y_min, width_of_box, height_of_box]
+        #
         # }
         detections = row['detections']
         if isinstance(detections,float):
             assert isinstance(row['failure'],str), 'Expected failure indicator'
             print('Skipping failed image {} ({})'.format(filename,row['failure']))
             continue
-
+
         assert len(detections) > 0
-
+
         # For each detection in this image
-        for iDetection, detection in enumerate(detections):
-
+        for i_detection, detection in enumerate(detections):
+
             n_boxes_evaluated += 1
-
+
             if detection is None:
-                print('Skipping detection {}'.format(iDetection))
+                print('Skipping detection {}'.format(i_detection))
                 continue
 
             assert 'category' in detection and \
@@ -671,14 +676,14 @@ def _find_matches_in_directory(dirNameAndRows, options):
                 'bbox' in detection, 'Illegal detection'
 
             confidence = detection['conf']
-
+
             # This is no longer strictly true; I sometimes run RDE in stages, so
             # some probabilities have already been made negative
             #
             # assert confidence >= 0.0 and confidence <= 1.0
-
+
             assert confidence >= -1.0 and confidence <= 1.0
-
+
             if confidence < options.confidenceMin:
                 continue
             if confidence > options.confidenceMax:
@@ -686,60 +691,60 @@ def _find_matches_in_directory(dirNameAndRows, options):
 
             # Optionally exclude some classes from consideration as suspicious
             if (options.excludeClasses is not None) and (len(options.excludeClasses) > 0):
-                iClass = int(detection['category'])
-                if iClass in options.excludeClasses:
+                i_class = int(detection['category'])
+                if i_class in options.excludeClasses:
                     continue
 
             bbox = detection['bbox']
             confidence = detection['conf']
-
+
             # Is this detection too big or too small for consideration?
             w, h = bbox[2], bbox[3]
-
-            if (w == 0 or h == 0):
+
+            if (w == 0 or h == 0):
                 continue
-
+
             area = h * w
 
             if area < 0:
                 print('Warning: negative-area bounding box for file {}'.format(filename))
                 area = abs(area); h = abs(h); w = abs(w)
-
+
             assert area >= 0.0 and area <= 1.0, \
                 'Illegal bounding box area {} in image {}'.format(area,filename)
 
-            if area < options.minSuspiciousDetectionSize:
+            if area < options.minSuspiciousDetectionSize:
                 continue
-
-            if area > options.maxSuspiciousDetectionSize:
+
+            if area > options.maxSuspiciousDetectionSize:
                 continue
 
             category = detection['category']
-
-            instance = IndexedDetection(iDetection=iDetection,
-                                        filename=row['file'], bbox=bbox,
+
+            instance = IndexedDetection(i_detection=i_detection,
+                                        filename=row['file'], bbox=bbox,
                                         confidence=confidence, category=category)
 
-            bFoundSimilarDetection = False
+            b_found_similar_detection = False
 
             rtree_rect = _detection_rect_to_rtree_rect(bbox)
-
+
             # This will return candidates of all classes
-            overlappingCandidateDetections =\
-                candidateDetectionsIndex.intersect(rtree_rect)
-
-            overlappingCandidateDetections.sort(
+            overlapping_candidate_detections =\
+                candidate_detections_index.intersect(rtree_rect)
+
+            overlapping_candidate_detections.sort(
                 key=lambda x: x.id, reverse=False)
-
+
             # For each detection in our candidate list
-            for iCandidate, candidate in enumerate(
-                overlappingCandidateDetections):
-
+            for i_candidate, candidate in enumerate(
+                overlapping_candidate_detections):
+
                 # Don't match across categories
                 if (candidate.category != category) and (not (options.categoryAgnosticComparisons)):
                     continue
-
-                # Is this a match?
+
+                # Is this a match?
                 try:
                     iou = ct_utils.get_iou(bbox, candidate.bbox)
                 except Exception as e:
@@ -748,12 +753,12 @@ def _find_matches_in_directory(dirNameAndRows, options):
                         format(
                         bbox[0],bbox[1],bbox[2],bbox[3],
                         candidate.bbox[0],candidate.bbox[1],
-                        candidate.bbox[2],candidate.bbox[3], str(e)))
+                        candidate.bbox[2],candidate.bbox[3], str(e)))
                     continue
 
                 if iou >= options.iouThreshold:
-
-                    bFoundSimilarDetection = True
+
+                    b_found_similar_detection = True
 
                     # If so, add this example to the list for this detection
                     candidate.instances.append(instance)
@@ -765,89 +770,91 @@ def _find_matches_in_directory(dirNameAndRows, options):
             # ...for each detection on our candidate list
 
             # If we found no matches, add this to the candidate list
-            if not bFoundSimilarDetection:
-
-                candidate = DetectionLocation(instance=instance,
-                    detection=detection, relativeDir=dirName,
-                    category=category, id=i_iteration)
-
+            if not b_found_similar_detection:
+
+                candidate = DetectionLocation(instance=instance,
+                                              detection=detection,
+                                              relative_dir=dir_name,
+                                              category=category,
+                                              id=i_iteration)
+
                 # pyqtree
-                candidateDetectionsIndex.insert(item=candidate,bbox=rtree_rect)
+                candidate_detections_index.insert(item=candidate,bbox=rtree_rect)
 
         # ...for each detection
 
     # ...for each row
 
     # Get all candidate detections
-
-    candidateDetections = candidateDetectionsIndex.intersect([-100,-100,100,100])
-
+
+    candidate_detections = candidate_detections_index.intersect([-100,-100,100,100])
+
     # For debugging only, it's convenient to have these sorted
     # as if they had never gone into a tree structure. Typically
     # this is in practice a sort by filename.
-    candidateDetections.sort(
+    candidate_detections.sort(
         key=lambda x: x.id, reverse=False)
-
+
     if detections_loaded_from_csv_file is not None:
         location_results_file = \
             os.path.splitext(detections_loaded_from_csv_file)[0] + \
             '_results.json'
         print('Writing results for location {} to {}'.format(
-            dirName,location_results_file))
-        s = jsonpickle.encode(candidateDetections,make_refs=False)
+            dir_name,location_results_file))
+        s = jsonpickle.encode(candidate_detections,make_refs=False)
         with open(location_results_file,'w') as f:
-            f.write(s)
-            # json.dump(candidateDetections,f,indent=1)
+            f.write(s)
+            # json.dump(candidate_detections,f,indent=1)
         return location_results_file
     else:
-        return candidateDetections
+        return candidate_detections
 
 # ...def _find_matches_in_directory(...)
 
 
-def _update_detection_table(repeatDetectionResults, options, outputFilename=None):
+def _update_detection_table(repeat_detection_results, options, output_file_name=None):
     """
-    Changes confidence values in repeatDetectionResults.detectionResults so that detections
+    Changes confidence values in repeat_detection_results.detectionResults so that detections
     deemed to be possible false positives are given negative confidence values.
-
-    repeatDetectionResults is an object of type RepeatDetectionResults, with a pandas
+
+    repeat_detection_results is an object of type RepeatDetectionResults, with a pandas
     dataframe (detectionResults) containing all the detections loaded from the .json file,
-    and a list of detections for each location (suspiciousDetections) that are deemed to
+    and a list of detections for each location (suspicious_detections) that are deemed to
     be suspicious.
-
-    returns the modified pandas dataframe (repeatDetectionResults.detectionResults), but
+
+    returns the modified pandas dataframe (repeat_detection_results.detectionResults), but
     also modifies it in place.
     """
-
+
     # This is the pandas dataframe that contains actual detection results.
-    #
+    #
     # Has fields ['file', 'detections','failure'].
-    detectionResults = repeatDetectionResults.detectionResults
+    detection_results = repeat_detection_results.detectionResults
 
-    # An array of length nDirs, where each element is a list of DetectionLocation
+    # An array of length nDirs, where each element is a list of DetectionLocation
     # objects for that directory that have been flagged as suspicious
-    suspiciousDetectionsByDirectory = repeatDetectionResults.suspiciousDetections
+    suspicious_detections_by_directory = repeat_detection_results.suspicious_detections
 
-    nBboxChanges = 0
+    n_bbox_changes = 0
 
     print('Updating output table')
 
     # For each directory
-    for iDir, directoryEvents in enumerate(suspiciousDetectionsByDirectory):
+    for i_dir, directory_events in enumerate(suspicious_detections_by_directory):
 
         # For each suspicious detection group in this directory
-        for iDetectionEvent, detectionEvent in enumerate(directoryEvents):
+        for i_detection_event, detection_event in enumerate(directory_events):
 
-            locationBbox = detectionEvent.bbox
+            location_bbox = detection_event.bbox
 
             # For each instance of this suspicious detection
-            for iInstance, instance in enumerate(detectionEvent.instances):
+            for i_instance, instance in enumerate(detection_event.instances):
 
-                instanceBbox = instance.bbox
+                instance_bbox = instance.bbox
 
                 # This should match the bbox for the detection event
-                iou = ct_utils.get_iou(instanceBbox, locationBbox)
-
+                iou = ct_utils.get_iou(instance_bbox, location_bbox)
+
                 # The bbox for this instance should be almost the same as the bbox
                 # for this detection group, where "almost" is defined by the IOU
                 # threshold.
@@ -855,159 +862,159 @@ def _update_detection_table(repeatDetectionResults, options, outputFilename=None
855
862
  # if iou < options.iouThreshold:
856
863
  # print('IOU warning: {},{}'.format(iou,options.iouThreshold))
857
864
 
858
- assert instance.filename in repeatDetectionResults.filenameToRow
859
- iRow = repeatDetectionResults.filenameToRow[instance.filename]
860
- row = detectionResults.iloc[iRow]
861
- rowDetections = row['detections']
862
- detectionToModify = rowDetections[instance.iDetection]
865
+ assert instance.filename in repeat_detection_results.filename_to_row
866
+ i_row = repeat_detection_results.filename_to_row[instance.filename]
+ row = detection_results.iloc[i_row]
+ row_detections = row['detections']
+ detection_to_modify = row_detections[instance.i_detection]

  # Make sure the bounding box matches
- assert (instanceBbox[0:3] == detectionToModify['bbox'][0:3])
+ assert (instance_bbox[0:3] == detection_to_modify['bbox'][0:3])

  # Make the probability negative, if it hasn't been switched by
  # another bounding box
- if detectionToModify['conf'] >= 0:
- detectionToModify['conf'] = -1 * detectionToModify['conf']
- nBboxChanges += 1
+ if detection_to_modify['conf'] >= 0:
+ detection_to_modify['conf'] = -1 * detection_to_modify['conf']
+ n_bbox_changes += 1

  # ...for each instance

  # ...for each detection

- # ...for each directory
+ # ...for each directory

  # Update maximum probabilities

  # For each row...
- nProbChanges = 0
- nProbChangesToNegative = 0
- nProbChangesAcrossThreshold = 0
+ n_prob_changes = 0
+ n_prob_changes_to_negative = 0
+ n_prob_changes_across_threshold = 0

- for iRow, row in detectionResults.iterrows():
+ for i_row, row in detection_results.iterrows():

  detections = row['detections']
  if (detections is None) or isinstance(detections,float):
  assert isinstance(row['failure'],str)
  continue
-
+
  if len(detections) == 0:
  continue

- maxPOriginal = float(row['max_detection_conf'])
-
+ max_p_original = float(row['max_detection_conf'])
+
  # No longer strictly true; sometimes I run RDE on RDE output
- # assert maxPOriginal >= 0
- assert maxPOriginal >= -1.0
+ # assert max_p_original >= 0
+ assert max_p_original >= -1.0
+
+ max_p = None
+ n_negative = 0

- maxP = None
- nNegative = 0
+ for i_detection, detection in enumerate(detections):

- for iDetection, detection in enumerate(detections):
-
  p = detection['conf']

  if p < 0:
- nNegative += 1
+ n_negative += 1
+
+ if (max_p is None) or (p > max_p):
+ max_p = p

- if (maxP is None) or (p > maxP):
- maxP = p
-
  # We should only be making detections *less* likely in this process
- assert maxP <= maxPOriginal
- detectionResults.at[iRow, 'max_detection_conf'] = maxP
+ assert max_p <= max_p_original
+ detection_results.at[i_row, 'max_detection_conf'] = max_p

  # If there was a meaningful change, count it
- if abs(maxP - maxPOriginal) > 1e-3:
+ if abs(max_p - max_p_original) > 1e-3:
+
+ assert max_p < max_p_original

- assert maxP < maxPOriginal
-
- nProbChanges += 1
+ n_prob_changes += 1

- if (maxP < 0) and (maxPOriginal >= 0):
- nProbChangesToNegative += 1
+ if (max_p < 0) and (max_p_original >= 0):
+ n_prob_changes_to_negative += 1

- if (maxPOriginal >= options.confidenceMin) and (maxP < options.confidenceMin):
- nProbChangesAcrossThreshold += 1
+ if (max_p_original >= options.confidenceMin) and (max_p < options.confidenceMin):
+ n_prob_changes_across_threshold += 1

- # Negative probabilities should be the only reason maxP changed, so
+ # Negative probabilities should be the only reason max_p changed, so
  # we should have found at least one negative value if we reached
  # this point.
- assert nNegative > 0
+ assert n_negative > 0

  # ...if there was a meaningful change to the max probability for this row

  # ...for each row

  # If we're also writing output...
- if outputFilename is not None and len(outputFilename) > 0:
- write_api_results(detectionResults, repeatDetectionResults.otherFields,
- outputFilename)
+ if output_file_name is not None and len(output_file_name) > 0:
+ write_api_results(detection_results, repeat_detection_results.otherFields,
+ output_file_name)

  print(
- 'Finished updating detection table\nChanged {} detections that impacted {} maxPs ({} to negative) ({} across confidence threshold)'.format(
- nBboxChanges, nProbChanges, nProbChangesToNegative, nProbChangesAcrossThreshold))
+ 'Finished updating detection table\nChanged {} detections that impacted {} max_ps ({} to negative) ({} across confidence threshold)'.format( # noqa
+ n_bbox_changes, n_prob_changes, n_prob_changes_to_negative, n_prob_changes_across_threshold))

- return detectionResults
+ return detection_results

  # ...def _update_detection_table(...)
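The sign convention implemented above is worth spelling out: _update_detection_table does not delete repeat detections, it negates their confidence values, so downstream consumers can drop them with a simple threshold. Below is a minimal sketch of reading a file written via the output argument and discarding the flagged detections; it assumes the standard MD results layout (an 'images' list whose entries carry 'detections' with 'conf' fields), and the file name is hypothetical.

import json

# Hypothetical output file written by find_repeat_detections(..., output_file_name=...)
with open('md_results_rde.json', 'r') as f:
    results = json.load(f)

n_flagged = 0
for im in results['images']:
    detections = im.get('detections', None)
    if detections is None:
        # Failed images carry a 'failure' string instead of detections
        continue
    # Repeat detections were marked by flipping 'conf' negative in _update_detection_table
    kept = [d for d in detections if d['conf'] >= 0]
    n_flagged += len(detections) - len(kept)
    im['detections'] = kept

print('Dropped {} flagged repeat detections'.format(n_flagged))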


- def _render_sample_image_for_detection(detection,filteringDir,options):
+ def _render_sample_image_for_detection(detection,filtering_dir,options):
  """
  Render a sample image for one unique detection, possibly containing lightly-colored
- high-confidence detections from elsewhere in the sample image.
-
+ high-confidence detections from elsewhere in the sample image.
+
  "detections" is a DetectionLocation object.
-
+
  Depends on having already sorted instances within this detection by confidence, and
  having already generated an output file name for this sample image.
  """
-
+
  # Confidence values should already have been sorted in the previous loop
  instance_confidences = [instance.confidence for instance in detection.instances]
  assert ct_utils.is_list_sorted(instance_confidences,reverse=True)
-
+
  # Choose the highest-confidence index
  instance = detection.instances[0]
- relativePath = instance.filename
-
- outputRelativePath = detection.sampleImageRelativeFileName
- assert len(outputRelativePath) > 0
-
- outputFullPath = os.path.join(filteringDir, outputRelativePath)
-
+ relative_path = instance.filename
+
+ output_relative_path = detection.sampleImageRelativeFileName
+ assert len(output_relative_path) > 0
+
+ output_full_path = os.path.join(filtering_dir, output_relative_path)
+
  if is_sas_url(options.imageBase):
- inputFullPath = relative_sas_url(options.imageBase, relativePath)
+ input_full_path = relative_sas_url(options.imageBase, relative_path)
  else:
- inputFullPath = os.path.join(options.imageBase, relativePath)
- assert (os.path.isfile(inputFullPath)), 'Not a file: {}'.\
- format(inputFullPath)
-
+ input_full_path = os.path.join(options.imageBase, relative_path)
+ assert (os.path.isfile(input_full_path)), 'Not a file: {}'.\
+ format(input_full_path)
+
  try:
-
- im = open_image(inputFullPath)
-
+
+ im = open_image(input_full_path)
+
  # Should we render (typically in a very light color) detections
  # *other* than the one we're highlighting here?
  if options.bRenderOtherDetections:
-
+
  # Optionally resize the output image
  if (options.maxOutputImageWidth is not None) and \
  (im.size[0] > options.maxOutputImageWidth):
- im = vis_utils.resize_image(im, options.maxOutputImageWidth,
+ im = vis_utils.resize_image(im, options.maxOutputImageWidth,
  target_height=-1)
-
+
  assert detection.sampleImageDetections is not None
-
- # At this point, suspicious detections have already been flipped
+
+ # At this point, suspicious detections have already been flipped
  # negative, which we don't want for rendering purposes
  rendered_detections = []
-
+
  for det in detection.sampleImageDetections:
  rendered_det = copy.copy(det)
  rendered_det['conf'] = abs(rendered_det['conf'])
- rendered_detections.append(rendered_det)
-
+ rendered_detections.append(rendered_det)
+
  # Render other detections first (typically in a thin+light box)
  render_detection_bounding_boxes(rendered_detections,
  im,
@@ -1016,7 +1023,7 @@ def _render_sample_image_for_detection(detection,filteringDir,options):
  expansion=options.boxExpansion,
  colormap=options.otherDetectionsColors,
  confidence_threshold=options.otherDetectionsThreshold)
-
+
  # Now render the example detection (on top of at least one
  # of the other detections)

@@ -1024,140 +1031,144 @@ def _render_sample_image_for_detection(detection,filteringDir,options):
  # because we just sorted this list in descending order by confidence,
  # this is the highest-confidence detection.
  d = detection.to_api_detection()
-
+
  render_detection_bounding_boxes([d],im,thickness=options.lineThickness,
  expansion=options.boxExpansion,
  confidence_threshold=-10)
-
- im.save(outputFullPath)
-
+
+ im.save(output_full_path)
+
  else:
-
- _render_bounding_box(detection, inputFullPath, outputFullPath,
- lineWidth=options.lineThickness, expansion=options.boxExpansion)
-
+
+ _render_bounding_box(detection,
+ input_full_path,
+ output_full_path,
+ line_width=options.lineThickness,
+ expansion=options.boxExpansion)
+
  # ...if we are/aren't rendering other bounding boxes
-
+
  # If we're rendering detection tiles, we'll re-load and re-write the image we
- # just wrote to outputFullPath
+ # just wrote to output_full_path
  if options.bRenderDetectionTiles:
-
+
  assert not is_sas_url(options.imageBase), "Can't render detection tiles from SAS URLs"
-
+
  if options.detectionTilesPrimaryImageWidth is not None:
- primaryImageWidth = options.detectionTilesPrimaryImageWidth
+ primary_image_width = options.detectionTilesPrimaryImageWidth
  else:
  # "im" may be a resized version of the original image, if we've already run
  # the code to render other bounding boxes.
- primaryImageWidth = im.size[0]
-
+ primary_image_width = im.size[0]
+
  if options.detectionTilesCroppedGridWidth <= 1.0:
- croppedGridWidth = round(options.detectionTilesCroppedGridWidth * primaryImageWidth)
+ cropped_grid_width = \
+ round(options.detectionTilesCroppedGridWidth * primary_image_width)
  else:
- croppedGridWidth = options.detectionTilesCroppedGridWidth
-
- secondaryImageFilenameList = []
- secondaryImageBoundingBoxList = []
-
+ cropped_grid_width = options.detectionTilesCroppedGridWidth
+
+ secondary_image_filename_list = []
+ secondary_image_bounding_box_list = []
+
  # If we start from zero, we include the sample crop
  for instance in detection.instances[0:]:
- secondaryImageFilenameList.append(os.path.join(options.imageBase,
+ secondary_image_filename_list.append(os.path.join(options.imageBase,
  instance.filename))
- secondaryImageBoundingBoxList.append(instance.bbox)
-
+ secondary_image_bounding_box_list.append(instance.bbox)
+
  # Optionally limit the number of crops we pass to the rendering function
  if (options.detectionTilesMaxCrops is not None) and \
  (len(detection.instances) > options.detectionTilesMaxCrops):
- secondaryImageFilenameList = \
- secondaryImageFilenameList[0:options.detectionTilesMaxCrops]
- secondaryImageBoundingBoxList = \
- secondaryImageBoundingBoxList[0:options.detectionTilesMaxCrops]
-
- # This will over-write the image we've already written to outputFullPath
+ secondary_image_filename_list = \
+ secondary_image_filename_list[0:options.detectionTilesMaxCrops]
+ secondary_image_bounding_box_list = \
+ secondary_image_bounding_box_list[0:options.detectionTilesMaxCrops]
+
+ # This will over-write the image we've already written to output_full_path
  render_images_with_thumbnails.render_images_with_thumbnails(
- primary_image_filename=outputFullPath,
- primary_image_width=primaryImageWidth,
- secondary_image_filename_list=secondaryImageFilenameList,
- secondary_image_bounding_box_list=secondaryImageBoundingBoxList,
- cropped_grid_width=croppedGridWidth,
- output_image_filename=outputFullPath,
+ primary_image_filename=output_full_path,
+ primary_image_width=primary_image_width,
+ secondary_image_filename_list=secondary_image_filename_list,
+ secondary_image_bounding_box_list=secondary_image_bounding_box_list,
+ cropped_grid_width=cropped_grid_width,
+ output_image_filename=output_full_path,
  primary_image_location=options.detectionTilesPrimaryImageLocation)
-
+
  # ...if we are/aren't rendering detection tiles
-
+
  except Exception as e:
-
+
  stack_trace = traceback.format_exc()
  print('Warning: error rendering bounding box from {} to {}: {} ({})'.format(
- inputFullPath,outputFullPath,e,stack_trace))
+ input_full_path,output_full_path,e,stack_trace))
  if options.bFailOnRenderError:
- raise
+ raise

  # ...def _render_sample_image_for_detection(...)


  #%% Main entry point

- def find_repeat_detections(inputFilename, outputFilename=None, options=None):
+ def find_repeat_detections(input_filename, output_file_name=None, options=None):
  """
- Find detections in a MD results file that occur repeatedly and are likely to be
+ Find detections in a MD results file that occur repeatedly and are likely to be
  rocks/sticks.
-
+
  Args:
- inputFilename (str): the MD results .json file to analyze
- outputFilename (str, optional): the filename to which we should write results
+ input_filename (str): the MD results .json file to analyze
+ output_file_name (str, optional): the filename to which we should write results
  with repeat detections removed, typically set to None during the first
  part of the RDE process.
  options (RepeatDetectionOptions): all the interesting options controlling this
  process; see RepeatDetectionOptions for details.
-
+
  Returns:
  RepeatDetectionResults: results of the RDE process; see RepeatDetectionResults
  for details.
  """
-
+
  ##%% Input handling

  if options is None:
-
+
  options = RepeatDetectionOptions()

  # Validate some options
-
+
  if options.customDirNameFunction is not None:
  assert options.nDirLevelsFromLeaf == 0, \
  'Cannot mix custom dir name functions with nDirLevelsFromLeaf'
-
+
  if options.nDirLevelsFromLeaf != 0:
  assert options.customDirNameFunction is None, \
  'Cannot mix custom dir name functions with nDirLevelsFromLeaf'
-
+
  if options.filterFileToLoad is not None and len(options.filterFileToLoad) > 0:
-
+
  print('Bypassing detection-finding, loading from {}'.format(options.filterFileToLoad))

  # Load the filtering file
- detectionIndexFileName = options.filterFileToLoad
- sIn = open(detectionIndexFileName, 'r').read()
- detectionInfo = jsonpickle.decode(sIn)
- filteringBaseDir = os.path.dirname(options.filterFileToLoad)
- suspiciousDetections = detectionInfo['suspiciousDetections']
-
+ detection_index_file_name = options.filterFileToLoad
+ s_in = open(detection_index_file_name, 'r').read()
+ detection_info = jsonpickle.decode(s_in)
+ filtering_base_dir = os.path.dirname(options.filterFileToLoad)
+ suspicious_detections = detection_info['suspicious_detections']
+
  # Load the same options we used when finding repeat detections
- options = detectionInfo['options']
-
+ options = detection_info['options']
+
  # ...except for things that explicitly tell this function not to
  # find repeat detections.
- options.filterFileToLoad = detectionIndexFileName
+ options.filterFileToLoad = detection_index_file_name
  options.bWriteFilteringFolder = False
-
+
  # ...if we're loading from an existing filtering file
-
- toReturn = RepeatDetectionResults()

-
+ to_return = RepeatDetectionResults()
+
+
  # Check early to avoid problems with the output folder
-
+
  if options.bWriteFilteringFolder:
  assert options.outputBase is not None and len(options.outputBase) > 0
  os.makedirs(options.outputBase,exist_ok=True)
@@ -1165,189 +1176,189 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):

  # Load file to a pandas dataframe. Also populates 'max_detection_conf', even if it's
  # not present in the .json file.
- detectionResults, otherFields = load_api_results(inputFilename, normalize_paths=True,
- filename_replacements=options.filenameReplacements,
- force_forward_slashes=True)
- toReturn.detectionResults = detectionResults
- toReturn.otherFields = otherFields
-
- # detectionResults[detectionResults['failure'].notna()]
-
+ detection_results, other_fields = load_api_results(input_filename, normalize_paths=True,
+ filename_replacements=options.filenameReplacements,
+ force_forward_slashes=True)
+ to_return.detectionResults = detection_results
+ to_return.otherFields = other_fields
+
  # Before doing any real work, make sure we can *probably* access images
- # This is just a cursory check on the first image, but it heads off most
+ # This is just a cursory check on the first image, but it heads off most
  # problems related to incorrect mount points, etc. Better to do this before
- # spending 20 minutes finding repeat detections.
-
+ # spending 20 minutes finding repeat detections.
+
  if options.bWriteFilteringFolder:
-
+
  if not is_sas_url(options.imageBase):
-
- row = detectionResults.iloc[0]
- relativePath = row['file']
+
+ row = detection_results.iloc[0]
+ relative_path = row['file']
  if options.filenameReplacements is not None:
  for s in options.filenameReplacements.keys():
- relativePath = relativePath.replace(s,options.filenameReplacements[s])
- absolutePath = os.path.join(options.imageBase,relativePath)
- assert os.path.isfile(absolutePath), 'Could not find file {}'.format(absolutePath)
+ relative_path = relative_path.replace(s,options.filenameReplacements[s])
+ absolute_path = os.path.join(options.imageBase,relative_path)
+ assert os.path.isfile(absolute_path), 'Could not find file {}'.format(absolute_path)


  ##%% Separate files into locations

  # This will be a map from a directory name to smaller data frames
- rowsByDirectory = {}
+ rows_by_directory = {}

  # This is a mapping back into the rows of the original table
- filenameToRow = {}
+ filename_to_row = {}

  print('Separating images into locations...')

- nCustomDirReplacements = 0
-
- # iRow = 0; row = detectionResults.iloc[0]
- for iRow, row in tqdm(detectionResults.iterrows(),total=len(detectionResults)):
-
- relativePath = row['file']
-
+ n_custom_dir_replacements = 0
+
+ # i_row = 0; row = detection_results.iloc[i_row]
+ for i_row, row in tqdm(detection_results.iterrows(),total=len(detection_results)):
+
+ relative_path = row['file']
+
  if options.customDirNameFunction is not None:
- basicDirName = os.path.dirname(relativePath.replace('\\','/'))
- dirName = options.customDirNameFunction(relativePath)
- if basicDirName != dirName:
- nCustomDirReplacements += 1
+ basic_dir_name = os.path.dirname(relative_path.replace('\\','/'))
+ dir_name = options.customDirNameFunction(relative_path)
+ if basic_dir_name != dir_name:
+ n_custom_dir_replacements += 1
  else:
- dirName = os.path.dirname(relativePath)
-
- if len(dirName) == 0:
+ dir_name = os.path.dirname(relative_path)
+
+ if len(dir_name) == 0:
  assert options.nDirLevelsFromLeaf == 0, \
  'Can''t use the dirLevelsFromLeaf option with flat filenames'
  else:
  if options.nDirLevelsFromLeaf > 0:
- iLevel = 0
- while (iLevel < options.nDirLevelsFromLeaf):
- iLevel += 1
- dirName = os.path.dirname(dirName)
- assert len(dirName) > 0
+ i_level = 0
+ while (i_level < options.nDirLevelsFromLeaf):
+ i_level += 1
+ dir_name = os.path.dirname(dir_name)
+ assert len(dir_name) > 0

- if not dirName in rowsByDirectory:
+ if dir_name not in rows_by_directory:
  # Create a new DataFrame with just this row
- # rowsByDirectory[dirName] = pd.DataFrame(row)
- rowsByDirectory[dirName] = []
+ # rows_by_directory[dir_name] = pd.DataFrame(row)
+ rows_by_directory[dir_name] = []

- rowsByDirectory[dirName].append(row)
+ rows_by_directory[dir_name].append(row)

- assert relativePath not in filenameToRow
- filenameToRow[relativePath] = iRow
+ assert relative_path not in filename_to_row
+ filename_to_row[relative_path] = i_row

  # ...for each unique detection
-
+
  if options.customDirNameFunction is not None:
  print('Custom dir name function made {} replacements (of {} images)'.format(
- nCustomDirReplacements,len(detectionResults)))
+ n_custom_dir_replacements,len(detection_results)))

  # Convert lists of rows to proper DataFrames
- dirs = list(rowsByDirectory.keys())
+ dirs = list(rows_by_directory.keys())
  for d in dirs:
- rowsByDirectory[d] = pd.DataFrame(rowsByDirectory[d])
+ rows_by_directory[d] = pd.DataFrame(rows_by_directory[d])
+
+ to_return.rows_by_directory = rows_by_directory
+ to_return.filename_to_row = filename_to_row

- toReturn.rowsByDirectory = rowsByDirectory
- toReturn.filenameToRow = filenameToRow
+ print('Finished separating {} files into {} locations'.format(len(detection_results),
+ len(rows_by_directory)))

- print('Finished separating {} files into {} locations'.format(len(detectionResults),
- len(rowsByDirectory)))
-
  ##% Look for repeat detections (or load them from file)

- dirsToSearch = list(rowsByDirectory.keys())
+ dirs_to_search = list(rows_by_directory.keys())
  if options.debugMaxDir > 0:
- dirsToSearch = dirsToSearch[0:options.debugMaxDir]
+ dirs_to_search = dirs_to_search[0:options.debugMaxDir]

  # Map numeric directory indices to names (we'll write this out to the detection index .json file)
- dirIndexToName = {}
- for iDir, dirName in enumerate(dirsToSearch):
- dirIndexToName[iDir] = dirName
-
+ dir_index_to_name = {}
+ for i_dir, dir_name in enumerate(dirs_to_search):
+ dir_index_to_name[i_dir] = dir_name
+
  # Are we actually looking for matches, or just loading from a file?
  if len(options.filterFileToLoad) == 0:

  # length-nDirs list of lists of DetectionLocation objects
- suspiciousDetections = [None] * len(dirsToSearch)
+ suspicious_detections = [None] * len(dirs_to_search)

  # We're actually looking for matches...
  print('Finding similar detections...')
-
- dirNameAndRows = []
- for dirName in dirsToSearch:
- rowsThisDirectory = rowsByDirectory[dirName]
- dirNameAndRows.append((dirName,rowsThisDirectory))
-
- allCandidateDetections = [None] * len(dirsToSearch)
-
+
+ dir_name_and_rows = []
+ for dir_name in dirs_to_search:
+ rows_this_directory = rows_by_directory[dir_name]
+ dir_name_and_rows.append((dir_name,rows_this_directory))
+
+ all_candidate_detections = [None] * len(dirs_to_search)
+
  # If we serialize results to intermediate files, we need to remove slashes from
  # location names; we store mappings here.
  normalized_location_name_to_location_name = None
  location_name_to_normalized_location_name = None
-
+
  if not options.bParallelizeComparisons:

  options.pbar = None
- for iDir, dirName in tqdm(enumerate(dirsToSearch)):
- dirNameAndRow = dirNameAndRows[iDir]
- assert dirNameAndRow[0] == dirName
- print('Processing dir {} of {}: {}'.format(iDir,len(dirsToSearch),dirName))
- allCandidateDetections[iDir] = \
- _find_matches_in_directory(dirNameAndRow, options)
-
- else:
-
+ for i_dir, dir_name in tqdm(enumerate(dirs_to_search)):
+ dir_name_and_row = dir_name_and_rows[i_dir]
+ assert dir_name_and_row[0] == dir_name
+ print('Processing dir {} of {}: {}'.format(i_dir,len(dirs_to_search),dir_name))
+ all_candidate_detections[i_dir] = \
+ _find_matches_in_directory(dir_name_and_row, options)
+
+ else:
+
  n_workers = options.nWorkers
- if n_workers > len(dirNameAndRows):
+ if n_workers > len(dir_name_and_rows):
  print('Pool of {} requested, but only {} folders available, reducing pool to {}'.\
- format(n_workers,len(dirNameAndRows),len(dirNameAndRows)))
- n_workers = len(dirNameAndRows)
+ format(n_workers,len(dir_name_and_rows),len(dir_name_and_rows)))
+ n_workers = len(dir_name_and_rows)
+
+ pool = None

  if options.parallelizationUsesThreads:
- pool = ThreadPool(n_workers); poolstring = 'threads'
+ pool = ThreadPool(n_workers); poolstring = 'threads'
  else:
  pool = Pool(n_workers); poolstring = 'processes'

  print('Starting comparison pool with {} {}'.format(n_workers,poolstring))
-
+
  assert options.pass_detections_to_processes_method in ('file','memory'), \
  'Unrecognized IPC mechanism: {}'.format(options.pass_detections_to_processes_method)
-
+
  # ** Experimental **
  #
- # Rather than passing detections and results around in memory, write detections and
+ # Rather than passing detections and results around in memory, write detections and
  # results for each worker to intermediate files. May improve performance for very large
  # results sets that exceed working memory.
  if options.pass_detections_to_processes_method == 'file':
-
+
  ##%% Convert location names to normalized names we can write to files
-
+
  normalized_location_name_to_location_name = {}
- for dir_name in dirsToSearch:
+ for dir_name in dirs_to_search:
  normalized_location_name = flatten_path(dir_name)
  assert normalized_location_name not in normalized_location_name_to_location_name, \
  'Redundant location name {}, can\'t serialize to intermediate files'.format(
  dir_name)
  normalized_location_name_to_location_name[normalized_location_name] = dir_name
-
+
  location_name_to_normalized_location_name = \
  invert_dictionary(normalized_location_name_to_location_name)
-
-
+
+
  ##%% Write results to files for each location
-
+
  print('Writing results to intermediate files')
-
+
  intermediate_json_file_folder = os.path.join(options.outputBase,'intermediate_results')
  os.makedirs(intermediate_json_file_folder,exist_ok=True)
-
- # i_location = 0; location_info = dirNameAndRows[0]
- dirNameAndIntermediateFile = []
-
- # i_location = 0; location_info = dirNameAndRows[i_location]
- for i_location, location_info in tqdm(enumerate(dirNameAndRows)):
-
+
+ # i_location = 0; location_info = dir_name_and_rows[0]
+ dir_name_and_intermediate_file = []
+
+ # i_location = 0; location_info = dir_name_and_rows[i_location]
+ for i_location, location_info in tqdm(enumerate(dir_name_and_rows)):
+
  location_name = location_info[0]
  assert location_name in location_name_to_normalized_location_name
  normalized_location_name = location_name_to_normalized_location_name[location_name]
@@ -1355,181 +1366,189 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):
  normalized_location_name + '.csv')
  detections_table_this_location = location_info[1]
  detections_table_this_location.to_csv(intermediate_results_file,header=True,index=False)
- dirNameAndIntermediateFile.append((location_name,intermediate_results_file))
-
-
+ dir_name_and_intermediate_file.append((location_name,intermediate_results_file))
+
+
  ##%% Find detections in each directory
-
- options.pbar = None
- allCandidateDetectionFiles = list(pool.imap(
- partial(_find_matches_in_directory,options=options), dirNameAndIntermediateFile))
-
-
+
+ options.pbar = None
+ all_candidate_detection_files = list(pool.imap(
+ partial(_find_matches_in_directory,options=options), dir_name_and_intermediate_file))
+
+
  ##%% Load into a combined list of candidate detections
-
- allCandidateDetections = []
-
- # candidate_detection_file = allCandidateDetectionFiles[0]
- for candidate_detection_file in allCandidateDetectionFiles:
+
+ all_candidate_detections = []
+
+ # candidate_detection_file = all_candidate_detection_files[0]
+ for candidate_detection_file in all_candidate_detection_files:
  s = open(candidate_detection_file, 'r').read()
  candidate_detections_this_file = jsonpickle.decode(s)
- allCandidateDetections.append(candidate_detections_this_file)
-
-
+ all_candidate_detections.append(candidate_detections_this_file)
+
+
  ##%% Clean up intermediate files
-
- shutil.rmtree(intermediate_json_file_folder)
-
+
+ shutil.rmtree(intermediate_json_file_folder)
+
  # If we're passing things around in memory, rather than via intermediate files
  else:
-
- # We get slightly nicer progress bar behavior using threads, by passing a pbar
- # object and letting it get updated. We can't serialize this object across
+
+ # We get slightly nicer progress bar behavior using threads, by passing a pbar
+ # object and letting it get updated. We can't serialize this object across
  # processes.
  if options.parallelizationUsesThreads:
- options.pbar = tqdm(total=len(dirNameAndRows))
- allCandidateDetections = list(pool.imap(
- partial(_find_matches_in_directory,options=options), dirNameAndRows))
+ options.pbar = tqdm(total=len(dir_name_and_rows))
+ all_candidate_detections = list(pool.imap(
+ partial(_find_matches_in_directory,options=options), dir_name_and_rows))
  else:
- options.pbar = None
- allCandidateDetections = list(tqdm(pool.imap(
- partial(_find_matches_in_directory,options=options), dirNameAndRows)))
+ options.pbar = None
+ all_candidate_detections = list(tqdm(pool.imap(
+ partial(_find_matches_in_directory,options=options), dir_name_and_rows)))
+
+ # ...if we're parallelizing comparisons
+
+ if pool is not None:
+ try:
+ pool.close()
+ pool.join()
+ print("Pool closed and joined for RDE comparisons")
+ except Exception as e:
+ print('Warning: error closing RDE comparison pool: {}'.format(str(e)))

  print('\nFinished looking for similar detections')

-
+
  ##%% Mark suspicious locations based on match results

  print('Marking repeat detections...')

- nImagesWithSuspiciousDetections = 0
- nSuspiciousDetections = 0
+ n_images_with_suspicious_detections = 0
+ n_suspicious_detections = 0

  # For each directory
- #
- # iDir = 51
- for iDir in range(len(dirsToSearch)):
+ for i_dir in range(len(dirs_to_search)):

  # A list of DetectionLocation objects
- suspiciousDetectionsThisDir = []
+ suspicious_detections_this_dir = []

  # A list of DetectionLocation objects
- candidateDetectionsThisDir = allCandidateDetections[iDir]
+ candidate_detections_this_dir = all_candidate_detections[i_dir]

- for iLocation, candidateLocation in enumerate(candidateDetectionsThisDir):
+ for i_location, candidate_location in enumerate(candidate_detections_this_dir):

  # occurrenceList is a list of file/detection pairs
- nOccurrences = len(candidateLocation.instances)
+ n_occurrences = len(candidate_location.instances)

- if nOccurrences < options.occurrenceThreshold:
+ if n_occurrences < options.occurrenceThreshold:
  continue

- nImagesWithSuspiciousDetections += nOccurrences
- nSuspiciousDetections += 1
+ n_images_with_suspicious_detections += n_occurrences
+ n_suspicious_detections += 1

- suspiciousDetectionsThisDir.append(candidateLocation)
+ suspicious_detections_this_dir.append(candidate_location)

- suspiciousDetections[iDir] = suspiciousDetectionsThisDir
+ suspicious_detections[i_dir] = suspicious_detections_this_dir

  # Sort the above-threshold detections for easier review
  if options.smartSort is not None:
- suspiciousDetections[iDir] = _sort_detections_for_directory(
- suspiciousDetections[iDir],options)
-
+ suspicious_detections[i_dir] = _sort_detections_for_directory(
+ suspicious_detections[i_dir],options)
+
  print('Found {} suspicious detections in directory {} ({})'.format(
- len(suspiciousDetections[iDir]),iDir,dirsToSearch[iDir]))
-
+ len(suspicious_detections[i_dir]),i_dir,dirs_to_search[i_dir]))
+
  # ...for each directory
-
+
  print('Finished marking repeat detections')
-
+
  print('Found {} unique detections on {} images that are suspicious'.format(
- nSuspiciousDetections, nImagesWithSuspiciousDetections))
+ n_suspicious_detections, n_images_with_suspicious_detections))

  # If we're just loading detections from a file...
  else:

- assert len(suspiciousDetections) == len(dirsToSearch)
+ assert len(suspicious_detections) == len(dirs_to_search)

- nDetectionsRemoved = 0
- nDetectionsLoaded = 0
+ n_detections_removed = 0
+ n_detections_loaded = 0

  # We're skipping detection-finding, but to see which images are actually legit false
- # positives, we may be looking for physical files or loading from a text file.
- fileList = None
+ # positives, we may be looking for physical files or loading from a text file.
+ file_list = None
  if options.filteredFileListToLoad is not None:
  with open(options.filteredFileListToLoad) as f:
- fileList = f.readlines()
- fileList = [x.strip() for x in fileList]
- nSuspiciousDetections = sum([len(x) for x in suspiciousDetections])
+ file_list = f.readlines()
+ file_list = [x.strip() for x in file_list]
+ n_suspicious_detections = sum([len(x) for x in suspicious_detections])
  print('Loaded false positive list from file ' + \
  'will remove {} of {} suspicious detections'.format(
- len(fileList), nSuspiciousDetections))
+ len(file_list), n_suspicious_detections))

  # For each directory
- # iDir = 0; detections = suspiciousDetections[0]
+ # i_dir = 0; detections = suspicious_detections[0]
  #
- # suspiciousDetections is an array of DetectionLocation objects,
- # one per directory.
- for iDir, detections in enumerate(suspiciousDetections):
+ # suspicious_detections is an array of DetectionLocation objects,
+ # one per directory.
+ for i_dir, detections in enumerate(suspicious_detections):

- bValidDetection = [True] * len(detections)
- nDetectionsLoaded += len(detections)
+ b_valid_detection = [True] * len(detections)
+ n_detections_loaded += len(detections)

  # For each detection that was present before filtering
- # iDetection = 0; detection = detections[iDetection]
- for iDetection, detection in enumerate(detections):
+ # i_detection = 0; detection = detections[i_detection]
+ for i_detection, detection in enumerate(detections):

  # Are we checking the directory to see whether detections were actually false
  # positives, or reading from a list?
- if fileList is None:
-
- # Is the image still there?
- imageFullPath = os.path.join(filteringBaseDir,
- detection.sampleImageRelativeFileName)
+ if file_list is None:
+
+ # Is the image still there?
+ image_full_path = os.path.join(filtering_base_dir,
+ detection.sampleImageRelativeFileName)

  # If not, remove this from the list of suspicious detections
- if not os.path.isfile(imageFullPath):
- nDetectionsRemoved += 1
- bValidDetection[iDetection] = False
+ if not os.path.isfile(image_full_path):
+ n_detections_removed += 1
+ b_valid_detection[i_detection] = False

  else:
-
- if detection.sampleImageRelativeFileName not in fileList:
- nDetectionsRemoved += 1
- bValidDetection[iDetection] = False
+
+ if detection.sampleImageRelativeFileName not in file_list:
+ n_detections_removed += 1
+ b_valid_detection[i_detection] = False

  # ...for each detection

- nRemovedThisDir = len(bValidDetection) - sum(bValidDetection)
- if nRemovedThisDir > 0:
+ n_removed_this_dir = len(b_valid_detection) - sum(b_valid_detection)
+ if n_removed_this_dir > 0:
  print('Removed {} of {} detections from directory {}'.\
- format(nRemovedThisDir,len(detections), iDir))
+ format(n_removed_this_dir,len(detections), i_dir))

- detectionsFiltered = list(compress(detections, bValidDetection))
- suspiciousDetections[iDir] = detectionsFiltered
+ detections_filtered = list(compress(detections, b_valid_detection))
+ suspicious_detections[i_dir] = detections_filtered

  # ...for each directory

  print('Removed {} of {} total detections via manual filtering'.\
- format(nDetectionsRemoved, nDetectionsLoaded))
+ format(n_detections_removed, n_detections_loaded))

  # ...if we are/aren't finding detections (vs. loading from file)

- toReturn.suspiciousDetections = suspiciousDetections
+ to_return.suspicious_detections = suspicious_detections
+
+ to_return.allRowsFiltered = _update_detection_table(to_return, options, output_file_name)
+

- toReturn.allRowsFiltered = _update_detection_table(toReturn, options, outputFilename)
-
-
  ##%% Create filtering directory
-
+
  if options.bWriteFilteringFolder:

  print('Creating filtering folder...')

- dateString = datetime.now().strftime('%Y.%m.%d.%H.%M.%S')
- filteringDir = os.path.join(options.outputBase, 'filtering_' + dateString)
- os.makedirs(filteringDir, exist_ok=True)
+ date_string = datetime.now().strftime('%Y.%m.%d.%H.%M.%S')
+ filtering_dir = os.path.join(options.outputBase, 'filtering_' + date_string)
+ os.makedirs(filtering_dir, exist_ok=True)

  # Take a first loop over every suspicious detection, and do the things that make
  # sense to do in a serial loop:
@@ -1538,101 +1557,107 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):
  # * Sort instances by confidence
  # * Look up detections for each sample image in the big table (so we don't have to pass the
  # table to workers)
- for iDir, suspiciousDetectionsThisDir in enumerate(tqdm(suspiciousDetections)):
-
- for iDetection, detection in enumerate(suspiciousDetectionsThisDir):
-
+ for i_dir, suspicious_detections_this_dir in enumerate(tqdm(suspicious_detections)):
+
+ for i_detection, detection in enumerate(suspicious_detections_this_dir):
+
  # Sort instances in descending order by confidence
  detection.instances.sort(key=attrgetter('confidence'),reverse=True)
-
+
  if detection.clusterLabel is not None:
- clusterString = '_c{:0>4d}'.format(detection.clusterLabel)
+ cluster_string = '_c{:0>4d}'.format(detection.clusterLabel)
  else:
- clusterString = ''
-
+ cluster_string = ''
+
  # Choose the highest-confidence index
  instance = detection.instances[0]
- relativePath = instance.filename
-
- outputRelativePath = 'dir{:0>4d}_det{:0>4d}{}_n{:0>4d}.jpg'.format(
- iDir, iDetection, clusterString, len(detection.instances))
- detection.sampleImageRelativeFileName = outputRelativePath
-
- iRow = filenameToRow[relativePath]
- row = detectionResults.iloc[iRow]
+ relative_path = instance.filename
+
+ output_relative_path = 'dir{:0>4d}_det{:0>4d}{}_n{:0>4d}.jpg'.format(
+ i_dir, i_detection, cluster_string, len(detection.instances))
+ detection.sampleImageRelativeFileName = output_relative_path
+
+ i_row = filename_to_row[relative_path]
+ row = detection_results.iloc[i_row]
  detection.sampleImageDetections = row['detections']
-
+
  # ...for each suspicious detection in this folder
-
+
  # ...for each folder
-
- # Collapse suspicious detections into a flat list
- allSuspiciousDetections = []
-
- # iDir = 0; suspiciousDetectionsThisDir = suspiciousDetections[iDir]
- for iDir, suspiciousDetectionsThisDir in enumerate(tqdm(suspiciousDetections)):
- for iDetection, detection in enumerate(suspiciousDetectionsThisDir):
- allSuspiciousDetections.append(detection)
-
+
+ # Collapse suspicious detections into a flat list
+ all_suspicious_detections = []
+
+ # i_dir = 0; suspicious_detections_this_dir = suspicious_detections[i_dir]
+ for i_dir, suspicious_detections_this_dir in enumerate(tqdm(suspicious_detections)):
+ for i_detection, detection in enumerate(suspicious_detections_this_dir):
+ all_suspicious_detections.append(detection)
+
  # Render suspicious detections
  if options.bParallelizeRendering:
-
+
  n_workers = options.nWorkers
-
- if options.parallelizationUsesThreads:
- pool = ThreadPool(n_workers); poolstring = 'threads'
- else:
- pool = Pool(n_workers); poolstring = 'processes'

- print('Starting rendering pool with {} {}'.format(n_workers,poolstring))
-
- # We get slightly nicer progress bar behavior using threads, by passing a pbar
- # object and letting it get updated. We can't serialize this object across
- # processes.
- if options.parallelizationUsesThreads:
- options.pbar = tqdm(total=len(allSuspiciousDetections))
- allCandidateDetections = list(pool.imap(
- partial(_render_sample_image_for_detection,filteringDir=filteringDir,
- options=options), allSuspiciousDetections))
- else:
- options.pbar = None
- allCandidateDetections = list(tqdm(pool.imap(
- partial(_render_sample_image_for_detection,filteringDir=filteringDir,
- options=options), allSuspiciousDetections)))
-
+ pool = None
+
+ try:
+ if options.parallelizationUsesThreads:
+ pool = ThreadPool(n_workers); poolstring = 'threads'
+ else:
+ pool = Pool(n_workers); poolstring = 'processes'
+
+ print('Starting rendering pool with {} {}'.format(n_workers,poolstring))
+
+ # We get slightly nicer progress bar behavior using threads, by passing a pbar
+ # object and letting it get updated. We can't serialize this object across
+ # processes.
+ if options.parallelizationUsesThreads:
+ options.pbar = tqdm(total=len(all_suspicious_detections))
+ all_candidate_detections = list(pool.imap(
+ partial(_render_sample_image_for_detection,filtering_dir=filtering_dir,
+ options=options), all_suspicious_detections))
+ else:
+ options.pbar = None
+ all_candidate_detections = list(tqdm(pool.imap(
+ partial(_render_sample_image_for_detection,filtering_dir=filtering_dir,
+ options=options), all_suspicious_detections)))
+ finally:
+ if pool is not None:
+ pool.close()
+ pool.join()
+ print("Pool closed and joined for RDE rendering")
+
  else:
-
+
  # Serial loop over detections
- for detection in allSuspiciousDetections:
- _render_sample_image_for_detection(detection,filteringDir,options)
-
+ for detection in all_suspicious_detections:
+ _render_sample_image_for_detection(detection,filtering_dir,options)
+
  # Delete (large) temporary data from the list of suspicious detections
- for detection in allSuspiciousDetections:
- detection.sampleImageDetections = None
-
+ for detection in all_suspicious_detections:
+ detection.sampleImageDetections = None
+
  # Write out the detection index
- detectionIndexFileName = os.path.join(filteringDir, detection_index_file_name_base)
-
+ detection_index_file_name = os.path.join(filtering_dir, detection_index_file_name_base)
+
  # Prepare the data we're going to write to the detection index file
- detectionInfo = {}
-
- detectionInfo['suspiciousDetections'] = suspiciousDetections
- detectionInfo['dirIndexToName'] = dirIndexToName
-
+ detection_info = {}
+
+ detection_info['suspicious_detections'] = suspicious_detections
+ detection_info['dir_index_to_name'] = dir_index_to_name
+
  # Remove the one non-serializable object from the options struct before serializing
  # to .json
  options.pbar = None
- detectionInfo['options'] = options
-
- s = jsonpickle.encode(detectionInfo,make_refs=False)
- with open(detectionIndexFileName, 'w') as f:
- f.write(s)
- toReturn.filterFile = detectionIndexFileName
+ detection_info['options'] = options

- print('Done')
+ s = jsonpickle.encode(detection_info,make_refs=False)
+ with open(detection_index_file_name, 'w') as f:
+ f.write(s)
+ to_return.filterFile = detection_index_file_name

  # ...if we're writing filtering info

- return toReturn
+ return to_return

  # ...def find_repeat_detections()
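For reference, a minimal sketch of how this entry point is typically driven from a script. The import path and all paths/values below are illustrative assumptions, not part of the diff; only options that appear in the code above are set.

# Assumed module location; adjust to wherever repeat_detections_core lives in this package
from megadetector.postprocessing.repeat_detection_elimination import repeat_detections_core

options = repeat_detections_core.RepeatDetectionOptions()
options.imageBase = '/data/camera_traps'   # hypothetical root folder for the original images
options.outputBase = '/data/rde_scratch'   # hypothetical folder that will receive the filtering_* output
options.occurrenceThreshold = 15           # boxes repeated at least this often become "suspicious"
options.bWriteFilteringFolder = True       # write sample images and the detection index for review

# First pass: find repeat detections and write the filtering folder
results = repeat_detections_core.find_repeat_detections('md_results.json', None, options)
print('Detection index written to {}'.format(results.filterFile))

# After deleting the sample images that actually contain animals (so those detections are
# no longer treated as repeats), a second pass with options.filterFileToLoad set to
# results.filterFile and an output_file_name writes the cleaned results file, with repeat
# detections' confidence values flipped negative.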