megadetector-10.0.15-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147)
  1. megadetector/__init__.py +0 -0
  2. megadetector/api/__init__.py +0 -0
  3. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
  7. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  8. megadetector/classification/__init__.py +0 -0
  9. megadetector/classification/aggregate_classifier_probs.py +108 -0
  10. megadetector/classification/analyze_failed_images.py +227 -0
  11. megadetector/classification/cache_batchapi_outputs.py +198 -0
  12. megadetector/classification/create_classification_dataset.py +626 -0
  13. megadetector/classification/crop_detections.py +516 -0
  14. megadetector/classification/csv_to_json.py +226 -0
  15. megadetector/classification/detect_and_crop.py +853 -0
  16. megadetector/classification/efficientnet/__init__.py +9 -0
  17. megadetector/classification/efficientnet/model.py +415 -0
  18. megadetector/classification/efficientnet/utils.py +608 -0
  19. megadetector/classification/evaluate_model.py +520 -0
  20. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  21. megadetector/classification/json_to_azcopy_list.py +63 -0
  22. megadetector/classification/json_validator.py +696 -0
  23. megadetector/classification/map_classification_categories.py +276 -0
  24. megadetector/classification/merge_classification_detection_output.py +509 -0
  25. megadetector/classification/prepare_classification_script.py +194 -0
  26. megadetector/classification/prepare_classification_script_mc.py +228 -0
  27. megadetector/classification/run_classifier.py +287 -0
  28. megadetector/classification/save_mislabeled.py +110 -0
  29. megadetector/classification/train_classifier.py +827 -0
  30. megadetector/classification/train_classifier_tf.py +725 -0
  31. megadetector/classification/train_utils.py +323 -0
  32. megadetector/data_management/__init__.py +0 -0
  33. megadetector/data_management/animl_to_md.py +161 -0
  34. megadetector/data_management/annotations/__init__.py +0 -0
  35. megadetector/data_management/annotations/annotation_constants.py +33 -0
  36. megadetector/data_management/camtrap_dp_to_coco.py +270 -0
  37. megadetector/data_management/cct_json_utils.py +566 -0
  38. megadetector/data_management/cct_to_md.py +184 -0
  39. megadetector/data_management/cct_to_wi.py +293 -0
  40. megadetector/data_management/coco_to_labelme.py +284 -0
  41. megadetector/data_management/coco_to_yolo.py +701 -0
  42. megadetector/data_management/databases/__init__.py +0 -0
  43. megadetector/data_management/databases/add_width_and_height_to_db.py +107 -0
  44. megadetector/data_management/databases/combine_coco_camera_traps_files.py +210 -0
  45. megadetector/data_management/databases/integrity_check_json_db.py +563 -0
  46. megadetector/data_management/databases/subset_json_db.py +195 -0
  47. megadetector/data_management/generate_crops_from_cct.py +200 -0
  48. megadetector/data_management/get_image_sizes.py +164 -0
  49. megadetector/data_management/labelme_to_coco.py +559 -0
  50. megadetector/data_management/labelme_to_yolo.py +349 -0
  51. megadetector/data_management/lila/__init__.py +0 -0
  52. megadetector/data_management/lila/create_lila_blank_set.py +556 -0
  53. megadetector/data_management/lila/create_lila_test_set.py +192 -0
  54. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  55. megadetector/data_management/lila/download_lila_subset.py +182 -0
  56. megadetector/data_management/lila/generate_lila_per_image_labels.py +777 -0
  57. megadetector/data_management/lila/get_lila_annotation_counts.py +174 -0
  58. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  59. megadetector/data_management/lila/lila_common.py +319 -0
  60. megadetector/data_management/lila/test_lila_metadata_urls.py +164 -0
  61. megadetector/data_management/mewc_to_md.py +344 -0
  62. megadetector/data_management/ocr_tools.py +873 -0
  63. megadetector/data_management/read_exif.py +964 -0
  64. megadetector/data_management/remap_coco_categories.py +195 -0
  65. megadetector/data_management/remove_exif.py +156 -0
  66. megadetector/data_management/rename_images.py +194 -0
  67. megadetector/data_management/resize_coco_dataset.py +665 -0
  68. megadetector/data_management/speciesnet_to_md.py +41 -0
  69. megadetector/data_management/wi_download_csv_to_coco.py +247 -0
  70. megadetector/data_management/yolo_output_to_md_output.py +594 -0
  71. megadetector/data_management/yolo_to_coco.py +984 -0
  72. megadetector/data_management/zamba_to_md.py +188 -0
  73. megadetector/detection/__init__.py +0 -0
  74. megadetector/detection/change_detection.py +840 -0
  75. megadetector/detection/process_video.py +479 -0
  76. megadetector/detection/pytorch_detector.py +1451 -0
  77. megadetector/detection/run_detector.py +1267 -0
  78. megadetector/detection/run_detector_batch.py +2172 -0
  79. megadetector/detection/run_inference_with_yolov5_val.py +1314 -0
  80. megadetector/detection/run_md_and_speciesnet.py +1604 -0
  81. megadetector/detection/run_tiled_inference.py +1044 -0
  82. megadetector/detection/tf_detector.py +209 -0
  83. megadetector/detection/video_utils.py +1379 -0
  84. megadetector/postprocessing/__init__.py +0 -0
  85. megadetector/postprocessing/add_max_conf.py +72 -0
  86. megadetector/postprocessing/categorize_detections_by_size.py +166 -0
  87. megadetector/postprocessing/classification_postprocessing.py +1943 -0
  88. megadetector/postprocessing/combine_batch_outputs.py +249 -0
  89. megadetector/postprocessing/compare_batch_results.py +2110 -0
  90. megadetector/postprocessing/convert_output_format.py +403 -0
  91. megadetector/postprocessing/create_crop_folder.py +629 -0
  92. megadetector/postprocessing/detector_calibration.py +570 -0
  93. megadetector/postprocessing/generate_csv_report.py +522 -0
  94. megadetector/postprocessing/load_api_results.py +223 -0
  95. megadetector/postprocessing/md_to_coco.py +428 -0
  96. megadetector/postprocessing/md_to_labelme.py +351 -0
  97. megadetector/postprocessing/md_to_wi.py +41 -0
  98. megadetector/postprocessing/merge_detections.py +392 -0
  99. megadetector/postprocessing/postprocess_batch_results.py +2140 -0
  100. megadetector/postprocessing/remap_detection_categories.py +226 -0
  101. megadetector/postprocessing/render_detection_confusion_matrix.py +677 -0
  102. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +206 -0
  103. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +82 -0
  104. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1665 -0
  105. megadetector/postprocessing/separate_detections_into_folders.py +795 -0
  106. megadetector/postprocessing/subset_json_detector_output.py +964 -0
  107. megadetector/postprocessing/top_folders_to_bottom.py +238 -0
  108. megadetector/postprocessing/validate_batch_results.py +332 -0
  109. megadetector/taxonomy_mapping/__init__.py +0 -0
  110. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  111. megadetector/taxonomy_mapping/map_new_lila_datasets.py +211 -0
  112. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +165 -0
  113. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +543 -0
  114. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  115. megadetector/taxonomy_mapping/simple_image_download.py +231 -0
  116. megadetector/taxonomy_mapping/species_lookup.py +1008 -0
  117. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  118. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  119. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  120. megadetector/tests/__init__.py +0 -0
  121. megadetector/tests/test_nms_synthetic.py +335 -0
  122. megadetector/utils/__init__.py +0 -0
  123. megadetector/utils/ct_utils.py +1857 -0
  124. megadetector/utils/directory_listing.py +199 -0
  125. megadetector/utils/extract_frames_from_video.py +307 -0
  126. megadetector/utils/gpu_test.py +125 -0
  127. megadetector/utils/md_tests.py +2072 -0
  128. megadetector/utils/path_utils.py +2872 -0
  129. megadetector/utils/process_utils.py +172 -0
  130. megadetector/utils/split_locations_into_train_val.py +237 -0
  131. megadetector/utils/string_utils.py +234 -0
  132. megadetector/utils/url_utils.py +825 -0
  133. megadetector/utils/wi_platform_utils.py +968 -0
  134. megadetector/utils/wi_taxonomy_utils.py +1766 -0
  135. megadetector/utils/write_html_image_list.py +239 -0
  136. megadetector/visualization/__init__.py +0 -0
  137. megadetector/visualization/plot_utils.py +309 -0
  138. megadetector/visualization/render_images_with_thumbnails.py +243 -0
  139. megadetector/visualization/visualization_utils.py +1973 -0
  140. megadetector/visualization/visualize_db.py +630 -0
  141. megadetector/visualization/visualize_detector_output.py +498 -0
  142. megadetector/visualization/visualize_video_output.py +705 -0
  143. megadetector-10.0.15.dist-info/METADATA +115 -0
  144. megadetector-10.0.15.dist-info/RECORD +147 -0
  145. megadetector-10.0.15.dist-info/WHEEL +5 -0
  146. megadetector-10.0.15.dist-info/licenses/LICENSE +19 -0
  147. megadetector-10.0.15.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1665 @@
+ """
+
+ repeat_detections_core.py
+
+ Core utilities shared by find_repeat_detections and remove_repeat_detections.
+
+ Nothing in this file (in fact nothing in this subpackage) will make sense until you read
+ the RDE user's guide:
+
+ https://github.com/agentmorris/MegaDetector/tree/main/megadetector/postprocessing/repeat_detection_elimination
+
+ """
+
+ #%% Imports and environment
+
+ import os
+ import copy
+ import warnings
+ import sklearn.cluster
+ import numpy as np
+ import jsonpickle
+ import traceback
+ import pandas as pd
+ import json
+ import shutil
+
+ from tqdm import tqdm
+ from operator import attrgetter
+ from datetime import datetime
+ from itertools import compress
+
+ import fastquadtree.pyqtree as pyqtree
+
+ from multiprocessing.pool import ThreadPool
+ from multiprocessing.pool import Pool
+ from functools import partial
+
+ from megadetector.utils import path_utils
+ from megadetector.utils import ct_utils
+ from megadetector.postprocessing.load_api_results import load_api_results, write_api_results
+ from megadetector.postprocessing.postprocess_batch_results import is_sas_url
+ from megadetector.postprocessing.postprocess_batch_results import relative_sas_url
+ from megadetector.visualization.visualization_utils import open_image, render_detection_bounding_boxes
+ from megadetector.visualization import render_images_with_thumbnails
+ from megadetector.visualization import visualization_utils as vis_utils
+ from megadetector.utils.path_utils import flatten_path
+ from megadetector.utils.ct_utils import invert_dictionary
+
+ # "PIL cannot read EXIF metainfo for the images"
+ warnings.filterwarnings('ignore', '(Possibly )?corrupt EXIF data', UserWarning)
+
+ # "Metadata Warning, tag 256 had too many entries: 42, expected 1"
+ warnings.filterwarnings('ignore', 'Metadata warning', UserWarning)
+
+ jsonpickle.set_encoder_options('json', sort_keys=True, indent=1)
+
+
+ #%% Constants
+
+ detection_index_file_name_base = 'detectionIndex.json'
+
+
+ #%% Classes
+
+ class RepeatDetectionOptions:
+     """
+     Options that control the behavior of repeat detection elimination
+     """
+
+     def __init__(self):
+
+         #: Folder where images live; filenames in the MD results .json file should
+         #: be relative to this folder.
+         #:
+         #: imageBase can also be a SAS URL, in which case some error-checking is
+         #: disabled.
+         self.imageBase = ''
+
+         #: Folder where we should write temporary output.
+         self.outputBase = ''
+
+         #: Don't consider detections with confidence lower than this as suspicious
+         self.confidenceMin = 0.1
+
+         #: Don't consider detections with confidence higher than this as suspicious
+         self.confidenceMax = 1.0
+
+         #: What's the IOU threshold for considering two boxes the same?
+         self.iouThreshold = 0.9
+
+         #: How many occurrences of a single location (as defined by the IOU threshold)
+         #: are required before we declare it suspicious?
+         self.occurrenceThreshold = 20
+
+         #: Ignore "suspicious" detections smaller than some size
+         self.minSuspiciousDetectionSize = 0.0
+
+         #: Ignore "suspicious" detections larger than some size; these are often animals
+         #: taking up the whole image. This is expressed as a fraction of the image size.
+         self.maxSuspiciousDetectionSize = 0.2
+
+         #: Ignore folders with more than this many images in them
+         self.maxImagesPerFolder = None
+
+         #: A list of category IDs (ints) that we don't want to consider as candidate repeat detections.
+         #:
+         #: Typically used to say, e.g., "don't bother analyzing people or vehicles for repeat
+         #: detections", which you could do by saying excludeClasses = [2,3].
+         self.excludeClasses = []
+
+         #: For very large sets of results, passing chunks of results to and from workers as
+         #: parameters ('memory') can be memory-intensive, so we can serialize to intermediate
+         #: files instead ('file').
+         #:
+         #: The use of 'file' here is still experimental.
+         self.pass_detections_to_processes_method = 'memory'
+
+         #: Number of workers to use for parallel operations
+         self.nWorkers = 10
+
+         #: Should we use threads (True) or processes (False) for parallelization?
+         #:
+         #: Not relevant if nWorkers <= 1, or if bParallelizeComparisons and
+         #: bParallelizeRendering are both False.
+         self.parallelizationUsesThreads = True
+
+         #: If this is not empty, we'll load detections from a filter file rather than finding them
+         #: from the detector output. This should be a .json file containing detections, generally this
+         #: is the detectionIndex.json file in the filtering_* folder produced by find_repeat_detections().
+         self.filterFileToLoad = ''
+
+         #: (optional) List of filenames remaining after deletion of identified
+         #: repeated detections that are actually animals. This should be a flat
+         #: text file, one relative filename per line.
+         #:
+         #: This is a pretty esoteric code path and a candidate for removal.
+         #:
+         #: The scenario where I see it being most useful is the very hypothetical one
+         #: where we use an external tool for image handling that allows us to do something
+         #: smarter and less destructive than deleting images to mark them as non-false-positives.
+         self.filteredFileListToLoad = None
+
+         #: Should we write the folder of images used to manually review repeat detections?
+         self.bWriteFilteringFolder = True
+
+         #: For debugging: limit comparisons to a specific number of folders
+         self.debugMaxDir = -1
+
+         #: For debugging: limit rendering to a specific number of folders
+         self.debugMaxRenderDir = -1
+
+         #: For debugging: limit comparisons to a specific number of detections
+         self.debugMaxRenderDetection = -1
+
+         #: For debugging: limit comparisons to a specific number of instances
+         self.debugMaxRenderInstance = -1
+
+         #: Should we parallelize (across cameras) comparisons to find repeat detections?
+         self.bParallelizeComparisons = True
+
+         #: Should we parallelize image rendering?
+         self.bParallelizeRendering = True
+
+         #: If this is False (default), a detection from class A is *not* considered to be "the same"
+         #: as a detection from class B, even if they're at the same location.
+         self.categoryAgnosticComparisons = False
+
+         #: Determines whether bounding-box rendering errors (typically network errors) should
+         #: be treated as failures
+         self.bFailOnRenderError = False
+
+         #: Should we print a warning if images referred to in the MD results file are missing?
+         self.bPrintMissingImageWarnings = True
+
+         #: If bPrintMissingImageWarnings is True, should we print a warning about missing images
+         #: just once ('once') or every time ('all')?
+         self.missingImageWarningType = 'once' # 'all'
+
+         #: Image width for rendered images (it's called "max" because we don't resize smaller images).
+         #:
+         #: Original size is preserved if this is None.
+         #:
+         #: This does *not* include the tile image grid.
+         self.maxOutputImageWidth = 2000
+
+         #: Line thickness (in pixels) for box rendering
+         self.lineThickness = 10
+
+         #: Box expansion (in pixels)
+         self.boxExpansion = 2
+
+         #: Progress bar used during comparisons and rendering. Do not set externally.
+         #:
+         #: :meta private:
+         self.pbar = None
+
+         #: Replace filename tokens after reading, useful when the directory structure
+         #: has changed relative to the structure the detector saw.
+         self.filenameReplacements = {}
+
+         #: How many folders up from the leaf nodes should we be going to aggregate images into
+         #: cameras?
+         #:
+         #: If this is zero, each leaf folder is treated as a camera.
+         self.nDirLevelsFromLeaf = 0
+
+         #: An optional function that takes a string (an image file name) and returns
+         #: a string (the corresponding folder ID), typically used when multiple folders
+         #: actually correspond to the same camera in a manufacturer-specific way (e.g.
+         #: a/b/c/RECONYX100 and a/b/c/RECONYX101 may really be the same camera).
+         #:
+         #: See ct_utils for a common replacement function that handles most common
+         #: manufacturer folder names:
+         #:
+         #: from megadetector.utils import ct_utils
+         #: self.customDirNameFunction = ct_utils.image_file_to_camera_folder
+         self.customDirNameFunction = None
+
+         #: Include only specific folders, mutually exclusive with [excludeFolders]
+         self.includeFolders = None
+
+         #: Exclude specific folders, mutually exclusive with [includeFolders]
+         self.excludeFolders = None
+
+         #: Optionally show *other* detections (i.e., detections other than the
+         #: one the user is evaluating), typically in a light gray.
+         self.bRenderOtherDetections = False
+
+         #: Threshold to use for *other* detections
+         self.otherDetectionsThreshold = 0.2
+
+         #: Line width (in pixels) for *other* detections
+         self.otherDetectionsLineWidth = 1
+
+         #: Optionally show a grid that includes a sample image for the detection, plus
+         #: the top N additional detections
+         self.bRenderDetectionTiles = True
+
+         #: Width of the original image (within the larger output image) when bRenderDetectionTiles
+         #: is True.
+         #:
+         #: If this is None, we'll render the original image in the detection tile image
+         #: at its original width.
+         self.detectionTilesPrimaryImageWidth = None
+
+         #: Width to use for the grid of detection instances.
+         #:
+         #: Can be a width in pixels, or a number from 0 to 1 representing a fraction
+         #: of the primary image width.
+         #:
+         #: If you want to render the grid at exactly 1 pixel wide, I guess you're out
+         #: of luck.
+         self.detectionTilesCroppedGridWidth = 0.6
+
+         #: Location of the primary image within the mosaic ('right' or 'left')
+         self.detectionTilesPrimaryImageLocation = 'right'
+
+         #: Maximum number of individual detection instances to include in the mosaic
+         self.detectionTilesMaxCrops = 150
+
+         #: If bRenderOtherDetections is True, what color should we use to render the
+         #: (hopefully pretty subtle) non-target detections?
+         #:
+         #: In theory I'd like these "other detection" rectangles to be partially
+         #: transparent, but this is not straightforward, and the alpha is ignored
+         #: here. But maybe if I leave it here and wish hard enough, someday it
+         #: will work.
+         #:
+         #: otherDetectionsColors = ['dimgray']
+         self.otherDetectionsColors = [(105,105,105,100)]
+
+         #: Sort detections within a directory so nearby detections are adjacent
+         #: in the list, for faster review.
+         #:
+         #: Can be None, 'xsort', or 'clustersort'
+         #:
+         #: * None sorts detections chronologically by first occurrence
+         #: * 'xsort' sorts detections from left to right
+         #: * 'clustersort' clusters detections and sorts by cluster
+         self.smartSort = 'xsort'
+
+         #: Only relevant if smartSort == 'clustersort'
+         self.smartSortDistanceThreshold = 0.1
+
+
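As a rough usage sketch (not from the package), these options are typically instantiated and tweaked before calling find_repeat_detections() below. The paths and thresholds here are hypothetical placeholders; image_file_to_camera_folder is the ct_utils helper referenced in the comments above:

```python
from megadetector.utils import ct_utils
from megadetector.postprocessing.repeat_detection_elimination import repeat_detections_core

options = repeat_detections_core.RepeatDetectionOptions()
options.imageBase = '/data/camera_traps'    # hypothetical image root
options.outputBase = '/tmp/rde_scratch'     # hypothetical scratch folder
options.occurrenceThreshold = 20            # flag locations seen >= 20 times
options.excludeClasses = [2, 3]             # e.g., skip person/vehicle classes

# Fold manufacturer subfolders (RECONYX100, RECONYX101, ...) into one camera
options.customDirNameFunction = ct_utils.image_file_to_camera_folder
```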
286
+ class RepeatDetectionResults:
287
+ """
288
+ The results of an entire repeat detection analysis
289
+ """
290
+
291
+ def __init__(self):
292
+
293
+ #: The data table (Pandas DataFrame), as loaded from the input json file via
294
+ #: load_api_results(). Has columns ['file', 'detections','failure'].
295
+ self.detectionResults = None
296
+
297
+ #: The other fields in the input json file, loaded via load_api_results()
298
+ self.otherFields = None
299
+
300
+ #: The data table after modification
301
+ self.detectionResultsFiltered = None
302
+
303
+ #: dict mapping folder names to whole rows from the data table
304
+ self.rows_by_directory = None
305
+
306
+ #: dict mapping filenames to rows in the master table
307
+ self.filename_to_row = None
308
+
309
+ #: An array of length nDirs, where each element is a list of DetectionLocation
310
+ #: objects for that directory that have been flagged as suspicious
311
+ self.suspicious_detections = None
312
+
313
+ #: The location of the .json file written with information about the RDE
314
+ #: review images (typically detectionIndex.json)
315
+ self.filterFile = None
316
+
317
+
318
+ class IndexedDetection:
319
+ """
320
+ A single detection event on a single image
321
+ """
322
+
323
+ def __init__(self, i_detection=-1, filename='', bbox=None, confidence=-1, category='unknown'):
324
+
325
+ if bbox is None:
326
+ bbox = []
327
+ assert isinstance(i_detection,int)
328
+ assert isinstance(filename,str)
329
+ assert isinstance(bbox,list)
330
+ assert isinstance(category,str)
331
+
332
+ #: index of this detection within all detections for this filename
333
+ self.i_detection = i_detection
334
+
335
+ #: path to the image corresponding to this detection
336
+ self.filename = filename
337
+
338
+ #: [x_min, y_min, width_of_box, height_of_box]
339
+ self.bbox = bbox
340
+
341
+ #: confidence value of this detection
342
+ self.confidence = confidence
343
+
344
+ #: category ID (not name) of this detection
345
+ self.category = category
346
+
347
+ def __repr__(self):
348
+ s = ct_utils.pretty_print_object(self, False)
349
+ return s
350
+
351
+
352
+ class DetectionLocation:
353
+ """
354
+ A unique-ish detection location, meaningful in the context of one
355
+ directory. All detections within an IoU threshold of self.bbox
356
+ will be stored in IndexedDetection objects.
357
+ """
358
+
359
+ def __init__(self, instance, detection, relative_dir, category, id=None):
360
+
361
+ assert isinstance(detection,dict)
362
+ assert isinstance(instance,IndexedDetection)
363
+ assert isinstance(relative_dir,str)
364
+ assert isinstance(category,str)
365
+
366
+ #: list of IndexedDetections that match this detection
367
+ self.instances = [instance]
368
+
369
+ #: category ID (not name) for this detection
370
+ self.category = category
371
+
372
+ #: bbox as x,y,w,h
373
+ self.bbox = detection['bbox']
374
+
+         #: relative folder (i.e., camera name) in which this detection was found
+         self.relativeDir = relative_dir
+
+         #: relative path to the canonical image representing this detection
+         self.sampleImageRelativeFileName = ''
+
+         #: list of detections on that canonical image that match this detection
+         self.sampleImageDetections = None
+
+         #: ID for this detection; this ID is only guaranteed to be unique within a directory
+         self.id = id
+
+         #: only used when doing cluster-based sorting
+         self.clusterLabel = None
+
+     def __repr__(self):
+         s = ct_utils.pretty_print_object(self, False)
+         return s
+
+     def to_api_detection(self):
+         """
+         Converts this detection to a 'detection' dictionary, making the semi-arbitrary
+         assumption that the first instance is representative of confidence.
+
+         Returns:
+             dict: dictionary in the format used to store detections in MD results
+         """
+
+         # This is a bit of a hack right now, but for future-proofing, I don't want to call this
+         # to retrieve anything other than the highest-confidence detection, and I'm assuming this
+         # is already sorted, so assert() that.
+         confidences = [i.confidence for i in self.instances]
+         assert confidences[0] == max(confidences), \
+             'Cannot convert an unsorted DetectionLocation to an API detection'
+
+         # It's not clear whether it's better to use instances[0].bbox or self.bbox
+         # here... they should be very similar, unless iouThreshold is very low.
+         # self.bbox is a better representation of the overall DetectionLocation.
+         detection = {'conf':self.instances[0].confidence,
+                      'bbox':self.bbox,'category':self.instances[0].category}
+         return detection
+
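For reference, a sketch (not from the package) of the dictionary this returns; the values are illustrative, but the keys follow the MD results format used throughout this file:

```python
# Illustrative only: the dict returned by to_api_detection() looks like this,
# where 'bbox' is [x_min, y_min, width, height] in relative coordinates and
# 'category' is a string category ID.
example_detection = {
    'conf': 0.92,
    'bbox': [0.31, 0.44, 0.08, 0.12],
    'category': '1'
}
```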
+
+ #%% Support functions
+
+ def _render_bounding_box(detection,
+                          input_file_name,
+                          output_file_name,
+                          line_width=5,
+                          expansion=0):
+     """
+     Renders the detection [detection] on the image [input_file_name], writing the result
+     to [output_file_name].
+     """
+
+     im = open_image(input_file_name)
+     d = detection.to_api_detection()
+     render_detection_bounding_boxes([d],im,thickness=line_width,expansion=expansion,
+                                     confidence_threshold=-10)
+     im.save(output_file_name)
+
+
+ def _detection_rect_to_rtree_rect(detection_rect):
+     """
+     We store detections as x/y/w/h, rtree and pyqtree use l/b/r/t. Convert from
+     our representation to rtree's.
+     """
+
+     left = detection_rect[0]
+     bottom = detection_rect[1]
+     right = detection_rect[0] + detection_rect[2]
+     top = detection_rect[1] + detection_rect[3]
+     return (left,bottom,right,top)
+
+
+ def _rtree_rect_to_detection_rect(rtree_rect):
+     """
+     We store detections as x/y/w/h, rtree and pyqtree use l/b/r/t. Convert from
+     rtree's representation to ours.
+     """
+
+     x = rtree_rect[0]
+     y = rtree_rect[1]
+     w = rtree_rect[2] - rtree_rect[0]
+     h = rtree_rect[3] - rtree_rect[1]
+     return (x,y,w,h)
+
+
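A quick round-trip sanity check for the two helpers above (illustrative, not part of the package):

```python
# x/y/w/h <-> l/b/r/t round trip; values chosen to be exact in binary
# floating point so the equality holds without a tolerance.
bbox = (0.25, 0.5, 0.125, 0.25)                      # x, y, w, h
rect = _detection_rect_to_rtree_rect(bbox)           # (0.25, 0.5, 0.375, 0.75)
assert _rtree_rect_to_detection_rect(rect) == bbox
```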
+ def _sort_detections_for_directory(candidate_detections,options):
+     """
+     candidate_detections is a list of DetectionLocation objects. Sorts them to
+     put nearby detections next to each other, for easier visual review. Returns
+     a sorted copy of candidate_detections, does not sort in-place.
+     """
+
+     if len(candidate_detections) <= 1 or options.smartSort is None:
+         return candidate_detections
+
+     # Just sort by the X location of each box
+     if options.smartSort == 'xsort':
+         candidate_detections_sorted = sorted(candidate_detections,
+                                              key=lambda x: (
+                                                  (x.bbox[0]) + (x.bbox[2]/2.0)
+                                              ))
+         return candidate_detections_sorted
+
+     elif options.smartSort == 'clustersort':
+
+         cluster = sklearn.cluster.AgglomerativeClustering(
+             n_clusters=None,
+             distance_threshold=options.smartSortDistanceThreshold,
+             linkage='complete')
+
+         # Prepare a list of points to represent each box,
+         # that's what we'll use for clustering
+         points = []
+         for det in candidate_detections:
+             # To use the upper-left of the box as the clustering point
+             # points.append([det.bbox[0],det.bbox[1]])
+
+             # To use the center of the box as the clustering point
+             points.append([det.bbox[0]+det.bbox[2]/2.0,
+                            det.bbox[1]+det.bbox[3]/2.0])
+         points_array = np.array(points)
+
+         labels = cluster.fit_predict(points_array)
+         unique_labels = np.unique(labels)
+
+         # Labels *could* be any unique labels according to the docs, but in practice
+         # they are unique integers from 0:nClusters.
+         #
+         # Make sure the labels are unique incrementing integers.
+         for i_label in range(1,len(unique_labels)):
+             assert unique_labels[i_label] == 1 + unique_labels[i_label-1]
+
+         assert len(labels) == len(candidate_detections)
+
+         # Store the label assigned to each cluster
+         for i_label,label in enumerate(labels):
+             candidate_detections[i_label].clusterLabel = label
+
+         # Now sort the clusters by their x coordinate, and re-assign labels
+         # so the labels are sortable
+         label_x_means = []
+
+         for label in unique_labels:
+             detections_this_label = [d for d in candidate_detections if (
+                 d.clusterLabel == label)]
+             points_this_label = [ [d.bbox[0],d.bbox[1]] for d in detections_this_label]
+             x = [p[0] for p in points_this_label]
+             y = [p[1] for p in points_this_label]
+
+             # Compute the centroid for debugging, but we're only going to use the x
+             # coordinate. This is the centroid of points used to represent detections,
+             # which may be box centers or box corners.
+             centroid = [ sum(x) / len(points_this_label), sum(y) / len(points_this_label) ]
+             label_xval = centroid[0]
+             label_x_means.append(label_xval)
+
+         old_cluster_label_to_new_cluster_label = {}
+         new_cluster_labels = np.argsort(label_x_means)
+         assert len(new_cluster_labels) == len(np.unique(new_cluster_labels))
+         for old_cluster_label in unique_labels:
+             old_cluster_label_to_new_cluster_label[old_cluster_label] =\
+                 np.where(new_cluster_labels==old_cluster_label)[0][0]
+
+         for i_cluster in range(0,len(unique_labels)):
+             old_label = unique_labels[i_cluster]
+             assert i_cluster == old_label
+             new_label = old_cluster_label_to_new_cluster_label[old_label]
+
+         for i_det,det in enumerate(candidate_detections):
+             old_label = det.clusterLabel
+             new_label = old_cluster_label_to_new_cluster_label[old_label]
+             det.clusterLabel = new_label
+
+         candidate_detections_sorted = sorted(candidate_detections,
+                                              key=lambda x: (x.clusterLabel,x.id))
+
+         return candidate_detections_sorted
+
+     else:
+         raise ValueError('Unrecognized sort method {}'.format(
+             options.smartSort))
+
+ # ...def _sort_detections_for_directory(...)
+
+
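To make the 'xsort' behavior concrete, here is a minimal standalone sketch of the same sort key applied to bare [x, y, w, h] boxes (illustrative data only):

```python
# 'xsort' orders boxes by the x coordinate of the box center, so horizontally
# adjacent repeat detections end up next to each other during review.
boxes = [[0.8, 0.1, 0.1, 0.1], [0.1, 0.5, 0.2, 0.2], [0.4, 0.3, 0.2, 0.1]]
boxes_sorted = sorted(boxes, key=lambda b: b[0] + b[2] / 2.0)
# Centers at x = 0.2, 0.5, and 0.85, in that order
```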
+ def _find_matches_in_directory(dir_name_and_rows, options):
+     """
+     dir_name_and_rows is a tuple of (name,rows).
+
+     "name" is a location name, typically a folder name, though this may be an arbitrary
+     location identifier.
+
+     "rows" is a Pandas dataframe with one row per image in this location, with columns:
+
+     * 'file': relative file name
+     * 'detections': a list of MD detection objects, i.e. dicts with keys ['category','conf','bbox']
+     * 'max_detection_conf': maximum confidence of any detection, in any category
+
+     "rows" can also point to a .csv file, in which case the detection table will be read from that
+     .csv file, and results will be written to a .csv file rather than being returned.
+
+     Find all unique detections in this directory.
+
+     Returns a list of DetectionLocation objects.
+     """
+
+     if options.pbar is not None:
+         options.pbar.update()
+
+     # Create a tree to store candidate detections
+     candidate_detections_index = pyqtree.Index(bbox=(-0.1,-0.1,1.1,1.1))
+
+     assert len(dir_name_and_rows) == 2, 'find_matches_in_directory: invalid input'
+     assert isinstance(dir_name_and_rows[0],str), 'find_matches_in_directory: invalid location name'
+     dir_name = dir_name_and_rows[0]
+     rows = dir_name_and_rows[1]
+
+     detections_loaded_from_csv_file = None
+
+     if isinstance(rows,str):
+         detections_loaded_from_csv_file = rows
+         print('Loading results for location {} from {}'.format(
+             dir_name,detections_loaded_from_csv_file))
+         rows = pd.read_csv(detections_loaded_from_csv_file)
+         # Pandas writes detections out as strings; convert them back to lists
+         rows['detections'] = rows['detections'].apply(lambda s: json.loads(s.replace('\'','"')))
+
+     if options.maxImagesPerFolder is not None and len(rows) > options.maxImagesPerFolder:
+         print('Ignoring directory {} because it has {} images (limit set to {})'.format(
+             dir_name,len(rows),options.maxImagesPerFolder))
+         return []
+
+     if options.includeFolders is not None:
+         assert options.excludeFolders is None, 'Cannot specify include and exclude folder lists'
+         if dir_name not in options.includeFolders:
+             print('Ignoring folder {}, not in inclusion list'.format(dir_name))
+             return []
+
+     if options.excludeFolders is not None:
+         assert options.includeFolders is None, 'Cannot specify include and exclude folder lists'
+         if dir_name in options.excludeFolders:
+             print('Ignoring folder {}, on exclusion list'.format(dir_name))
+             return []
+
+     # For each image in this directory
+     #
+     # i_directory_row = 0; row = rows.iloc[i_directory_row]
+     #
+     # i_directory_row is a pandas index, so it may not start from zero;
+     # for debugging, we maintain i_iteration as a loop index.
+     i_iteration = -1
+     n_boxes_evaluated = 0
+
+     for i_directory_row, row in rows.iterrows():
+
+         i_iteration += 1
+         filename = row['file']
+         if not path_utils.is_image_file(filename):
+             continue
+
+         if 'max_detection_conf' not in row or 'detections' not in row or \
+             row['detections'] is None:
+             # print('Skipping row {}'.format(i_directory_row))
+             continue
+
+         # Don't bother checking images with no detections above threshold
+         max_p = float(row['max_detection_conf'])
+         if max_p < options.confidenceMin:
+             continue
+
+         # Array of dicts, where each element is
+         # {
+         #   'category': '1',  # str value, category ID
+         #   'conf': 0.926,  # confidence of this detection
+         #
+         #   (x_min, y_min) is upper-left, all in relative coordinates
+         #   'bbox': [x_min, y_min, width_of_box, height_of_box]
+         #
+         # }
+         detections = row['detections']
+         if isinstance(detections,float):
+             assert isinstance(row['failure'],str), 'Expected failure indicator'
+             print('Skipping failed image {} ({})'.format(filename,row['failure']))
+             continue
+
+         assert len(detections) > 0
+
+         # For each detection in this image
+         for i_detection, detection in enumerate(detections):
+
+             n_boxes_evaluated += 1
+
+             if detection is None:
+                 print('Skipping detection {}'.format(i_detection))
+                 continue
+
+             assert 'category' in detection and \
+                 'conf' in detection and \
+                 'bbox' in detection, 'Illegal detection'
+
+             confidence = detection['conf']
+
+             # This is no longer strictly true; I sometimes run RDE in stages, so
+             # some probabilities have already been made negative
+             #
+             # assert confidence >= 0.0 and confidence <= 1.0
+
+             assert confidence >= -1.0 and confidence <= 1.0
+
+             if confidence < options.confidenceMin:
+                 continue
+             if confidence > options.confidenceMax:
+                 continue
+
+             # Optionally exclude some classes from consideration as suspicious
+             if (options.excludeClasses is not None) and (len(options.excludeClasses) > 0):
+                 i_class = int(detection['category'])
+                 if i_class in options.excludeClasses:
+                     continue
+
+             bbox = detection['bbox']
+             confidence = detection['conf']
+
+             # Is this detection too big or too small for consideration?
+             w, h = bbox[2], bbox[3]
+
+             if (w == 0 or h == 0):
+                 continue
+
+             area = h * w
+
+             if area < 0:
+                 print('Warning: negative-area bounding box for file {}'.format(filename))
+                 area = abs(area); h = abs(h); w = abs(w)
+
+             assert area >= 0.0 and area <= 1.0, \
+                 'Illegal bounding box area {} in image {}'.format(area,filename)
+
+             if area < options.minSuspiciousDetectionSize:
+                 continue
+
+             if area > options.maxSuspiciousDetectionSize:
+                 continue
+
+             category = detection['category']
+
+             instance = IndexedDetection(i_detection=i_detection,
+                                         filename=row['file'], bbox=bbox,
+                                         confidence=confidence, category=category)
+
+             b_found_similar_detection = False
+
+             rtree_rect = _detection_rect_to_rtree_rect(bbox)
+
+             # This will return candidates of all classes
+             overlapping_candidate_detections =\
+                 candidate_detections_index.intersect(rtree_rect)
+
+             overlapping_candidate_detections.sort(
+                 key=lambda x: x.id, reverse=False)
+
+             # For each detection in our candidate list
+             for i_candidate, candidate in enumerate(
+                     overlapping_candidate_detections):
+
+                 # Don't match across categories
+                 if (candidate.category != category) and (not (options.categoryAgnosticComparisons)):
+                     continue
+
+                 # Is this a match?
+                 try:
+                     iou = ct_utils.get_iou(bbox, candidate.bbox)
+                 except Exception as e:
+                     print(\
+                         'Warning: IOU computation error on boxes ({},{},{},{}),({},{},{},{}): {}'.\
+                         format(
+                             bbox[0],bbox[1],bbox[2],bbox[3],
+                             candidate.bbox[0],candidate.bbox[1],
+                             candidate.bbox[2],candidate.bbox[3], str(e)))
+                     continue
+
+                 if iou >= options.iouThreshold:
+
+                     b_found_similar_detection = True
+
+                     # If so, add this example to the list for this detection
+                     candidate.instances.append(instance)
+
+                     # We *don't* break here; we allow this instance to possibly
+                     # match multiple candidates. There isn't an obvious right or
+                     # wrong here.
+
+             # ...for each detection on our candidate list
+
+             # If we found no matches, add this to the candidate list
+             if not b_found_similar_detection:
+
+                 candidate = DetectionLocation(instance=instance,
+                                               detection=detection,
+                                               relative_dir=dir_name,
+                                               category=category,
+                                               id=i_iteration)
+
+                 # pyqtree
+                 candidate_detections_index.insert(item=candidate,bbox=rtree_rect)
+
+         # ...for each detection
+
+     # ...for each row
+
+     # Get all candidate detections
+
+     candidate_detections = candidate_detections_index.intersect([-100,-100,100,100])
+
+     # For debugging only, it's convenient to have these sorted
+     # as if they had never gone into a tree structure. Typically
+     # this is in practice a sort by filename.
+     candidate_detections.sort(
+         key=lambda x: x.id, reverse=False)
+
+     if detections_loaded_from_csv_file is not None:
+         location_results_file = \
+             os.path.splitext(detections_loaded_from_csv_file)[0] + \
+             '_results.json'
+         print('Writing results for location {} to {}'.format(
+             dir_name,location_results_file))
+         s = jsonpickle.encode(candidate_detections,make_refs=False)
+         with open(location_results_file,'w') as f:
+             f.write(s)
+         # json.dump(candidate_detections,f,indent=1)
+         return location_results_file
+     else:
+         return candidate_detections
+
+ # ...def _find_matches_in_directory(...)
+
+
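The match test above hinges on intersection-over-union; the package uses ct_utils.get_iou, but a minimal standalone version for [x, y, w, h] boxes (illustrative only) looks like this:

```python
def iou_xywh(a, b):
    """IoU of two [x, y, w, h] boxes; two detections are considered the same
    location when this meets options.iouThreshold (0.9 by default)."""
    ax2, ay2 = a[0] + a[2], a[1] + a[3]
    bx2, by2 = b[0] + b[2], b[1] + b[3]
    iw = max(0.0, min(ax2, bx2) - max(a[0], b[0]))  # intersection width
    ih = max(0.0, min(ay2, by2) - max(a[1], b[1]))  # intersection height
    inter = iw * ih
    union = a[2] * a[3] + b[2] * b[3] - inter
    return inter / union if union > 0 else 0.0

assert iou_xywh([0, 0, 1, 1], [0, 0, 1, 1]) == 1.0
assert iou_xywh([0, 0, 0.5, 1], [0.5, 0, 0.5, 1]) == 0.0
```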
+ def _update_detection_table(repeat_detection_results, options, output_file_name=None):
+     """
+     Changes confidence values in repeat_detection_results.detectionResults so that detections
+     deemed to be possible false positives are given negative confidence values.
+
+     repeat_detection_results is an object of type RepeatDetectionResults, with a pandas
+     dataframe (detectionResults) containing all the detections loaded from the .json file,
+     and a list of detections for each location (suspicious_detections) that are deemed to
+     be suspicious.
+
+     Returns the modified pandas dataframe (repeat_detection_results.detectionResults), but
+     also modifies it in place.
+     """
+
+     # This is the pandas dataframe that contains actual detection results.
+     #
+     # Has fields ['file', 'detections','failure'].
+     detection_results = repeat_detection_results.detectionResults
+
+     # An array of length nDirs, where each element is a list of DetectionLocation
+     # objects for that directory that have been flagged as suspicious
+     suspicious_detections_by_directory = repeat_detection_results.suspicious_detections
+
+     n_bbox_changes = 0
+
+     print('Updating output table')
+
+     # For each directory
+     for i_dir, directory_events in enumerate(suspicious_detections_by_directory):
+
+         # For each suspicious detection group in this directory
+         for i_detection_event, detection_event in enumerate(directory_events):
+
+             location_bbox = detection_event.bbox
+
+             # For each instance of this suspicious detection
+             for i_instance, instance in enumerate(detection_event.instances):
+
+                 instance_bbox = instance.bbox
+
+                 # This should match the bbox for the detection event
+                 iou = ct_utils.get_iou(instance_bbox, location_bbox)
+
+                 # The bbox for this instance should be almost the same as the bbox
+                 # for this detection group, where "almost" is defined by the IOU
+                 # threshold.
+                 assert iou >= options.iouThreshold
+                 # if iou < options.iouThreshold:
+                 #     print('IOU warning: {},{}'.format(iou,options.iouThreshold))
+
+                 assert instance.filename in repeat_detection_results.filename_to_row
+                 i_row = repeat_detection_results.filename_to_row[instance.filename]
+                 row = detection_results.iloc[i_row]
+                 row_detections = row['detections']
+                 detection_to_modify = row_detections[instance.i_detection]
+
+                 # Make sure the bounding box matches
+                 assert (instance_bbox[0:4] == detection_to_modify['bbox'][0:4])
+
+                 # Make the probability negative, if it hasn't been switched by
+                 # another bounding box
+                 if detection_to_modify['conf'] >= 0:
+                     detection_to_modify['conf'] = -1 * detection_to_modify['conf']
+                     n_bbox_changes += 1
+
+             # ...for each instance
+
+         # ...for each detection
+
+     # ...for each directory
+
+     # Update maximum probabilities
+
+     # For each row...
+     n_prob_changes = 0
+     n_prob_changes_to_negative = 0
+     n_prob_changes_across_threshold = 0
+
+     for i_row, row in detection_results.iterrows():
+
+         detections = row['detections']
+         if (detections is None) or isinstance(detections,float):
+             assert isinstance(row['failure'],str)
+             continue
+
+         if len(detections) == 0:
+             continue
+
+         max_p_original = float(row['max_detection_conf'])
+
+         # No longer strictly true; sometimes I run RDE on RDE output
+         # assert max_p_original >= 0
+         assert max_p_original >= -1.0
+
+         max_p = None
+         n_negative = 0
+
+         for i_detection, detection in enumerate(detections):
+
+             p = detection['conf']
+
+             if p < 0:
+                 n_negative += 1
+
+             if (max_p is None) or (p > max_p):
+                 max_p = p
+
+         # We should only be making detections *less* likely in this process
+         assert max_p <= max_p_original
+         detection_results.at[i_row, 'max_detection_conf'] = max_p
+
+         # If there was a meaningful change, count it
+         if abs(max_p - max_p_original) > 1e-3:
+
+             assert max_p < max_p_original
+
+             n_prob_changes += 1
+
+             if (max_p < 0) and (max_p_original >= 0):
+                 n_prob_changes_to_negative += 1
+
+             if (max_p_original >= options.confidenceMin) and (max_p < options.confidenceMin):
+                 n_prob_changes_across_threshold += 1
+
+             # Negative probabilities should be the only reason max_p changed, so
+             # we should have found at least one negative value if we reached
+             # this point.
+             assert n_negative > 0
+
+         # ...if there was a meaningful change to the max probability for this row
+
+     # ...for each row
+
+     # If we're also writing output...
+     if output_file_name is not None and len(output_file_name) > 0:
+         write_api_results(detection_results, repeat_detection_results.otherFields,
+                           output_file_name)
+
+     print(
+         'Finished updating detection table\nChanged {} detections that impacted {} max_ps ({} to negative) ({} across confidence threshold)'.format( # noqa
+             n_bbox_changes, n_prob_changes, n_prob_changes_to_negative, n_prob_changes_across_threshold))
+
+     return detection_results
+
+ # ...def _update_detection_table(...)
+
+
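Because suppression is expressed as a negative confidence value rather than deletion, downstream consumers can drop suppressed detections with an ordinary threshold filter; a sketch with illustrative data:

```python
# Suppressed detections keep their box but carry a negative confidence, so a
# normal confidence threshold (here 0.1) filters them out with no RDE-specific
# logic.
detections = [
    {'conf': 0.95, 'bbox': [0.1, 0.1, 0.2, 0.3], 'category': '1'},   # kept
    {'conf': -0.88, 'bbox': [0.5, 0.5, 0.1, 0.1], 'category': '1'},  # suppressed by RDE
]
kept = [d for d in detections if d['conf'] >= 0.1]
assert len(kept) == 1
```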
+ def _render_sample_image_for_detection(detection,filtering_dir,options):
+     """
+     Render a sample image for one unique detection, possibly containing lightly-colored
+     high-confidence detections from elsewhere in the sample image.
+
+     "detection" is a DetectionLocation object.
+
+     Depends on having already sorted instances within this detection by confidence, and
+     having already generated an output file name for this sample image.
+     """
+
+     # Confidence values should already have been sorted in the previous loop
+     instance_confidences = [instance.confidence for instance in detection.instances]
+     assert ct_utils.is_list_sorted(instance_confidences,reverse=True)
+
+     # Choose the highest-confidence index
+     instance = detection.instances[0]
+     relative_path = instance.filename
+
+     output_relative_path = detection.sampleImageRelativeFileName
+     assert len(output_relative_path) > 0
+
+     output_full_path = os.path.join(filtering_dir, output_relative_path)
+
+     if is_sas_url(options.imageBase):
+         input_full_path = relative_sas_url(options.imageBase, relative_path)
+     else:
+         input_full_path = os.path.join(options.imageBase, relative_path)
+         assert (os.path.isfile(input_full_path)), 'Not a file: {}'.\
+             format(input_full_path)
+
+     try:
+
+         im = open_image(input_full_path)
+
+         # Should we render (typically in a very light color) detections
+         # *other* than the one we're highlighting here?
+         if options.bRenderOtherDetections:
+
+             # Optionally resize the output image
+             if (options.maxOutputImageWidth is not None) and \
+                 (im.size[0] > options.maxOutputImageWidth):
+                 im = vis_utils.resize_image(im, options.maxOutputImageWidth,
+                                             target_height=-1)
+
+             assert detection.sampleImageDetections is not None
+
+             # At this point, suspicious detections have already been flipped
+             # negative, which we don't want for rendering purposes
+             rendered_detections = []
+
+             for det in detection.sampleImageDetections:
+                 rendered_det = copy.copy(det)
+                 rendered_det['conf'] = abs(rendered_det['conf'])
+                 rendered_detections.append(rendered_det)
+
+             # Render other detections first (typically in a thin+light box)
+             render_detection_bounding_boxes(rendered_detections,
+                                             im,
+                                             label_map=None,
+                                             thickness=options.otherDetectionsLineWidth,
+                                             expansion=options.boxExpansion,
+                                             colormap=options.otherDetectionsColors,
+                                             confidence_threshold=options.otherDetectionsThreshold)
+
+             # Now render the example detection (on top of at least one
+             # of the other detections)
+
+             # This converts the *first* instance to an API standard detection;
+             # because we just sorted this list in descending order by confidence,
+             # this is the highest-confidence detection.
+             d = detection.to_api_detection()
+
+             render_detection_bounding_boxes([d],im,thickness=options.lineThickness,
+                                             expansion=options.boxExpansion,
+                                             confidence_threshold=-10)
+
+             im.save(output_full_path)
+
+         else:
+
+             _render_bounding_box(detection,
+                                  input_full_path,
+                                  output_full_path,
+                                  line_width=options.lineThickness,
+                                  expansion=options.boxExpansion)
+
+         # ...if we are/aren't rendering other bounding boxes
+
+         # If we're rendering detection tiles, we'll re-load and re-write the image we
+         # just wrote to output_full_path
+         if options.bRenderDetectionTiles:
+
+             assert not is_sas_url(options.imageBase), "Can't render detection tiles from SAS URLs"
+
+             if options.detectionTilesPrimaryImageWidth is not None:
+                 primary_image_width = options.detectionTilesPrimaryImageWidth
+             else:
+                 # "im" may be a resized version of the original image, if we've already run
+                 # the code to render other bounding boxes.
+                 primary_image_width = im.size[0]
+
+             if options.detectionTilesCroppedGridWidth <= 1.0:
+                 cropped_grid_width = \
+                     round(options.detectionTilesCroppedGridWidth * primary_image_width)
+             else:
+                 cropped_grid_width = options.detectionTilesCroppedGridWidth
+
+             secondary_image_filename_list = []
+             secondary_image_bounding_box_list = []
+
+             # If we start from zero, we include the sample crop
+             for instance in detection.instances[0:]:
+                 secondary_image_filename_list.append(os.path.join(options.imageBase,
+                                                                   instance.filename))
+                 secondary_image_bounding_box_list.append(instance.bbox)
+
+             # Optionally limit the number of crops we pass to the rendering function
+             if (options.detectionTilesMaxCrops is not None) and \
+                 (len(detection.instances) > options.detectionTilesMaxCrops):
+                 secondary_image_filename_list = \
+                     secondary_image_filename_list[0:options.detectionTilesMaxCrops]
+                 secondary_image_bounding_box_list = \
+                     secondary_image_bounding_box_list[0:options.detectionTilesMaxCrops]
+
+             # This will over-write the image we've already written to output_full_path
+             render_images_with_thumbnails.render_images_with_thumbnails(
+                 primary_image_filename=output_full_path,
+                 primary_image_width=primary_image_width,
+                 secondary_image_filename_list=secondary_image_filename_list,
+                 secondary_image_bounding_box_list=secondary_image_bounding_box_list,
+                 cropped_grid_width=cropped_grid_width,
+                 output_image_filename=output_full_path,
+                 primary_image_location=options.detectionTilesPrimaryImageLocation)
+
+         # ...if we are/aren't rendering detection tiles
+
+     except Exception as e:
+
+         stack_trace = traceback.format_exc()
+         print('Warning: error rendering bounding box from {} to {}: {} ({})'.format(
+             input_full_path,output_full_path,e,stack_trace))
+         if options.bFailOnRenderError:
+             raise
+
+ # ...def _render_sample_image_for_detection(...)
+
+
+ #%% Main entry point
+
+ def find_repeat_detections(input_filename, output_file_name=None, options=None):
+     """
+     Find detections in a MD results file that occur repeatedly and are likely to be
+     rocks/sticks.
+
+     Args:
+         input_filename (str): the MD results .json file to analyze
+         output_file_name (str, optional): the filename to which we should write results
+             with repeat detections removed, typically set to None during the first
+             part of the RDE process.
+         options (RepeatDetectionOptions, optional): all the interesting options controlling
+             this process; see RepeatDetectionOptions for details.
+
+     Returns:
+         RepeatDetectionResults: results of the RDE process; see RepeatDetectionResults
+         for details.
+     """
+
+     ##%% Input handling
+
+     if options is None:
+
+         options = RepeatDetectionOptions()
+
+     # Validate some options
+
+     if options.customDirNameFunction is not None:
+         assert options.nDirLevelsFromLeaf == 0, \
+             'Cannot mix custom dir name functions with nDirLevelsFromLeaf'
+
+     if options.nDirLevelsFromLeaf != 0:
+         assert options.customDirNameFunction is None, \
+             'Cannot mix custom dir name functions with nDirLevelsFromLeaf'
+
+     if options.filterFileToLoad is not None and len(options.filterFileToLoad) > 0:
+
+         print('Bypassing detection-finding, loading from {}'.format(options.filterFileToLoad))
+
+         # Load the filtering file
+         detection_index_file_name = options.filterFileToLoad
+         with open(detection_index_file_name, 'r') as f:
+             s_in = f.read()
+         detection_info = jsonpickle.decode(s_in)
+         filtering_base_dir = os.path.dirname(options.filterFileToLoad)
+         suspicious_detections = detection_info['suspicious_detections']
+
+         # Load the same options we used when finding repeat detections
+         options = detection_info['options']
+
+         # ...except for things that explicitly tell this function not to
+         # find repeat detections.
+         options.filterFileToLoad = detection_index_file_name
+         options.bWriteFilteringFolder = False
+
+     # ...if we're loading from an existing filtering file
+
+     to_return = RepeatDetectionResults()
+
+
+     # Check early to avoid problems with the output folder
+
+     if options.bWriteFilteringFolder:
+         assert options.outputBase is not None and len(options.outputBase) > 0
+         os.makedirs(options.outputBase,exist_ok=True)
+
+
+     # Load file to a pandas dataframe. Also populates 'max_detection_conf', even if it's
+     # not present in the .json file.
+     detection_results, other_fields = load_api_results(input_filename, normalize_paths=True,
+                                                        filename_replacements=options.filenameReplacements,
+                                                        force_forward_slashes=True)
+     to_return.detectionResults = detection_results
+     to_return.otherFields = other_fields
+
+     # Before doing any real work, make sure we can *probably* access images.
+     # This is just a cursory check on the first image, but it heads off most
+     # problems related to incorrect mount points, etc. Better to do this before
+     # spending 20 minutes finding repeat detections.
+
+     if options.bWriteFilteringFolder:
+
+         if not is_sas_url(options.imageBase):
+
+             row = detection_results.iloc[0]
+             relative_path = row['file']
+             if options.filenameReplacements is not None:
+                 for s in options.filenameReplacements.keys():
+                     relative_path = relative_path.replace(s,options.filenameReplacements[s])
+             absolute_path = os.path.join(options.imageBase,relative_path)
+             assert os.path.isfile(absolute_path), 'Could not find file {}'.format(absolute_path)
+
+
+     ##%% Separate files into locations
+
+     # This will be a map from a directory name to smaller data frames
+     rows_by_directory = {}
+
+     # This is a mapping back into the rows of the original table
+     filename_to_row = {}
+
+     print('Separating images into locations...')
+
+     n_custom_dir_replacements = 0
+
+     # i_row = 0; row = detection_results.iloc[i_row]
+     for i_row, row in tqdm(detection_results.iterrows(),total=len(detection_results)):
+
+         relative_path = row['file']
+
+         if options.customDirNameFunction is not None:
+             basic_dir_name = os.path.dirname(relative_path.replace('\\','/'))
+             dir_name = options.customDirNameFunction(relative_path)
+             if basic_dir_name != dir_name:
+                 n_custom_dir_replacements += 1
+         else:
+             dir_name = os.path.dirname(relative_path)
+
+         if len(dir_name) == 0:
+             assert options.nDirLevelsFromLeaf == 0, \
1230
+ assert options.nDirLevelsFromLeaf == 0, \
1231
+ 'Can''t use the dirLevelsFromLeaf option with flat filenames'
1232
+ else:
1233
+ if options.nDirLevelsFromLeaf > 0:
1234
+ i_level = 0
1235
+ while (i_level < options.nDirLevelsFromLeaf):
1236
+ i_level += 1
1237
+ dir_name = os.path.dirname(dir_name)
1238
+ assert len(dir_name) > 0
1239
+
1240
+ if dir_name not in rows_by_directory:
1241
+ # Create a new DataFrame with just this row
1242
+ # rows_by_directory[dir_name] = pd.DataFrame(row)
1243
+ rows_by_directory[dir_name] = []
1244
+
1245
+ rows_by_directory[dir_name].append(row)
1246
+
1247
+ assert relative_path not in filename_to_row
1248
+ filename_to_row[relative_path] = i_row
1249
+
1250
+ # ...for each unique detection
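+ 
+     # Illustrative example (hypothetical, not part of this module): if relative paths
+     # look like 'site01/cam03/2023-01-01/IMG0001.JPG' but a "location" should be a
+     # site/camera pair, the caller could pass, e.g.:
+     #
+     # options.customDirNameFunction = lambda fn: '/'.join(fn.split('/')[0:2])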
+ 
+     if options.customDirNameFunction is not None:
+         print('Custom dir name function made {} replacements (of {} images)'.format(
+             n_custom_dir_replacements,len(detection_results)))
+ 
+     # Convert lists of rows to proper DataFrames
+     dirs = list(rows_by_directory.keys())
+     for d in dirs:
+         rows_by_directory[d] = pd.DataFrame(rows_by_directory[d])
+ 
+     to_return.rows_by_directory = rows_by_directory
+     to_return.filename_to_row = filename_to_row
+ 
+     print('Finished separating {} files into {} locations'.format(len(detection_results),
+         len(rows_by_directory)))
+ 
+     ##%% Look for repeat detections (or load them from file)
+ 
+     dirs_to_search = list(rows_by_directory.keys())
+     if options.debugMaxDir > 0:
+         dirs_to_search = dirs_to_search[0:options.debugMaxDir]
+ 
+     # Map numeric directory indices to names (we'll write this out to the detection
+     # index .json file)
+     dir_index_to_name = {}
+     for i_dir, dir_name in enumerate(dirs_to_search):
+         dir_index_to_name[i_dir] = dir_name
+ 
+     # Are we actually looking for matches, or just loading from a file?
+     if len(options.filterFileToLoad) == 0:
+ 
+         # length-nDirs list of lists of DetectionLocation objects
+         suspicious_detections = [None] * len(dirs_to_search)
+ 
+         # We're actually looking for matches...
+         print('Finding similar detections...')
+ 
+         dir_name_and_rows = []
+         for dir_name in dirs_to_search:
+             rows_this_directory = rows_by_directory[dir_name]
+             dir_name_and_rows.append((dir_name,rows_this_directory))
+ 
+         all_candidate_detections = [None] * len(dirs_to_search)
+ 
+         # If we serialize results to intermediate files, we need to remove slashes from
+         # location names; we store mappings here.
+         normalized_location_name_to_location_name = None
+         location_name_to_normalized_location_name = None
+ 
+         # Initialized here so that the close/join logic below is safe even when we run
+         # serially and never create a pool
+         pool = None
+ 
+         if not options.bParallelizeComparisons:
+ 
+             options.pbar = None
+             for i_dir, dir_name in enumerate(tqdm(dirs_to_search)):
+                 dir_name_and_row = dir_name_and_rows[i_dir]
+                 assert dir_name_and_row[0] == dir_name
+                 print('Processing dir {} of {}: {}'.format(i_dir,len(dirs_to_search),dir_name))
+                 all_candidate_detections[i_dir] = \
+                     _find_matches_in_directory(dir_name_and_row, options)
+ 
+         else:
+ 
+             n_workers = options.nWorkers
+             if n_workers > len(dir_name_and_rows):
+                 print('Pool of {} requested, but only {} folders available, reducing pool to {}'.\
+                       format(n_workers,len(dir_name_and_rows),len(dir_name_and_rows)))
+                 n_workers = len(dir_name_and_rows)
+ 
+             if options.parallelizationUsesThreads:
+                 pool = ThreadPool(n_workers); poolstring = 'threads'
+             else:
+                 pool = Pool(n_workers); poolstring = 'processes'
+ 
+             print('Starting comparison pool with {} {}'.format(n_workers,poolstring))
+ 
+             assert options.pass_detections_to_processes_method in ('file','memory'), \
+                 'Unrecognized IPC mechanism: {}'.format(options.pass_detections_to_processes_method)
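+ 
+             # ThreadPool workers share memory with this process (so inputs and results
+             # don't need to be pickled, and the shared progress bar below works), while
+             # Pool workers run in separate processes, which can help for CPU-bound
+             # comparison work.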
+ 
+             # ** Experimental **
+             #
+             # Rather than passing detections and results around in memory, write detections and
+             # results for each worker to intermediate files. This may improve performance for
+             # very large result sets that exceed working memory.
+             if options.pass_detections_to_processes_method == 'file':
+ 
+                 ##%% Convert location names to normalized names we can write to files
+ 
+                 normalized_location_name_to_location_name = {}
+                 for dir_name in dirs_to_search:
+                     normalized_location_name = flatten_path(dir_name)
+                     assert normalized_location_name not in normalized_location_name_to_location_name, \
+                         'Redundant location name {}, can\'t serialize to intermediate files'.format(
+                             dir_name)
+                     normalized_location_name_to_location_name[normalized_location_name] = dir_name
+ 
+                 location_name_to_normalized_location_name = \
+                     invert_dictionary(normalized_location_name_to_location_name)
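+ 
+                 # For example, flatten_path maps a location name like 'siteA/cam01' to a
+                 # filesystem-safe token with no path separators, so it can serve as an
+                 # intermediate filename; the exact substitution character is an
+                 # implementation detail of flatten_path.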
+ 
+ 
+                 ##%% Write results to files for each location
+ 
+                 print('Writing results to intermediate files')
+ 
+                 intermediate_json_file_folder = os.path.join(options.outputBase,'intermediate_results')
+                 os.makedirs(intermediate_json_file_folder,exist_ok=True)
+ 
+                 dir_name_and_intermediate_file = []
+ 
+                 # i_location = 0; location_info = dir_name_and_rows[i_location]
+                 for i_location, location_info in tqdm(enumerate(dir_name_and_rows)):
+ 
+                     location_name = location_info[0]
+                     assert location_name in location_name_to_normalized_location_name
+                     normalized_location_name = location_name_to_normalized_location_name[location_name]
+                     intermediate_results_file = os.path.join(intermediate_json_file_folder,
+                                                              normalized_location_name + '.csv')
+                     detections_table_this_location = location_info[1]
+                     detections_table_this_location.to_csv(intermediate_results_file,header=True,index=False)
+                     dir_name_and_intermediate_file.append((location_name,intermediate_results_file))
+ 
+ 
+                 ##%% Find detections in each directory
+ 
+                 options.pbar = None
+                 all_candidate_detection_files = list(pool.imap(
+                     partial(_find_matches_in_directory,options=options), dir_name_and_intermediate_file))
+ 
+ 
+                 ##%% Load into a combined list of candidate detections
+ 
+                 all_candidate_detections = []
+ 
+                 # candidate_detection_file = all_candidate_detection_files[0]
+                 for candidate_detection_file in all_candidate_detection_files:
+                     with open(candidate_detection_file, 'r') as f:
+                         s = f.read()
+                     candidate_detections_this_file = jsonpickle.decode(s)
+                     all_candidate_detections.append(candidate_detections_this_file)
+ 
+ 
+                 ##%% Clean up intermediate files
+ 
+                 shutil.rmtree(intermediate_json_file_folder)
+ 
+             # If we're passing things around in memory, rather than via intermediate files
+             else:
+ 
+                 # We get slightly nicer progress bar behavior using threads, by passing a pbar
+                 # object and letting it get updated. We can't serialize this object across
+                 # processes.
+                 if options.parallelizationUsesThreads:
+                     options.pbar = tqdm(total=len(dir_name_and_rows))
+                     all_candidate_detections = list(pool.imap(
+                         partial(_find_matches_in_directory,options=options), dir_name_and_rows))
+                 else:
+                     options.pbar = None
+                     all_candidate_detections = list(tqdm(pool.imap(
+                         partial(_find_matches_in_directory,options=options), dir_name_and_rows)))
+ 
+         # ...if we're parallelizing comparisons
+ 
+         if pool is not None:
+             try:
+                 pool.close()
+                 pool.join()
+                 print('Pool closed and joined for RDE comparisons')
+             except Exception as e:
+                 print('Warning: error closing RDE comparison pool: {}'.format(str(e)))
+ 
+         print('\nFinished looking for similar detections')
+ 
+ 
+         ##%% Mark suspicious locations based on match results
+ 
+         print('Marking repeat detections...')
+ 
+         n_images_with_suspicious_detections = 0
+         n_suspicious_detections = 0
+ 
+         # For each directory
+         for i_dir in range(len(dirs_to_search)):
+ 
+             # The above-threshold DetectionLocation objects for this directory
+             suspicious_detections_this_dir = []
+ 
+             # The candidate DetectionLocation objects for this directory
+             candidate_detections_this_dir = all_candidate_detections[i_dir]
+ 
+             for i_location, candidate_location in enumerate(candidate_detections_this_dir):
+ 
+                 # 'instances' is a list of file/detection pairs
+                 n_occurrences = len(candidate_location.instances)
+ 
+                 if n_occurrences < options.occurrenceThreshold:
+                     continue
+ 
+                 n_images_with_suspicious_detections += n_occurrences
+                 n_suspicious_detections += 1
+ 
+                 suspicious_detections_this_dir.append(candidate_location)
+ 
+             suspicious_detections[i_dir] = suspicious_detections_this_dir
+ 
+             # Sort the above-threshold detections for easier review
+             if options.smartSort is not None:
+                 suspicious_detections[i_dir] = _sort_detections_for_directory(
+                     suspicious_detections[i_dir],options)
+ 
+             print('Found {} suspicious detections in directory {} ({})'.format(
+                 len(suspicious_detections[i_dir]),i_dir,dirs_to_search[i_dir]))
+ 
+         # ...for each directory
+ 
+         print('Finished marking repeat detections')
+ 
+         print('Found {} unique detections on {} images that are suspicious'.format(
+             n_suspicious_detections, n_images_with_suspicious_detections))
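+ 
+         # In other words, a candidate location is only marked "suspicious" if a
+         # nearly-identical box recurs in at least options.occurrenceThreshold images at
+         # the same location; boxes that occur less often are assumed to be real animals.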
+ 
+     # If we're just loading detections from a file...
+     else:
+ 
+         assert len(suspicious_detections) == len(dirs_to_search)
+ 
+         n_detections_removed = 0
+         n_detections_loaded = 0
+ 
+         # We're skipping detection-finding, but to see which images are actually
+         # legitimate false positives, we may be looking for physical files or loading
+         # from a text file.
+         file_list = None
+         if options.filteredFileListToLoad is not None:
+             with open(options.filteredFileListToLoad) as f:
+                 file_list = f.readlines()
+                 file_list = [x.strip() for x in file_list]
+             n_suspicious_detections = sum([len(x) for x in suspicious_detections])
+             print('Loaded false positive list from file, ' + \
+                   'will remove {} of {} suspicious detections'.format(
+                   len(file_list), n_suspicious_detections))
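+ 
+         # Two review mechanisms are supported here: by default, any sample image the
+         # reviewer deleted from the filtering folder is treated as a real detection and
+         # un-marked below; alternatively, filteredFileListToLoad supplies an explicit
+         # list of the sample images that should remain marked as false positives.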
+ 
+         # For each directory...
+         #
+         # i_dir = 0; detections = suspicious_detections[0]
+         #
+         # suspicious_detections is a list of lists of DetectionLocation objects,
+         # one list per directory.
+         for i_dir, detections in enumerate(suspicious_detections):
+ 
+             b_valid_detection = [True] * len(detections)
+             n_detections_loaded += len(detections)
+ 
+             # For each detection that was present before filtering
+             # i_detection = 0; detection = detections[i_detection]
+             for i_detection, detection in enumerate(detections):
+ 
+                 # Are we checking the directory to see whether detections were actually
+                 # false positives, or reading from a list?
+                 if file_list is None:
+ 
+                     # Is the image still there?
+                     image_full_path = os.path.join(filtering_base_dir,
+                                                    detection.sampleImageRelativeFileName)
+ 
+                     # If not, remove this from the list of suspicious detections
+                     if not os.path.isfile(image_full_path):
+                         n_detections_removed += 1
+                         b_valid_detection[i_detection] = False
+ 
+                 else:
+ 
+                     if detection.sampleImageRelativeFileName not in file_list:
+                         n_detections_removed += 1
+                         b_valid_detection[i_detection] = False
+ 
+             # ...for each detection
+ 
+             n_removed_this_dir = len(b_valid_detection) - sum(b_valid_detection)
+             if n_removed_this_dir > 0:
+                 print('Removed {} of {} detections from directory {}'.\
+                       format(n_removed_this_dir,len(detections), i_dir))
+ 
+             detections_filtered = list(compress(detections, b_valid_detection))
+             suspicious_detections[i_dir] = detections_filtered
+ 
+         # ...for each directory
+ 
+         print('Removed {} of {} total detections via manual filtering'.\
+               format(n_detections_removed, n_detections_loaded))
+ 
+     # ...if we are/aren't finding detections (vs. loading from file)
+ 
+     to_return.suspicious_detections = suspicious_detections
+ 
+     to_return.allRowsFiltered = _update_detection_table(to_return, options, output_file_name)
+ 
+ 
+     ##%% Create filtering directory
+ 
+     if options.bWriteFilteringFolder:
+ 
+         print('Creating filtering folder...')
+ 
+         date_string = datetime.now().strftime('%Y.%m.%d.%H.%M.%S')
+         filtering_dir = os.path.join(options.outputBase, 'filtering_' + date_string)
+         os.makedirs(filtering_dir, exist_ok=True)
+ 
+         # Take a first loop over every suspicious detection, and do the things that make
+         # sense to do in a serial loop:
+         #
+         # * Generate file names (which requires an index variable)
+         # * Sort instances by confidence
+         # * Look up detections for each sample image in the big table (so we don't have
+         #   to pass the table to workers)
+         for i_dir, suspicious_detections_this_dir in enumerate(tqdm(suspicious_detections)):
+ 
+             for i_detection, detection in enumerate(suspicious_detections_this_dir):
+ 
+                 # Sort instances in descending order by confidence
+                 detection.instances.sort(key=attrgetter('confidence'),reverse=True)
+ 
+                 if detection.clusterLabel is not None:
+                     cluster_string = '_c{:0>4d}'.format(detection.clusterLabel)
+                 else:
+                     cluster_string = ''
+ 
+                 # Choose the highest-confidence instance as the sample image
+                 instance = detection.instances[0]
+                 relative_path = instance.filename
+ 
+                 output_relative_path = 'dir{:0>4d}_det{:0>4d}{}_n{:0>4d}.jpg'.format(
+                     i_dir, i_detection, cluster_string, len(detection.instances))
+                 detection.sampleImageRelativeFileName = output_relative_path
+ 
+                 i_row = filename_to_row[relative_path]
+                 row = detection_results.iloc[i_row]
+                 detection.sampleImageDetections = row['detections']
+ 
+             # ...for each suspicious detection in this folder
+ 
+         # ...for each folder
+ 
+         # Collapse suspicious detections into a flat list
+         all_suspicious_detections = []
+ 
+         # i_dir = 0; suspicious_detections_this_dir = suspicious_detections[i_dir]
+         for i_dir, suspicious_detections_this_dir in enumerate(tqdm(suspicious_detections)):
+             for i_detection, detection in enumerate(suspicious_detections_this_dir):
+                 all_suspicious_detections.append(detection)
+ 
+         # Render suspicious detections
+         if options.bParallelizeRendering:
+ 
+             n_workers = options.nWorkers
+ 
+             pool = None
+ 
+             try:
+                 if options.parallelizationUsesThreads:
+                     pool = ThreadPool(n_workers); poolstring = 'threads'
+                 else:
+                     pool = Pool(n_workers); poolstring = 'processes'
+ 
+                 print('Starting rendering pool with {} {}'.format(n_workers,poolstring))
+ 
+                 # We get slightly nicer progress bar behavior using threads, by passing a pbar
+                 # object and letting it get updated. We can't serialize this object across
+                 # processes.
+                 #
+                 # The return values aren't used; iterating over imap just drives the pool.
+                 if options.parallelizationUsesThreads:
+                     options.pbar = tqdm(total=len(all_suspicious_detections))
+                     rendering_results = list(pool.imap(
+                         partial(_render_sample_image_for_detection,filtering_dir=filtering_dir,
+                                 options=options), all_suspicious_detections))
+                 else:
+                     options.pbar = None
+                     rendering_results = list(tqdm(pool.imap(
+                         partial(_render_sample_image_for_detection,filtering_dir=filtering_dir,
+                                 options=options), all_suspicious_detections)))
+             finally:
+                 if pool is not None:
+                     pool.close()
+                     pool.join()
+                     print('Pool closed and joined for RDE rendering')
+ 
+         else:
+ 
+             # Serial loop over detections
+             for detection in all_suspicious_detections:
+                 _render_sample_image_for_detection(detection,filtering_dir,options)
+ 
+         # Delete (large) temporary data from the list of suspicious detections
+         for detection in all_suspicious_detections:
+             detection.sampleImageDetections = None
+ 
+         # Write out the detection index
+         detection_index_file_name = os.path.join(filtering_dir, detection_index_file_name_base)
+ 
+         # Prepare the data we're going to write to the detection index file
+         detection_info = {}
+ 
+         detection_info['suspicious_detections'] = suspicious_detections
+         detection_info['dir_index_to_name'] = dir_index_to_name
+ 
+         # Remove the one non-serializable object from the options struct before
+         # serializing to .json
+         options.pbar = None
+         detection_info['options'] = options
+ 
+         s = jsonpickle.encode(detection_info,make_refs=False)
+         with open(detection_index_file_name, 'w') as f:
+             f.write(s)
+         to_return.filterFile = detection_index_file_name
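+ 
+         # This index is the file that gets passed back in as options.filterFileToLoad
+         # after a human reviews the filtering folder, closing the loop with the load
+         # path at the top of this function.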
+ 
+     # ...if we're writing filtering info
+ 
+     return to_return
+ 
+ # ...def find_repeat_detections()