megadetector-5.0.27-py3-none-any.whl → megadetector-5.0.29-py3-none-any.whl
This diff shows the content of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Potentially problematic release.
- megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +23 -23
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -1
- megadetector/data_management/camtrap_dp_to_coco.py +45 -45
- megadetector/data_management/cct_json_utils.py +101 -101
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +189 -189
- megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
- megadetector/data_management/databases/integrity_check_json_db.py +202 -188
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +38 -38
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +130 -124
- megadetector/data_management/labelme_to_yolo.py +78 -72
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
- megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +70 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
- megadetector/data_management/mewc_to_md.py +339 -340
- megadetector/data_management/ocr_tools.py +258 -252
- megadetector/data_management/read_exif.py +232 -223
- megadetector/data_management/remap_coco_categories.py +26 -26
- megadetector/data_management/remove_exif.py +31 -20
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +41 -41
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +117 -120
- megadetector/data_management/yolo_to_coco.py +195 -188
- megadetector/detection/change_detection.py +831 -0
- megadetector/detection/process_video.py +341 -338
- megadetector/detection/pytorch_detector.py +308 -266
- megadetector/detection/run_detector.py +186 -166
- megadetector/detection/run_detector_batch.py +366 -364
- megadetector/detection/run_inference_with_yolov5_val.py +328 -325
- megadetector/detection/run_tiled_inference.py +312 -253
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +291 -283
- megadetector/postprocessing/add_max_conf.py +15 -11
- megadetector/postprocessing/categorize_detections_by_size.py +44 -44
- megadetector/postprocessing/classification_postprocessing.py +808 -311
- megadetector/postprocessing/combine_batch_outputs.py +20 -21
- megadetector/postprocessing/compare_batch_results.py +528 -517
- megadetector/postprocessing/convert_output_format.py +97 -97
- megadetector/postprocessing/create_crop_folder.py +220 -147
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -0
- megadetector/postprocessing/load_api_results.py +25 -22
- megadetector/postprocessing/md_to_coco.py +129 -98
- megadetector/postprocessing/md_to_labelme.py +89 -83
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +87 -114
- megadetector/postprocessing/postprocess_batch_results.py +319 -302
- megadetector/postprocessing/remap_detection_categories.py +36 -36
- megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -69
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +33 -33
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +11 -11
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/azure_utils.py +22 -22
- megadetector/utils/ct_utils.py +1019 -200
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +541 -518
- megadetector/utils/path_utils.py +1511 -406
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/sas_blob_utils.py +53 -49
- megadetector/utils/split_locations_into_train_val.py +73 -60
- megadetector/utils/string_utils.py +147 -26
- megadetector/utils/url_utils.py +463 -173
- megadetector/utils/wi_utils.py +2629 -2868
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +21 -21
- megadetector/visualization/render_images_with_thumbnails.py +37 -73
- megadetector/visualization/visualization_utils.py +424 -404
- megadetector/visualization/visualize_db.py +197 -190
- megadetector/visualization/visualize_detector_output.py +126 -98
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/METADATA +6 -3
- megadetector-5.0.29.dist-info/RECORD +163 -0
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector-5.0.27.dist-info/RECORD +0 -208
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py

@@ -6,7 +6,7 @@ Core utilities shared by find_repeat_detections and remove_repeat_detections.
(whitespace-only change: trailing whitespace stripped from a blank line in the module docstring)

@@ -68,68 +68,68 @@ class RepeatDetectionOptions:
@@ -139,71 +139,71 @@ class RepeatDetectionOptions:
@@ -215,60 +215,60 @@ class RepeatDetectionOptions:
(whitespace-only changes: trailing whitespace stripped from blank lines and otherwise-identical
comment lines in RepeatDetectionOptions.__init__; the documented options and their defaults --
imageBase, outputBase, confidenceMin=0.1, confidenceMax=1.0, iouThreshold=0.9,
occurrenceThreshold=20, excludeClasses, nWorkers=10, customDirNameFunction,
includeFolders/excludeFolders, smartSort, and the rendering/detection-tile options -- are unchanged)
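Since these hunks only touch whitespace, the option surface is unchanged. For orientation, a minimal
configuration sketch follows; the field names and the excludeClasses = [2, 3] example come from the
docstrings above, the values are illustrative, and the options object is then consumed by
find_repeat_detections / remove_repeat_detections (whose call signatures are not part of this diff).

from megadetector.postprocessing.repeat_detection_elimination import repeat_detections_core

options = repeat_detections_core.RepeatDetectionOptions()
options.imageBase = '/data/camera-traps'   # folder that filenames in the MD results .json are relative to
options.outputBase = '/data/rde-scratch'   # where the filtering_* review folder is written
options.confidenceMin = 0.1                # don't consider detections below this confidence
options.iouThreshold = 0.9                 # boxes with IoU >= 0.9 count as the same location
options.occurrenceThreshold = 20           # flag a location after 20 near-identical detections
options.excludeClasses = [2, 3]            # e.g. skip people/vehicles, per the comment above
options.nWorkers = 10                      # parallel workers for comparison and rendering
# 'options' is then passed to find_repeat_detections(); see the RDE user's guide linked above.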
@@ -278,68 +278,70 @@ class RepeatDetectionOptions:
RepeatDetectionResults.__init__ (previous attribute names not shown in this view):
+        self.rows_by_directory = None
+        self.filename_to_row = None
+        self.suspicious_detections = None
IndexedDetection.__init__ -- keyword defaults added, with a None sentinel for bbox:
-    def __init__(self, ...
+    def __init__(self, i_detection=-1, filename='', bbox=None, confidence=-1, category='unknown'):
+        if bbox is None:
+            bbox = []
+        assert isinstance(i_detection,int)
+        self.i_detection = i_detection
(other changes in this hunk are whitespace-only)
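The old default value for bbox is not visible here, but the new signature uses the standard
None-sentinel idiom, which avoids sharing one mutable default list across instances. A small
self-contained illustration of the pitfall this idiom prevents:

def bad(bbox=[]):          # one list object is created at definition time and reused
    bbox.append(1)
    return bbox

def good(bbox=None):       # the idiom used by IndexedDetection.__init__ above
    if bbox is None:
        bbox = []
    bbox.append(1)
    return bbox

assert bad() == [1]
assert bad() == [1, 1]                   # the default list leaks state between calls
assert good() == [1] and good() == [1]   # each call gets a fresh list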
@@ -354,57 +356,57 @@ class DetectionLocation:
DetectionLocation.__init__:
-    def __init__(self, instance, detection, ...
+    def __init__(self, instance, detection, relative_dir, category, id=None):
-        assert isinstance(...
+        assert isinstance(relative_dir,str)
-        self.relativeDir = ...
+        self.relativeDir = relative_dir
(other changes in this hunk, including in to_api_detection(), are whitespace-only)
@@ -415,18 +417,21 @@ class DetectionLocation:
_render_bounding_box():
-def _render_bounding_box(detection, ...
+def _render_bounding_box(detection,
+                         input_file_name,
+                         output_file_name,
+                         line_width=5,
+                         expansion=0):
     """
-    Rendering the detection [detection] on the image [...
+    Rendering the detection [detection] on the image [input_file_name], writing the result
+    to [output_file_name].
     """
-    im = open_image(...
+    im = open_image(input_file_name)
     d = detection.to_api_detection()
-    render_detection_bounding_boxes([d],im,thickness=...
+    render_detection_bounding_boxes([d],im,thickness=line_width,expansion=expansion,
                                     confidence_threshold=-10)
-    im.save(...
+    im.save(output_file_name)
@@ -434,12 +439,12 @@ def _detection_rect_to_rtree_rect(detection_rect):
     We store detections as x/y/w/h, rtree and pyqtree use l/b/r/t. Convert from
     our representation to rtree's.
     """
+    left = detection_rect[0]
+    bottom = detection_rect[1]
+    right = detection_rect[0] + detection_rect[2]
+    top = detection_rect[1] + detection_rect[3]
+    return (left,bottom,right,top)
(the removed lines computed the same tuple; their variable names are not shown in this view)
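A self-contained illustration of this conversion and its inverse, _rtree_rect_to_detection_rect();
the helpers are reimplemented locally so the snippet runs without importing the module:

def detection_rect_to_rtree_rect(r):
    # [x_min, y_min, width, height] -> (left, bottom, right, top)
    return (r[0], r[1], r[0] + r[2], r[1] + r[3])

def rtree_rect_to_detection_rect(r):
    # (left, bottom, right, top) -> (x_min, y_min, width, height)
    return (r[0], r[1], r[2] - r[0], r[3] - r[1])

box = [0.25, 0.5, 0.125, 0.25]   # normalized MD-style box (values chosen to be exact in binary)
rtree_rect = detection_rect_to_rtree_rect(box)
assert rtree_rect == (0.25, 0.5, 0.375, 0.75)
assert rtree_rect_to_detection_rect(rtree_rect) == (0.25, 0.5, 0.125, 0.25)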
@@ -447,183 +452,183 @@ def _rtree_rect_to_detection_rect(rtree_rect):
_sort_detections_for_directory():
+    if len(candidate_detections) <= 1 or options.smartSort is None:
+        return candidate_detections
+        points_array = np.array(points)
+        labels = cluster.fit_predict(points_array)
+        candidate_detections[i_label].clusterLabel = label
_find_matches_in_directory():
-def _find_matches_in_directory(dirNameAndRows, options):
+def _find_matches_in_directory(dir_name_and_rows, options):
-    dirName = dirNameAndRows[0]
-    rows = dirNameAndRows[1]
+    candidate_detections_index = pyqtree.Index(bbox=(-0.1,-0.1,1.1,1.1))
+    assert len(dir_name_and_rows) == 2, 'find_matches_in_directory: invalid input'
+    assert isinstance(dir_name_and_rows[0],str), 'find_matches_in_directory: invalid location name'
+    dir_name = dir_name_and_rows[0]
+    rows = dir_name_and_rows[1]
+        if dir_name not in options.includeFolders:
+            print('Ignoring folder {}, not in inclusion list'.format(dir_name))
+        if dir_name in options.excludeFolders:
+            print('Ignoring folder {}, on exclusion list'.format(dir_name))
+    for i_directory_row, row in rows.iterrows():
(other changes in these functions rename locals -- candidate_detections, candidate_detections_sorted,
dir_name, i_directory_row -- and strip trailing whitespace)
@@ -632,12 +637,12 @@ def _find_matches_in_directory(dirNameAndRows, options):
+            print('Skipping row {}'.format(i_directory_row))
+        max_p = float(row['max_detection_conf'])
+        if max_p < options.confidenceMin:

@@ -646,24 +651,24 @@ def _find_matches_in_directory(dirNameAndRows, options):
+        for i_detection, detection in enumerate(detections):
+                print('Skipping detection {}'.format(i_detection))

@@ -671,14 +676,14 @@ def _find_matches_in_directory(dirNameAndRows, options):
(whitespace-only changes)

@@ -686,60 +691,60 @@ def _find_matches_in_directory(dirNameAndRows, options):
+                i_class = int(detection['category'])
+                if i_class in options.excludeClasses:
+            instance = IndexedDetection(i_detection=i_detection,
+                                        filename=row['file'], bbox=bbox,
                                         confidence=confidence, category=category)
+            b_found_similar_detection = False
+            overlapping_candidate_detections =\
+                candidate_detections_index.intersect(rtree_rect)
+            overlapping_candidate_detections.sort(
                 key=lambda x: x.id, reverse=False)
+            for i_candidate, candidate in enumerate(
+                    overlapping_candidate_detections):

@@ -748,12 +753,12 @@ def _find_matches_in_directory(dirNameAndRows, options):
+                    b_found_similar_detection = True
(locals renamed to snake_case throughout: max_p, i_detection, i_class, b_found_similar_detection,
overlapping_candidate_detections, i_candidate)
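For reference, the match test above compares boxes with ct_utils.get_iou and treats anything at or
above options.iouThreshold (0.9 by default) as the same physical location. Below is a local
reimplementation of IoU for [x, y, w, h] boxes, purely for illustration; it is not the ct_utils code
itself:

def iou_xywh(a, b):
    ax1, ay1, ax2, ay2 = a[0], a[1], a[0] + a[2], a[1] + a[3]
    bx1, by1, bx2, by2 = b[0], b[1], b[0] + b[2], b[1] + b[3]
    iw = max(0.0, min(ax2, bx2) - max(ax1, bx1))   # width of the intersection
    ih = max(0.0, min(ay2, by2) - max(ay1, by1))   # height of the intersection
    inter = iw * ih
    union = a[2] * a[3] + b[2] * b[3] - inter
    return inter / union if union > 0 else 0.0

# Two boxes shifted by 1% of the image width overlap at IoU ~0.90, so with the default
# threshold they would be grouped as one candidate repeat-detection location.
print(iou_xywh([0.10, 0.10, 0.20, 0.20], [0.11, 0.10, 0.20, 0.20]))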
@@ -765,89 +770,91 @@ def _find_matches_in_directory(dirNameAndRows, options):
+            if not b_found_similar_detection:
+                candidate = DetectionLocation(instance=instance,
+                                              detection=detection,
+                                              relative_dir=dir_name,
+                                              category=category,
+                                              id=i_iteration)
+                candidate_detections_index.insert(item=candidate,bbox=rtree_rect)
+    candidate_detections = candidate_detections_index.intersect([-100,-100,100,100])
+    candidate_detections.sort(
         key=lambda x: x.id, reverse=False)
+        s = jsonpickle.encode(candidate_detections,make_refs=False)
+        # json.dump(candidate_detections,f,indent=1)
+        return candidate_detections
_update_detection_table():
-def _update_detection_table(repeatDetectionResults, options, outputFilename=None):
+def _update_detection_table(repeat_detection_results, options, output_file_name=None):
+    Changes confidence values in repeat_detection_results.detectionResults so that detections
     deemed to be possible false positives are given negative confidence values.
+    and a list of detections for each location (suspicious_detections) that are deemed to
     be suspicious.
+    returns the modified pandas dataframe (repeat_detection_results.detectionResults), but
     also modifies it in place.
+    detection_results = repeat_detection_results.detectionResults
+    suspicious_detections_by_directory = repeat_detection_results.suspicious_detections
+    n_bbox_changes = 0
+    for i_dir, directory_events in enumerate(suspicious_detections_by_directory):
+        for i_detection_event, detection_event in enumerate(directory_events):
+            location_bbox = detection_event.bbox
+            for i_instance, instance in enumerate(detection_event.instances):
+                instance_bbox = instance.bbox
+                iou = ct_utils.get_iou(instance_bbox, location_bbox)
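The spatial index used above comes from pyqtree; the Index/insert/intersect calls are the patterns
visible in this diff. A tiny standalone sketch of that usage, assuming pyqtree is installed and the
item names are made up for illustration:

import pyqtree

index = pyqtree.Index(bbox=(-0.1, -0.1, 1.1, 1.1))           # whole normalized image, with margin
index.insert(item='detection-A', bbox=(0.10, 0.10, 0.30, 0.30))
index.insert(item='detection-B', bbox=(0.60, 0.60, 0.80, 0.80))

print(index.intersect((0.05, 0.05, 0.20, 0.20)))              # only 'detection-A' overlaps this query
print(index.intersect((-100, -100, 100, 100)))                # both items, as in the "get all" query above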
@@ -855,159 +862,159 @@ def _update_detection_table(repeatDetectionResults, options, outputFilename=None

                 # if iou < options.iouThreshold:
                 #    print('IOU warning: {},{}'.format(iou,options.iouThreshold))

+                assert instance.filename in repeat_detection_results.filename_to_row
+                i_row = repeat_detection_results.filename_to_row[instance.filename]
+                row = detection_results.iloc[i_row]
+                row_detections = row['detections']
+                detection_to_modify = row_detections[instance.i_detection]

                 # Make sure the bounding box matches
+                assert (instance_bbox[0:3] == detection_to_modify['bbox'][0:3])

                 # Make the probability negative, if it hasn't been switched by
                 # another bounding box
+                if detection_to_modify['conf'] >= 0:
+                    detection_to_modify['conf'] = -1 * detection_to_modify['conf']
+                    n_bbox_changes += 1

             # ...for each instance

         # ...for each detection

     # ...for each directory

     # Update maximum probabilities

     # For each row...
+    n_prob_changes = 0
+    n_prob_changes_to_negative = 0
+    n_prob_changes_across_threshold = 0

+    for i_row, row in detection_results.iterrows():

         detections = row['detections']
         if (detections is None) or isinstance(detections,float):
             assert isinstance(row['failure'],str)
             continue

         if len(detections) == 0:
             continue

+        max_p_original = float(row['max_detection_conf'])

         # No longer strictly true; sometimes I run RDE on RDE output
+        # assert max_p_original >= 0
+        assert max_p_original >= -1.0

+        max_p = None
+        n_negative = 0

-        nNegative = 0
-
-        for iDetection, detection in enumerate(detections):
+        for i_detection, detection in enumerate(detections):

             p = detection['conf']

             if p < 0:
+                n_negative += 1

-            if (maxP is None) or (p > maxP):
-                maxP = p
+            if (max_p is None) or (p > max_p):
+                max_p = p

         # We should only be making detections *less* likely in this process
+        assert max_p <= max_p_original
+        detection_results.at[i_row, 'max_detection_conf'] = max_p

         # If there was a meaningful change, count it
+        if abs(max_p - max_p_original) > 1e-3:

+            assert max_p < max_p_original

-            nProbChanges += 1
+            n_prob_changes += 1

+            if (max_p < 0) and (max_p_original >= 0):
+                n_prob_changes_to_negative += 1

+            if (max_p_original >= options.confidenceMin) and (max_p < options.confidenceMin):
+                n_prob_changes_across_threshold += 1

+            # Negative probabilities should be the only reason max_p changed, so
             # we should have found at least one negative value if we reached
             # this point.
+            assert n_negative > 0

         # ...if there was a meaningful change to the max probability for this row

     # ...for each row

     # If we're also writing output...
+    if output_file_name is not None and len(output_file_name) > 0:
+        write_api_results(detection_results, repeat_detection_results.otherFields,
+                          output_file_name)

     print(
+        'Finished updating detection table\nChanged {} detections that impacted {} max_ps ({} to negative) ({} across confidence threshold)'.format( # noqa
+        n_bbox_changes, n_prob_changes, n_prob_changes_to_negative, n_prob_changes_across_threshold))

+    return detection_results

# ...def _update_detection_table(...)
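The net effect of the function above is that suppressed repeat detections stay in the results table but carry a negated 'conf' value. As a hedged sketch (not part of this release), a consumer of the written results file could drop them like this; the file name is a placeholder, and the 'images'/'detections'/'conf' keys follow the standard MD results format:

    import json

    # Hypothetical post-processing: remove detections whose confidence was
    # flipped negative by repeat-detection elimination
    with open('md-results.filtered.json', 'r') as f:
        md_results = json.load(f)

    for im in md_results['images']:
        detections = im.get('detections', None)
        if detections is None:
            continue
        im['detections'] = [d for d in detections if d['conf'] >= 0]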

+def _render_sample_image_for_detection(detection,filtering_dir,options):
     """
     Render a sample image for one unique detection, possibly containing lightly-colored
     high-confidence detections from elsewhere in the sample image.

     "detections" is a DetectionLocation object.

     Depends on having already sorted instances within this detection by confidence, and
     having already generated an output file name for this sample image.
     """

     # Confidence values should already have been sorted in the previous loop
     instance_confidences = [instance.confidence for instance in detection.instances]
     assert ct_utils.is_list_sorted(instance_confidences,reverse=True)

     # Choose the highest-confidence index
     instance = detection.instances[0]
+    relative_path = instance.filename

+    output_relative_path = detection.sampleImageRelativeFileName
+    assert len(output_relative_path) > 0

+    output_full_path = os.path.join(filtering_dir, output_relative_path)

     if is_sas_url(options.imageBase):
+        input_full_path = relative_sas_url(options.imageBase, relative_path)
     else:
+        input_full_path = os.path.join(options.imageBase, relative_path)
+        assert (os.path.isfile(input_full_path)), 'Not a file: {}'.\
+            format(input_full_path)

     try:

+        im = open_image(input_full_path)

         # Should we render (typically in a very light color) detections
         # *other* than the one we're highlighting here?
         if options.bRenderOtherDetections:

             # Optionally resize the output image
             if (options.maxOutputImageWidth is not None) and \
                 (im.size[0] > options.maxOutputImageWidth):
                 im = vis_utils.resize_image(im, options.maxOutputImageWidth,
                                             target_height=-1)

             assert detection.sampleImageDetections is not None

             # At this point, suspicious detections have already been flipped
             # negative, which we don't want for rendering purposes
             rendered_detections = []

             for det in detection.sampleImageDetections:
                 rendered_det = copy.copy(det)
                 rendered_det['conf'] = abs(rendered_det['conf'])
                 rendered_detections.append(rendered_det)

             # Render other detections first (typically in a thin+light box)
             render_detection_bounding_boxes(rendered_detections,
                                             im,
@@ -1016,7 +1023,7 @@ def _render_sample_image_for_detection(detection,filteringDir,options):
                                             expansion=options.boxExpansion,
                                             colormap=options.otherDetectionsColors,
                                             confidence_threshold=options.otherDetectionsThreshold)

             # Now render the example detection (on top of at least one
             # of the other detections)

@@ -1024,140 +1031,144 @@ def _render_sample_image_for_detection(detection,filteringDir,options):
             # because we just sorted this list in descending order by confidence,
             # this is the highest-confidence detection.
             d = detection.to_api_detection()

             render_detection_bounding_boxes([d],im,thickness=options.lineThickness,
                                             expansion=options.boxExpansion,
                                             confidence_threshold=-10)

+            im.save(output_full_path)

         else:

+            _render_bounding_box(detection,
+                                 input_full_path,
+                                 output_full_path,
+                                 line_width=options.lineThickness,
+                                 expansion=options.boxExpansion)

         # ...if we are/aren't rendering other bounding boxes

         # If we're rendering detection tiles, we'll re-load and re-write the image we
+        # just wrote to output_full_path
         if options.bRenderDetectionTiles:

             assert not is_sas_url(options.imageBase), "Can't render detection tiles from SAS URLs"

             if options.detectionTilesPrimaryImageWidth is not None:
+                primary_image_width = options.detectionTilesPrimaryImageWidth
             else:
                 # "im" may be a resized version of the original image, if we've already run
                 # the code to render other bounding boxes.
+                primary_image_width = im.size[0]

             if options.detectionTilesCroppedGridWidth <= 1.0:
+                cropped_grid_width = \
+                    round(options.detectionTilesCroppedGridWidth * primary_image_width)
             else:
+                cropped_grid_width = options.detectionTilesCroppedGridWidth

+            secondary_image_filename_list = []
+            secondary_image_bounding_box_list = []

             # If we start from zero, we include the sample crop
             for instance in detection.instances[0:]:
+                secondary_image_filename_list.append(os.path.join(options.imageBase,
                                                                    instance.filename))
+                secondary_image_bounding_box_list.append(instance.bbox)

             # Optionally limit the number of crops we pass to the rendering function
             if (options.detectionTilesMaxCrops is not None) and \
                 (len(detection.instances) > options.detectionTilesMaxCrops):
+                secondary_image_filename_list = \
+                    secondary_image_filename_list[0:options.detectionTilesMaxCrops]
+                secondary_image_bounding_box_list = \
+                    secondary_image_bounding_box_list[0:options.detectionTilesMaxCrops]

+            # This will over-write the image we've already written to output_full_path
             render_images_with_thumbnails.render_images_with_thumbnails(
+                primary_image_filename=output_full_path,
+                primary_image_width=primary_image_width,
+                secondary_image_filename_list=secondary_image_filename_list,
+                secondary_image_bounding_box_list=secondary_image_bounding_box_list,
+                cropped_grid_width=cropped_grid_width,
+                output_image_filename=output_full_path,
                 primary_image_location=options.detectionTilesPrimaryImageLocation)

         # ...if we are/aren't rendering detection tiles

     except Exception as e:

         stack_trace = traceback.format_exc()
         print('Warning: error rendering bounding box from {} to {}: {} ({})'.format(
+            input_full_path,output_full_path,e,stack_trace))
         if options.bFailOnRenderError:
             raise

 # ...def _render_sample_image_for_detection(...)


 #%% Main entry point

+def find_repeat_detections(input_filename, output_file_name=None, options=None):
     """
     Find detections in a MD results file that occur repeatedly and are likely to be
     rocks/sticks.

     Args:
+        input_filename (str): the MD results .json file to analyze
+        output_file_name (str, optional): the filename to which we should write results
             with repeat detections removed, typically set to None during the first
             part of the RDE process.
         options (RepeatDetectionOptions): all the interesting options controlling this
             process; see RepeatDetectionOptions for details.

     Returns:
         RepeatDetectionResults: results of the RDE process; see RepeatDetectionResults
             for details.
     """

     ##%% Input handling

     if options is None:

         options = RepeatDetectionOptions()

     # Validate some options

     if options.customDirNameFunction is not None:
         assert options.nDirLevelsFromLeaf == 0, \
             'Cannot mix custom dir name functions with nDirLevelsFromLeaf'

     if options.nDirLevelsFromLeaf != 0:
         assert options.customDirNameFunction is None, \
             'Cannot mix custom dir name functions with nDirLevelsFromLeaf'

     if options.filterFileToLoad is not None and len(options.filterFileToLoad) > 0:

         print('Bypassing detection-finding, loading from {}'.format(options.filterFileToLoad))

         # Load the filtering file
+        detection_index_file_name = options.filterFileToLoad
+        s_in = open(detection_index_file_name, 'r').read()
+        detection_info = jsonpickle.decode(s_in)
+        filtering_base_dir = os.path.dirname(options.filterFileToLoad)
+        suspicious_detections = detection_info['suspicious_detections']

         # Load the same options we used when finding repeat detections
+        options = detection_info['options']

         # ...except for things that explicitly tell this function not to
         # find repeat detections.
+        options.filterFileToLoad = detection_index_file_name
         options.bWriteFilteringFolder = False

     # ...if we're loading from an existing filtering file

-    toReturn = RepeatDetectionResults()
+    to_return = RepeatDetectionResults()

     # Check early to avoid problems with the output folder

     if options.bWriteFilteringFolder:
         assert options.outputBase is not None and len(options.outputBase) > 0
         os.makedirs(options.outputBase,exist_ok=True)

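The filtering file loaded above (and written near the end of this function) is a jsonpickle-serialized dict. As a hedged, standalone sketch of that round-trip, using an illustrative stand-in class rather than the real DetectionLocation objects and placeholder values:

    import jsonpickle

    # Illustrative stand-in for the real detection-index payload
    class Dummy:
        def __init__(self, bbox):
            self.bbox = bbox

    payload = {'suspicious_detections': [[Dummy([0.1, 0.2, 0.3, 0.4])]],
               'options': {'iouThreshold': 0.85}}

    # Encode without shared-reference bookkeeping (make_refs=False, as in this
    # diff), then decode back into Python objects
    encoded = jsonpickle.encode(payload, make_refs=False)
    decoded = jsonpickle.decode(encoded)
    assert decoded['suspicious_detections'][0][0].bbox == [0.1, 0.2, 0.3, 0.4]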
@@ -1165,189 +1176,189 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):

     # Load file to a pandas dataframe. Also populates 'max_detection_conf', even if it's
     # not present in the .json file.
-    # detectionResults[detectionResults['failure'].notna()]
+    detection_results, other_fields = load_api_results(input_filename, normalize_paths=True,
+                                                       filename_replacements=options.filenameReplacements,
+                                                       force_forward_slashes=True)
+    to_return.detectionResults = detection_results
+    to_return.otherFields = other_fields

     # Before doing any real work, make sure we can *probably* access images
     # This is just a cursory check on the first image, but it heads off most
     # problems related to incorrect mount points, etc. Better to do this before
     # spending 20 minutes finding repeat detections.

     if options.bWriteFilteringFolder:

         if not is_sas_url(options.imageBase):

+            row = detection_results.iloc[0]
+            relative_path = row['file']
             if options.filenameReplacements is not None:
                 for s in options.filenameReplacements.keys():
+                    relative_path = relative_path.replace(s,options.filenameReplacements[s])
+            absolute_path = os.path.join(options.imageBase,relative_path)
+            assert os.path.isfile(absolute_path), 'Could not find file {}'.format(absolute_path)


     ##%% Separate files into locations

     # This will be a map from a directory name to smaller data frames
+    rows_by_directory = {}

     # This is a mapping back into the rows of the original table
+    filename_to_row = {}

     print('Separating images into locations...')

+    n_custom_dir_replacements = 0

+    # i_row = 0; row = detection_results.iloc[i_row]
+    for i_row, row in tqdm(detection_results.iterrows(),total=len(detection_results)):

+        relative_path = row['file']

         if options.customDirNameFunction is not None:
+            basic_dir_name = os.path.dirname(relative_path.replace('\\','/'))
+            dir_name = options.customDirNameFunction(relative_path)
+            if basic_dir_name != dir_name:
+                n_custom_dir_replacements += 1
         else:
+            dir_name = os.path.dirname(relative_path)

+        if len(dir_name) == 0:
             assert options.nDirLevelsFromLeaf == 0, \
                 'Can''t use the dirLevelsFromLeaf option with flat filenames'
         else:
             if options.nDirLevelsFromLeaf > 0:
+                i_level = 0
+                while (i_level < options.nDirLevelsFromLeaf):
+                    i_level += 1
+                    dir_name = os.path.dirname(dir_name)
+                assert len(dir_name) > 0

+        if dir_name not in rows_by_directory:
             # Create a new DataFrame with just this row
+            # rows_by_directory[dir_name] = pd.DataFrame(row)
+            rows_by_directory[dir_name] = []

+        rows_by_directory[dir_name].append(row)

+        assert relative_path not in filename_to_row
+        filename_to_row[relative_path] = i_row

     # ...for each unique detection

     if options.customDirNameFunction is not None:
         print('Custom dir name function made {} replacements (of {} images)'.format(
+            n_custom_dir_replacements,len(detection_results)))

     # Convert lists of rows to proper DataFrames
+    dirs = list(rows_by_directory.keys())
     for d in dirs:
+        rows_by_directory[d] = pd.DataFrame(rows_by_directory[d])

+    to_return.rows_by_directory = rows_by_directory
+    to_return.filename_to_row = filename_to_row

-    print('Finished separating {} files into {} locations'.format(len(detectionResults),
-                                                                   len(rowsByDirectory)))
+    print('Finished separating {} files into {} locations'.format(len(detection_results),
+                                                                   len(rows_by_directory)))

     ##% Look for repeat detections (or load them from file)

+    dirs_to_search = list(rows_by_directory.keys())
     if options.debugMaxDir > 0:
+        dirs_to_search = dirs_to_search[0:options.debugMaxDir]

     # Map numeric directory indices to names (we'll write this out to the detection index .json file)
+    dir_index_to_name = {}
+    for i_dir, dir_name in enumerate(dirs_to_search):
+        dir_index_to_name[i_dir] = dir_name

     # Are we actually looking for matches, or just loading from a file?
     if len(options.filterFileToLoad) == 0:

         # length-nDirs list of lists of DetectionLocation objects
+        suspicious_detections = [None] * len(dirs_to_search)

         # We're actually looking for matches...
         print('Finding similar detections...')

+        dir_name_and_rows = []
+        for dir_name in dirs_to_search:
+            rows_this_directory = rows_by_directory[dir_name]
+            dir_name_and_rows.append((dir_name,rows_this_directory))

+        all_candidate_detections = [None] * len(dirs_to_search)

         # If we serialize results to intermediate files, we need to remove slashes from
         # location names; we store mappings here.
         normalized_location_name_to_location_name = None
         location_name_to_normalized_location_name = None

         if not options.bParallelizeComparisons:

             options.pbar = None
+            for i_dir, dir_name in tqdm(enumerate(dirs_to_search)):
+                dir_name_and_row = dir_name_and_rows[i_dir]
+                assert dir_name_and_row[0] == dir_name
+                print('Processing dir {} of {}: {}'.format(i_dir,len(dirs_to_search),dir_name))
+                all_candidate_detections[i_dir] = \
+                    _find_matches_in_directory(dir_name_and_row, options)

         else:

             n_workers = options.nWorkers
+            if n_workers > len(dir_name_and_rows):
                 print('Pool of {} requested, but only {} folders available, reducing pool to {}'.\
+                      format(n_workers,len(dir_name_and_rows),len(dir_name_and_rows)))
+                n_workers = len(dir_name_and_rows)

+            pool = None

             if options.parallelizationUsesThreads:
                 pool = ThreadPool(n_workers); poolstring = 'threads'
             else:
                 pool = Pool(n_workers); poolstring = 'processes'

             print('Starting comparison pool with {} {}'.format(n_workers,poolstring))

             assert options.pass_detections_to_processes_method in ('file','memory'), \
                 'Unrecognized IPC mechanism: {}'.format(options.pass_detections_to_processes_method)

             # ** Experimental **
             #
             # Rather than passing detections and results around in memory, write detections and
             # results for each worker to intermediate files. May improve performance for very large
             # results sets that exceed working memory.
             if options.pass_detections_to_processes_method == 'file':

                 ##%% Convert location names to normalized names we can write to files

                 normalized_location_name_to_location_name = {}
+                for dir_name in dirs_to_search:
                     normalized_location_name = flatten_path(dir_name)
                     assert normalized_location_name not in normalized_location_name_to_location_name, \
                         'Redundant location name {}, can\'t serialize to intermediate files'.format(
                             dir_name)
                     normalized_location_name_to_location_name[normalized_location_name] = dir_name

                 location_name_to_normalized_location_name = \
                     invert_dictionary(normalized_location_name_to_location_name)

                 ##%% Write results to files for each location

                 print('Writing results to intermediate files')

                 intermediate_json_file_folder = os.path.join(options.outputBase,'intermediate_results')
                 os.makedirs(intermediate_json_file_folder,exist_ok=True)

+                dir_name_and_intermediate_file = []

+                # i_location = 0; location_info = dir_name_and_rows[i_location]
+                for i_location, location_info in tqdm(enumerate(dir_name_and_rows)):

                     location_name = location_info[0]
                     assert location_name in location_name_to_normalized_location_name
                     normalized_location_name = location_name_to_normalized_location_name[location_name]
@@ -1355,181 +1366,189 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):
                                                               normalized_location_name + '.csv')
                     detections_table_this_location = location_info[1]
                     detections_table_this_location.to_csv(intermediate_results_file,header=True,index=False)
+                    dir_name_and_intermediate_file.append((location_name,intermediate_results_file))

                 ##%% Find detections in each directory

                 options.pbar = None
+                all_candidate_detection_files = list(pool.imap(
+                    partial(_find_matches_in_directory,options=options), dir_name_and_intermediate_file))

                 ##%% Load into a combined list of candidate detections

+                all_candidate_detections = []

+                # candidate_detection_file = all_candidate_detection_files[0]
+                for candidate_detection_file in all_candidate_detection_files:
                     s = open(candidate_detection_file, 'r').read()
                     candidate_detections_this_file = jsonpickle.decode(s)
+                    all_candidate_detections.append(candidate_detections_this_file)

                 ##%% Clean up intermediate files

                 shutil.rmtree(intermediate_json_file_folder)

             # If we're passing things around in memory, rather than via intermediate files
             else:

                 # We get slightly nicer progress bar behavior using threads, by passing a pbar
                 # object and letting it get updated. We can't serialize this object across
                 # processes.
                 if options.parallelizationUsesThreads:
+                    options.pbar = tqdm(total=len(dir_name_and_rows))
+                    all_candidate_detections = list(pool.imap(
+                        partial(_find_matches_in_directory,options=options), dir_name_and_rows))
                 else:
                     options.pbar = None
+                    all_candidate_detections = list(tqdm(pool.imap(
+                        partial(_find_matches_in_directory,options=options), dir_name_and_rows)))

+        # ...if we're parallelizing comparisons

+        if pool is not None:
+            try:
+                pool.close()
+                pool.join()
+                print("Pool closed and joined for RDE comparisons")
+            except Exception as e:
+                print('Warning: error closing RDE comparison pool: {}'.format(str(e)))

         print('\nFinished looking for similar detections')

         ##%% Mark suspicious locations based on match results

         print('Marking repeat detections...')

+        n_images_with_suspicious_detections = 0
+        n_suspicious_detections = 0

         # For each directory
-        # iDir = 51
-        for iDir in range(len(dirsToSearch)):
+        for i_dir in range(len(dirs_to_search)):

             # A list of DetectionLocation objects
+            suspicious_detections_this_dir = []

             # A list of DetectionLocation objects
+            candidate_detections_this_dir = all_candidate_detections[i_dir]

+            for i_location, candidate_location in enumerate(candidate_detections_this_dir):

                 # occurrenceList is a list of file/detection pairs
+                n_occurrences = len(candidate_location.instances)

+                if n_occurrences < options.occurrenceThreshold:
                     continue

+                n_images_with_suspicious_detections += n_occurrences
+                n_suspicious_detections += 1

+                suspicious_detections_this_dir.append(candidate_location)

+            suspicious_detections[i_dir] = suspicious_detections_this_dir

             # Sort the above-threshold detections for easier review
             if options.smartSort is not None:
+                suspicious_detections[i_dir] = _sort_detections_for_directory(
+                    suspicious_detections[i_dir],options)

             print('Found {} suspicious detections in directory {} ({})'.format(
+                len(suspicious_detections[i_dir]),i_dir,dirs_to_search[i_dir]))

         # ...for each directory

         print('Finished marking repeat detections')

         print('Found {} unique detections on {} images that are suspicious'.format(
+            n_suspicious_detections, n_images_with_suspicious_detections))

     # If we're just loading detections from a file...
     else:

+        assert len(suspicious_detections) == len(dirs_to_search)

+        n_detections_removed = 0
+        n_detections_loaded = 0

         # We're skipping detection-finding, but to see which images are actually legit false
         # positives, we may be looking for physical files or loading from a text file.
+        file_list = None
         if options.filteredFileListToLoad is not None:
             with open(options.filteredFileListToLoad) as f:
+                file_list = f.readlines()
+                file_list = [x.strip() for x in file_list]
+            n_suspicious_detections = sum([len(x) for x in suspicious_detections])
             print('Loaded false positive list from file ' + \
                   'will remove {} of {} suspicious detections'.format(
+                      len(file_list), n_suspicious_detections))

         # For each directory
+        # i_dir = 0; detections = suspicious_detections[0]
         #
+        # suspicious_detections is an array of DetectionLocation objects,
         # one per directory.
+        for i_dir, detections in enumerate(suspicious_detections):

+            b_valid_detection = [True] * len(detections)
+            n_detections_loaded += len(detections)

             # For each detection that was present before filtering
+            # i_detection = 0; detection = detections[i_detection]
+            for i_detection, detection in enumerate(detections):

                 # Are we checking the directory to see whether detections were actually false
                 # positives, or reading from a list?
+                if file_list is None:

                     # Is the image still there?
+                    image_full_path = os.path.join(filtering_base_dir,
+                                                   detection.sampleImageRelativeFileName)

                     # If not, remove this from the list of suspicious detections
+                    if not os.path.isfile(image_full_path):
+                        n_detections_removed += 1
+                        b_valid_detection[i_detection] = False

                 else:

+                    if detection.sampleImageRelativeFileName not in file_list:
+                        n_detections_removed += 1
+                        b_valid_detection[i_detection] = False

             # ...for each detection

+            n_removed_this_dir = len(b_valid_detection) - sum(b_valid_detection)
+            if n_removed_this_dir > 0:
                 print('Removed {} of {} detections from directory {}'.\
+                      format(n_removed_this_dir,len(detections), i_dir))

+            detections_filtered = list(compress(detections, b_valid_detection))
+            suspicious_detections[i_dir] = detections_filtered

         # ...for each directory

         print('Removed {} of {} total detections via manual filtering'.\
+              format(n_detections_removed, n_detections_loaded))

     # ...if we are/aren't finding detections (vs. loading from file)

+    to_return.suspicious_detections = suspicious_detections

-    toReturn.allRowsFiltered = _update_detection_table(toReturn, options, outputFilename)
+    to_return.allRowsFiltered = _update_detection_table(to_return, options, output_file_name)

     ##%% Create filtering directory

     if options.bWriteFilteringFolder:

         print('Creating filtering folder...')

+        date_string = datetime.now().strftime('%Y.%m.%d.%H.%M.%S')
+        filtering_dir = os.path.join(options.outputBase, 'filtering_' + date_string)
+        os.makedirs(filtering_dir, exist_ok=True)

         # Take a first loop over every suspicious detection, and do the things that make
         # sense to do in a serial sampleImageDetectionsloop:
@@ -1538,101 +1557,107 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):
         # * Sort instances by confidence
         # * Look up detections for each sample image in the big table (so we don't have to pass the
         #   table to workers)
+        for i_dir, suspicious_detections_this_dir in enumerate(tqdm(suspicious_detections)):

+            for i_detection, detection in enumerate(suspicious_detections_this_dir):

                 # Sort instances in descending order by confidence
                 detection.instances.sort(key=attrgetter('confidence'),reverse=True)

                 if detection.clusterLabel is not None:
+                    cluster_string = '_c{:0>4d}'.format(detection.clusterLabel)
                 else:
+                    cluster_string = ''

                 # Choose the highest-confidence index
                 instance = detection.instances[0]
+                relative_path = instance.filename

+                output_relative_path = 'dir{:0>4d}_det{:0>4d}{}_n{:0>4d}.jpg'.format(
+                    i_dir, i_detection, cluster_string, len(detection.instances))
+                detection.sampleImageRelativeFileName = output_relative_path

+                i_row = filename_to_row[relative_path]
+                row = detection_results.iloc[i_row]
                 detection.sampleImageDetections = row['detections']

             # ...for each suspicious detection in this folder

         # ...for each folder

         # Collapse suspicious detections into a flat list
+        all_suspicious_detections = []

+        # i_dir = 0; suspicious_detections_this_dir = suspicious_detections[i_dir]
+        for i_dir, suspicious_detections_this_dir in enumerate(tqdm(suspicious_detections)):
+            for i_detection, detection in enumerate(suspicious_detections_this_dir):
+                all_suspicious_detections.append(detection)

         # Render suspicious detections
         if options.bParallelizeRendering:

             n_workers = options.nWorkers

-            if options.parallelizationUsesThreads:
-                pool = ThreadPool(n_workers); poolstring = 'threads'
-            else:
-                pool = Pool(n_workers); poolstring = 'processes'
+            pool = None

+            try:
+                if options.parallelizationUsesThreads:
+                    pool = ThreadPool(n_workers); poolstring = 'threads'
+                else:
+                    pool = Pool(n_workers); poolstring = 'processes'

+                print('Starting rendering pool with {} {}'.format(n_workers,poolstring))

+                # We get slightly nicer progress bar behavior using threads, by passing a pbar
+                # object and letting it get updated. We can't serialize this object across
+                # processes.
+                if options.parallelizationUsesThreads:
+                    options.pbar = tqdm(total=len(all_suspicious_detections))
+                    all_candidate_detections = list(pool.imap(
+                        partial(_render_sample_image_for_detection,filtering_dir=filtering_dir,
+                                options=options), all_suspicious_detections))
+                else:
+                    options.pbar = None
+                    all_candidate_detections = list(tqdm(pool.imap(
+                        partial(_render_sample_image_for_detection,filtering_dir=filtering_dir,
+                                options=options), all_suspicious_detections)))
+            finally:
+                if pool is not None:
+                    pool.close()
+                    pool.join()
+                    print("Pool closed and joined for RDE rendering")

         else:

             # Serial loop over detections
+            for detection in all_suspicious_detections:
+                _render_sample_image_for_detection(detection,filtering_dir,options)

         # Delete (large) temporary data from the list of suspicious detections
+        for detection in all_suspicious_detections:
+            detection.sampleImageDetections = None

         # Write out the detection index
+        detection_index_file_name = os.path.join(filtering_dir, detection_index_file_name_base)

         # Prepare the data we're going to write to the detection index file
+        detection_info = {}

+        detection_info['suspicious_detections'] = suspicious_detections
+        detection_info['dir_index_to_name'] = dir_index_to_name

         # Remove the one non-serializable object from the options struct before serializing
         # to .json
         options.pbar = None
+        detection_info['options'] = options

-        s = jsonpickle.encode(detectionInfo,make_refs=False)
-        with open(detectionIndexFileName, 'w') as f:
-            f.write(s)
-        toReturn.filterFile = detectionIndexFileName
+        s = jsonpickle.encode(detection_info,make_refs=False)
+        with open(detection_index_file_name, 'w') as f:
+            f.write(s)
+        to_return.filterFile = detection_index_file_name

     # ...if we're writing filtering info

+    return to_return

 # ...def find_repeat_detections()
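For orientation, a hedged usage sketch of the function above. The option attribute names (imageBase, outputBase, bWriteFilteringFolder) and the filterFile result field appear in this diff; the file paths are placeholders, and the import path is an assumption about where this module lives in the installed package:

    # Hypothetical driver script for the first half of the RDE workflow
    from megadetector.postprocessing.repeat_detection_elimination.repeat_detections_core import (
        RepeatDetectionOptions, find_repeat_detections)

    options = RepeatDetectionOptions()
    options.imageBase = '/data/camera-trap-images'   # placeholder image folder
    options.outputBase = '/data/rde-scratch'         # placeholder scratch folder
    options.bWriteFilteringFolder = True             # render sample images for manual review

    results = find_repeat_detections('md-results.json', output_file_name=None, options=options)
    print('Wrote filtering index to {}'.format(results.filterFile))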