megadetector 5.0.11-py3-none-any.whl → 5.0.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (203)
  1. megadetector/api/__init__.py +0 -0
  2. megadetector/api/batch_processing/__init__.py +0 -0
  3. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  4. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. megadetector/api/batch_processing/api_core/batch_service/score.py +439 -0
  6. megadetector/api/batch_processing/api_core/server.py +294 -0
  7. megadetector/api/batch_processing/api_core/server_api_config.py +97 -0
  8. megadetector/api/batch_processing/api_core/server_app_config.py +55 -0
  9. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +220 -0
  10. megadetector/api/batch_processing/api_core/server_job_status_table.py +149 -0
  11. megadetector/api/batch_processing/api_core/server_orchestration.py +360 -0
  12. megadetector/api/batch_processing/api_core/server_utils.py +88 -0
  13. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  14. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +46 -0
  15. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  16. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +152 -0
  17. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  18. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  19. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  20. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  21. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
  22. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  23. megadetector/api/synchronous/__init__.py +0 -0
  24. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  25. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +152 -0
  26. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +263 -0
  27. megadetector/api/synchronous/api_core/animal_detection_api/config.py +35 -0
  28. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  29. megadetector/api/synchronous/api_core/tests/load_test.py +110 -0
  30. megadetector/classification/__init__.py +0 -0
  31. megadetector/classification/aggregate_classifier_probs.py +108 -0
  32. megadetector/classification/analyze_failed_images.py +227 -0
  33. megadetector/classification/cache_batchapi_outputs.py +198 -0
  34. megadetector/classification/create_classification_dataset.py +627 -0
  35. megadetector/classification/crop_detections.py +516 -0
  36. megadetector/classification/csv_to_json.py +226 -0
  37. megadetector/classification/detect_and_crop.py +855 -0
  38. megadetector/classification/efficientnet/__init__.py +9 -0
  39. megadetector/classification/efficientnet/model.py +415 -0
  40. megadetector/classification/efficientnet/utils.py +607 -0
  41. megadetector/classification/evaluate_model.py +520 -0
  42. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  43. megadetector/classification/json_to_azcopy_list.py +63 -0
  44. megadetector/classification/json_validator.py +699 -0
  45. megadetector/classification/map_classification_categories.py +276 -0
  46. megadetector/classification/merge_classification_detection_output.py +506 -0
  47. megadetector/classification/prepare_classification_script.py +194 -0
  48. megadetector/classification/prepare_classification_script_mc.py +228 -0
  49. megadetector/classification/run_classifier.py +287 -0
  50. megadetector/classification/save_mislabeled.py +110 -0
  51. megadetector/classification/train_classifier.py +827 -0
  52. megadetector/classification/train_classifier_tf.py +725 -0
  53. megadetector/classification/train_utils.py +323 -0
  54. megadetector/data_management/__init__.py +0 -0
  55. megadetector/data_management/annotations/__init__.py +0 -0
  56. megadetector/data_management/annotations/annotation_constants.py +34 -0
  57. megadetector/data_management/camtrap_dp_to_coco.py +237 -0
  58. megadetector/data_management/cct_json_utils.py +404 -0
  59. megadetector/data_management/cct_to_md.py +176 -0
  60. megadetector/data_management/cct_to_wi.py +289 -0
  61. megadetector/data_management/coco_to_labelme.py +283 -0
  62. megadetector/data_management/coco_to_yolo.py +662 -0
  63. megadetector/data_management/databases/__init__.py +0 -0
  64. megadetector/data_management/databases/add_width_and_height_to_db.py +33 -0
  65. megadetector/data_management/databases/combine_coco_camera_traps_files.py +206 -0
  66. megadetector/data_management/databases/integrity_check_json_db.py +493 -0
  67. megadetector/data_management/databases/subset_json_db.py +115 -0
  68. megadetector/data_management/generate_crops_from_cct.py +149 -0
  69. megadetector/data_management/get_image_sizes.py +189 -0
  70. megadetector/data_management/importers/add_nacti_sizes.py +52 -0
  71. megadetector/data_management/importers/add_timestamps_to_icct.py +79 -0
  72. megadetector/data_management/importers/animl_results_to_md_results.py +158 -0
  73. megadetector/data_management/importers/auckland_doc_test_to_json.py +373 -0
  74. megadetector/data_management/importers/auckland_doc_to_json.py +201 -0
  75. megadetector/data_management/importers/awc_to_json.py +191 -0
  76. megadetector/data_management/importers/bellevue_to_json.py +273 -0
  77. megadetector/data_management/importers/cacophony-thermal-importer.py +793 -0
  78. megadetector/data_management/importers/carrizo_shrubfree_2018.py +269 -0
  79. megadetector/data_management/importers/carrizo_trail_cam_2017.py +289 -0
  80. megadetector/data_management/importers/cct_field_adjustments.py +58 -0
  81. megadetector/data_management/importers/channel_islands_to_cct.py +913 -0
  82. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +180 -0
  83. megadetector/data_management/importers/eMammal/eMammal_helpers.py +249 -0
  84. megadetector/data_management/importers/eMammal/make_eMammal_json.py +223 -0
  85. megadetector/data_management/importers/ena24_to_json.py +276 -0
  86. megadetector/data_management/importers/filenames_to_json.py +386 -0
  87. megadetector/data_management/importers/helena_to_cct.py +283 -0
  88. megadetector/data_management/importers/idaho-camera-traps.py +1407 -0
  89. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +294 -0
  90. megadetector/data_management/importers/jb_csv_to_json.py +150 -0
  91. megadetector/data_management/importers/mcgill_to_json.py +250 -0
  92. megadetector/data_management/importers/missouri_to_json.py +490 -0
  93. megadetector/data_management/importers/nacti_fieldname_adjustments.py +79 -0
  94. megadetector/data_management/importers/noaa_seals_2019.py +181 -0
  95. megadetector/data_management/importers/pc_to_json.py +365 -0
  96. megadetector/data_management/importers/plot_wni_giraffes.py +123 -0
  97. megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -0
  98. megadetector/data_management/importers/prepare_zsl_imerit.py +131 -0
  99. megadetector/data_management/importers/rspb_to_json.py +356 -0
  100. megadetector/data_management/importers/save_the_elephants_survey_A.py +320 -0
  101. megadetector/data_management/importers/save_the_elephants_survey_B.py +329 -0
  102. megadetector/data_management/importers/snapshot_safari_importer.py +758 -0
  103. megadetector/data_management/importers/snapshot_safari_importer_reprise.py +665 -0
  104. megadetector/data_management/importers/snapshot_serengeti_lila.py +1067 -0
  105. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +150 -0
  106. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +153 -0
  107. megadetector/data_management/importers/sulross_get_exif.py +65 -0
  108. megadetector/data_management/importers/timelapse_csv_set_to_json.py +490 -0
  109. megadetector/data_management/importers/ubc_to_json.py +399 -0
  110. megadetector/data_management/importers/umn_to_json.py +507 -0
  111. megadetector/data_management/importers/wellington_to_json.py +263 -0
  112. megadetector/data_management/importers/wi_to_json.py +442 -0
  113. megadetector/data_management/importers/zamba_results_to_md_results.py +181 -0
  114. megadetector/data_management/labelme_to_coco.py +547 -0
  115. megadetector/data_management/labelme_to_yolo.py +272 -0
  116. megadetector/data_management/lila/__init__.py +0 -0
  117. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +97 -0
  118. megadetector/data_management/lila/add_locations_to_nacti.py +147 -0
  119. megadetector/data_management/lila/create_lila_blank_set.py +558 -0
  120. megadetector/data_management/lila/create_lila_test_set.py +152 -0
  121. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  122. megadetector/data_management/lila/download_lila_subset.py +178 -0
  123. megadetector/data_management/lila/generate_lila_per_image_labels.py +516 -0
  124. megadetector/data_management/lila/get_lila_annotation_counts.py +170 -0
  125. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  126. megadetector/data_management/lila/lila_common.py +300 -0
  127. megadetector/data_management/lila/test_lila_metadata_urls.py +132 -0
  128. megadetector/data_management/ocr_tools.py +870 -0
  129. megadetector/data_management/read_exif.py +809 -0
  130. megadetector/data_management/remap_coco_categories.py +84 -0
  131. megadetector/data_management/remove_exif.py +66 -0
  132. megadetector/data_management/rename_images.py +187 -0
  133. megadetector/data_management/resize_coco_dataset.py +189 -0
  134. megadetector/data_management/wi_download_csv_to_coco.py +247 -0
  135. megadetector/data_management/yolo_output_to_md_output.py +446 -0
  136. megadetector/data_management/yolo_to_coco.py +676 -0
  137. megadetector/detection/__init__.py +0 -0
  138. megadetector/detection/detector_training/__init__.py +0 -0
  139. megadetector/detection/detector_training/model_main_tf2.py +114 -0
  140. megadetector/detection/process_video.py +846 -0
  141. megadetector/detection/pytorch_detector.py +355 -0
  142. megadetector/detection/run_detector.py +779 -0
  143. megadetector/detection/run_detector_batch.py +1219 -0
  144. megadetector/detection/run_inference_with_yolov5_val.py +1087 -0
  145. megadetector/detection/run_tiled_inference.py +934 -0
  146. megadetector/detection/tf_detector.py +192 -0
  147. megadetector/detection/video_utils.py +698 -0
  148. megadetector/postprocessing/__init__.py +0 -0
  149. megadetector/postprocessing/add_max_conf.py +64 -0
  150. megadetector/postprocessing/categorize_detections_by_size.py +165 -0
  151. megadetector/postprocessing/classification_postprocessing.py +716 -0
  152. megadetector/postprocessing/combine_api_outputs.py +249 -0
  153. megadetector/postprocessing/compare_batch_results.py +966 -0
  154. megadetector/postprocessing/convert_output_format.py +396 -0
  155. megadetector/postprocessing/load_api_results.py +195 -0
  156. megadetector/postprocessing/md_to_coco.py +310 -0
  157. megadetector/postprocessing/md_to_labelme.py +330 -0
  158. megadetector/postprocessing/merge_detections.py +412 -0
  159. megadetector/postprocessing/postprocess_batch_results.py +1908 -0
  160. megadetector/postprocessing/remap_detection_categories.py +170 -0
  161. megadetector/postprocessing/render_detection_confusion_matrix.py +660 -0
  162. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +211 -0
  163. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +83 -0
  164. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1635 -0
  165. megadetector/postprocessing/separate_detections_into_folders.py +730 -0
  166. megadetector/postprocessing/subset_json_detector_output.py +700 -0
  167. megadetector/postprocessing/top_folders_to_bottom.py +223 -0
  168. megadetector/taxonomy_mapping/__init__.py +0 -0
  169. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  170. megadetector/taxonomy_mapping/map_new_lila_datasets.py +150 -0
  171. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -0
  172. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +588 -0
  173. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  174. megadetector/taxonomy_mapping/simple_image_download.py +219 -0
  175. megadetector/taxonomy_mapping/species_lookup.py +834 -0
  176. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  177. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  178. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  179. megadetector/utils/__init__.py +0 -0
  180. megadetector/utils/azure_utils.py +178 -0
  181. megadetector/utils/ct_utils.py +613 -0
  182. megadetector/utils/directory_listing.py +246 -0
  183. megadetector/utils/md_tests.py +1164 -0
  184. megadetector/utils/path_utils.py +1045 -0
  185. megadetector/utils/process_utils.py +160 -0
  186. megadetector/utils/sas_blob_utils.py +509 -0
  187. megadetector/utils/split_locations_into_train_val.py +228 -0
  188. megadetector/utils/string_utils.py +92 -0
  189. megadetector/utils/url_utils.py +323 -0
  190. megadetector/utils/write_html_image_list.py +225 -0
  191. megadetector/visualization/__init__.py +0 -0
  192. megadetector/visualization/plot_utils.py +293 -0
  193. megadetector/visualization/render_images_with_thumbnails.py +275 -0
  194. megadetector/visualization/visualization_utils.py +1536 -0
  195. megadetector/visualization/visualize_db.py +552 -0
  196. megadetector/visualization/visualize_detector_output.py +405 -0
  197. {megadetector-5.0.11.dist-info → megadetector-5.0.13.dist-info}/LICENSE +0 -0
  198. {megadetector-5.0.11.dist-info → megadetector-5.0.13.dist-info}/METADATA +2 -2
  199. megadetector-5.0.13.dist-info/RECORD +201 -0
  200. megadetector-5.0.13.dist-info/top_level.txt +1 -0
  201. megadetector-5.0.11.dist-info/RECORD +0 -5
  202. megadetector-5.0.11.dist-info/top_level.txt +0 -1
  203. {megadetector-5.0.11.dist-info → megadetector-5.0.13.dist-info}/WHEEL +0 -0
@@ -0,0 +1,1635 @@
+ """
+
+ repeat_detections_core.py
+
+ Core utilities shared by find_repeat_detections and remove_repeat_detections.
+
+ Nothing in this file (in fact nothing in this subpackage) will make sense until you read
+ the RDE user's guide:
+
+ https://github.com/agentmorris/MegaDetector/tree/main/megadetector/postprocessing/repeat_detection_elimination
+
+ """
+
+ #%% Imports and environment
+
+ import os
+ import copy
+ import warnings
+ import sklearn.cluster
+ import numpy as np
+ import jsonpickle
+ import traceback
+ import pandas as pd
+ import json
+ import shutil
+
+ from tqdm import tqdm
+ from operator import attrgetter
+ from datetime import datetime
+ from itertools import compress
+
+ import pyqtree
+
+ from multiprocessing.pool import ThreadPool
+ from multiprocessing.pool import Pool
+ from functools import partial
+
+ from megadetector.utils import path_utils
+ from megadetector.utils import ct_utils
+ from megadetector.postprocessing.load_api_results import load_api_results, write_api_results
+ from megadetector.postprocessing.postprocess_batch_results import is_sas_url
+ from megadetector.postprocessing.postprocess_batch_results import relative_sas_url
+ from megadetector.visualization.visualization_utils import open_image, render_detection_bounding_boxes
+ from megadetector.visualization import render_images_with_thumbnails
+ from megadetector.visualization import visualization_utils as vis_utils
+ from megadetector.utils.path_utils import flatten_path
+ from megadetector.utils.ct_utils import invert_dictionary
+
+ # "PIL cannot read EXIF metainfo for the images"
+ warnings.filterwarnings('ignore', '(Possibly )?corrupt EXIF data', UserWarning)
+
+ # "Metadata Warning, tag 256 had too many entries: 42, expected 1"
+ warnings.filterwarnings('ignore', 'Metadata warning', UserWarning)
+
+ jsonpickle.set_encoder_options('json', sort_keys=True, indent=1)
+
+
+ #%% Constants
+
+ detection_index_file_name_base = 'detectionIndex.json'
+
+
+ #%% Classes
+
+ class RepeatDetectionOptions:
+     """
+     Options that control the behavior of repeat detection elimination
+     """
+
+     def __init__(self):
+
+         #: Folder where images live; filenames in the MD results .json file should
+         #: be relative to this folder.
+         #:
+         #: imageBase can also be a SAS URL, in which case some error-checking is
+         #: disabled.
+         self.imageBase = ''
+
+         #: Folder where we should write temporary output.
+         self.outputBase = ''
+
+         #: Don't consider detections with confidence lower than this as suspicious
+         self.confidenceMin = 0.1
+
+         #: Don't consider detections with confidence higher than this as suspicious
+         self.confidenceMax = 1.0
+
+         #: What's the IOU threshold for considering two boxes the same?
+         self.iouThreshold = 0.9
+
+         #: How many occurrences of a single location (as defined by the IOU threshold)
+         #: are required before we declare it suspicious?
+         self.occurrenceThreshold = 20
+
+         #: Ignore "suspicious" detections smaller than some size
+         self.minSuspiciousDetectionSize = 0.0
+
+         #: Ignore "suspicious" detections larger than some size; these are often animals
+         #: taking up the whole image. This is expressed as a fraction of the image size.
+         self.maxSuspiciousDetectionSize = 0.2
+
+         #: Ignore folders with more than this many images in them
+         self.maxImagesPerFolder = None
+
+         #: A list of category IDs (ints) that we don't want to consider as candidate repeat detections.
+         #:
+         #: Typically used to say, e.g., "don't bother analyzing people or vehicles for repeat
+         #: detections", which you could do by saying excludeClasses = [2,3].
+         self.excludeClasses = []
+
+         #: For very large sets of results, passing chunks of results to and from workers as
+         #: parameters ('memory') can be memory-intensive, so we can serialize to intermediate
+         #: files instead ('file').
+         #:
+         #: The use of 'file' here is still experimental.
+         self.pass_detections_to_processes_method = 'memory'
+
+         #: Number of workers to use for parallel operations
+         self.nWorkers = 10
+
+         #: Should we use threads (True) or processes (False) for parallelization?
+         #:
+         #: Not relevant if nWorkers <= 1, or if bParallelizeComparisons and
+         #: bParallelizeRendering are both False.
+         self.parallelizationUsesThreads = True
+
+         #: If this is not empty, we'll load detections from a filter file rather than finding them
+         #: from the detector output. This should be a .json file containing detections; generally this
+         #: is the detectionIndex.json file in the filtering_* folder produced by find_repeat_detections().
+         self.filterFileToLoad = ''
+
+         #: (optional) List of filenames remaining after deletion of identified
+         #: repeated detections that are actually animals. This should be a flat
+         #: text file, one relative filename per line.
+         #:
+         #: This is a pretty esoteric code path and a candidate for removal.
+         #:
+         #: The scenario where I see it being most useful is the very hypothetical one
+         #: where we use an external tool for image handling that allows us to do something
+         #: smarter and less destructive than deleting images to mark them as non-false-positives.
+         self.filteredFileListToLoad = None
+
+         #: Should we write the folder of images used to manually review repeat detections?
+         self.bWriteFilteringFolder = True
+
+         #: For debugging: limit comparisons to a specific number of folders
+         self.debugMaxDir = -1
+
+         #: For debugging: limit rendering to a specific number of folders
+         self.debugMaxRenderDir = -1
+
+         #: For debugging: limit comparisons to a specific number of detections
+         self.debugMaxRenderDetection = -1
+
+         #: For debugging: limit comparisons to a specific number of instances
+         self.debugMaxRenderInstance = -1
+
+         #: Should we parallelize (across cameras) comparisons to find repeat detections?
+         self.bParallelizeComparisons = True
+
+         #: Should we parallelize image rendering?
+         self.bParallelizeRendering = True
+
+         #: If this is False (default), a detection from class A is *not* considered to be "the same"
+         #: as a detection from class B, even if they're at the same location.
+         self.categoryAgnosticComparisons = False
+
+         #: Determines whether bounding-box rendering errors (typically network errors) should
+         #: be treated as failures
+         self.bFailOnRenderError = False
+
+         #: Should we print a warning if images referred to in the MD results file are missing?
+         self.bPrintMissingImageWarnings = True
+
+         #: If bPrintMissingImageWarnings is True, should we print a warning about missing images
+         #: just once ('once') or every time ('all')?
+         self.missingImageWarningType = 'once' # 'all'
+
+         #: Image width for rendered images (it's called "max" because we don't resize smaller images).
+         #:
+         #: Original size is preserved if this is None.
+         #:
+         #: This does *not* include the tile image grid.
+         self.maxOutputImageWidth = None
+
+         #: Line thickness (in pixels) for box rendering
+         self.lineThickness = 10
+
+         #: Box expansion (in pixels)
+         self.boxExpansion = 2
+
+         #: Progress bar used during comparisons and rendering. Do not set externally.
+         #:
+         #: :meta private:
+         self.pbar = None
+
+         #: Replace filename tokens after reading, useful when the directory structure
+         #: has changed relative to the structure the detector saw.
+         self.filenameReplacements = {}
+
+         #: How many folders up from the leaf nodes should we be going to aggregate images into
+         #: cameras?
+         #:
+         #: If this is zero, each leaf folder is treated as a camera.
+         self.nDirLevelsFromLeaf = 0
+
+         #: An optional function that takes a string (an image file name) and returns
+         #: a string (the corresponding folder ID), typically used when multiple folders
+         #: actually correspond to the same camera in a manufacturer-specific way (e.g.
+         #: a/b/c/RECONYX100 and a/b/c/RECONYX101 may really be the same camera).
+         #:
+         #: See ct_utils for a common replacement function that handles most common
+         #: manufacturer folder names.
+         self.customDirNameFunction = None
+
+         #: Include only specific folders, mutually exclusive with [excludeFolders]
+         self.includeFolders = None
+
+         #: Exclude specific folders, mutually exclusive with [includeFolders]
+         self.excludeFolders = None
+
+         #: Optionally show *other* detections (i.e., detections other than the
+         #: one the user is evaluating), typically in a light gray.
+         self.bRenderOtherDetections = False
+
+         #: Threshold to use for *other* detections
+         self.otherDetectionsThreshold = 0.2
+
+         #: Line width (in pixels) for *other* detections
+         self.otherDetectionsLineWidth = 1
+
+         #: Optionally show a grid that includes a sample image for the detection, plus
+         #: the top N additional detections
+         self.bRenderDetectionTiles = True
+
+         #: Width of the original image (within the larger output image) when bRenderDetectionTiles
+         #: is True.
+         #:
+         #: If this is None, we'll render the original image in the detection tile image
+         #: at its original width.
+         self.detectionTilesPrimaryImageWidth = None
+
+         #: Width to use for the grid of detection instances.
+         #:
+         #: Can be a width in pixels, or a number from 0 to 1 representing a fraction
+         #: of the primary image width.
+         #:
+         #: If you want to render the grid at exactly 1 pixel wide, I guess you're out
+         #: of luck.
+         self.detectionTilesCroppedGridWidth = 0.6
+
+         #: Location of the primary image within the mosaic ('right' or 'left')
+         self.detectionTilesPrimaryImageLocation = 'right'
+
+         #: Maximum number of individual detection instances to include in the mosaic
+         self.detectionTilesMaxCrops = 250
+
+         #: If bRenderOtherDetections is True, what color should we use to render the
+         #: (hopefully pretty subtle) non-target detections?
+         #:
+         #: In theory I'd like these "other detection" rectangles to be partially
+         #: transparent, but this is not straightforward, and the alpha is ignored
+         #: here. But maybe if I leave it here and wish hard enough, someday it
+         #: will work.
+         #:
+         #: otherDetectionsColors = ['dimgray']
+         self.otherDetectionsColors = [(105,105,105,100)]
+
+         #: Sort detections within a directory so nearby detections are adjacent
+         #: in the list, for faster review.
+         #:
+         #: Can be None, 'xsort', or 'clustersort'
+         #:
+         #: * None sorts detections chronologically by first occurrence
+         #: * 'xsort' sorts detections from left to right
+         #: * 'clustersort' clusters detections and sorts by cluster
+         self.smartSort = 'xsort'
+
+         #: Only relevant if smartSort == 'clustersort'
+         self.smartSortDistanceThreshold = 0.1
+
+
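A minimal sketch of how these options are typically wired up, for orientation; the paths below are hypothetical, and find_repeat_detections is defined later in this file:

    from megadetector.postprocessing.repeat_detection_elimination import repeat_detections_core

    options = repeat_detections_core.RepeatDetectionOptions()
    options.imageBase = '/data/camera_traps'    # hypothetical image root
    options.outputBase = '/tmp/rde_scratch'     # hypothetical scratch folder
    options.confidenceMin = 0.1
    options.iouThreshold = 0.9                  # boxes this similar count as "the same"
    options.occurrenceThreshold = 20            # repeats required before a box is suspicious
    options.excludeClasses = [2, 3]             # e.g. skip people and vehicles

    # First pass: find candidate repeat detections and write review images
    results = repeat_detections_core.find_repeat_detections(
        '/data/md_results.json', outputFilename=None, options=options)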
+ class RepeatDetectionResults:
+     """
+     The results of an entire repeat detection analysis
+     """
+
+     def __init__(self):
+
+         #: The data table (Pandas DataFrame), as loaded from the input json file via
+         #: load_api_results(). Has columns ['file', 'detections', 'failure'].
+         self.detectionResults = None
+
+         #: The other fields in the input json file, loaded via load_api_results()
+         self.otherFields = None
+
+         #: The data table after modification
+         self.detectionResultsFiltered = None
+
+         #: dict mapping folder names to whole rows from the data table
+         self.rowsByDirectory = None
+
+         #: dict mapping filenames to rows in the master table
+         self.filenameToRow = None
+
+         #: An array of length nDirs, where each element is a list of DetectionLocation
+         #: objects for that directory that have been flagged as suspicious
+         self.suspiciousDetections = None
+
+         #: The location of the .json file written with information about the RDE
+         #: review images (typically detectionIndex.json)
+         self.filterFile = None
+
+
+ class IndexedDetection:
+     """
+     A single detection event on a single image
+     """
+
+     def __init__(self, iDetection=-1, filename='', bbox=[], confidence=-1, category='unknown'):
+
+         assert isinstance(iDetection,int)
+         assert isinstance(filename,str)
+         assert isinstance(bbox,list)
+         assert isinstance(category,str)
+
+         #: index of this detection within all detections for this filename
+         self.iDetection = iDetection
+
+         #: path to the image corresponding to this detection
+         self.filename = filename
+
+         #: [x_min, y_min, width_of_box, height_of_box]
+         self.bbox = bbox
+
+         #: confidence value of this detection
+         self.confidence = confidence
+
+         #: category ID (not name) of this detection
+         self.category = category
+
+     def __repr__(self):
+         s = ct_utils.pretty_print_object(self, False)
+         return s
+
+
+ class DetectionLocation:
+     """
+     A unique-ish detection location, meaningful in the context of one
+     directory. All detections within an IoU threshold of self.bbox
+     will be stored in IndexedDetection objects.
+     """
+
+     def __init__(self, instance, detection, relativeDir, category, id=None):
+
+         assert isinstance(detection,dict)
+         assert isinstance(instance,IndexedDetection)
+         assert isinstance(relativeDir,str)
+         assert isinstance(category,str)
+
+         #: list of IndexedDetections that match this detection
+         self.instances = [instance]
+
+         #: category ID (not name) for this detection
+         self.category = category
+
+         #: bbox as x,y,w,h
+         self.bbox = detection['bbox']
+
+         #: relative folder (i.e., camera name) in which this detection was found
+         self.relativeDir = relativeDir
+
+         #: relative path to the canonical image representing this detection
+         self.sampleImageRelativeFileName = ''
+
+         #: list of detections on that canonical image that match this detection
+         self.sampleImageDetections = None
+
+         #: ID for this detection; this ID is only guaranteed to be unique within a directory
+         self.id = id
+
+         #: only used when doing cluster-based sorting
+         self.clusterLabel = None
+
+     def __repr__(self):
+         s = ct_utils.pretty_print_object(self, False)
+         return s
+
+     def to_api_detection(self):
+         """
+         Converts this detection to a 'detection' dictionary, making the semi-arbitrary
+         assumption that the first instance is representative of confidence.
+
+         Returns:
+             dict: dictionary in the format used to store detections in MD results
+         """
+
+         # This is a bit of a hack right now, but for future-proofing, I don't want to call this
+         # to retrieve anything other than the highest-confidence detection, and I'm assuming this
+         # is already sorted, so assert() that.
+         confidences = [i.confidence for i in self.instances]
+         assert confidences[0] == max(confidences), \
+             'Cannot convert an unsorted DetectionLocation to an API detection'
+
+         # It's not clear whether it's better to use instances[0].bbox or self.bbox
+         # here... they should be very similar, unless iouThreshold is very low.
+         # self.bbox is a better representation of the overall DetectionLocation.
+         detection = {'conf':self.instances[0].confidence,
+                      'bbox':self.bbox,'category':self.instances[0].category}
+         return detection
+
+
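For reference, the dictionary that to_api_detection() returns follows the standard MD results detection format, with the bbox in relative coordinates; the values below are made up:

    detection = {
        'category': '1',                   # category ID as a string, not a name
        'conf': 0.926,                     # detection confidence
        'bbox': [0.35, 0.42, 0.18, 0.21]   # [x_min, y_min, width, height], relative
    }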
+ #%% Support functions
+
+ def _render_bounding_box(detection, inputFileName, outputFileName, lineWidth=5,
+                          expansion=0):
+     """
+     Renders the detection [detection] on the image [inputFileName], writing the result
+     to [outputFileName].
+     """
+
+     im = open_image(inputFileName)
+     d = detection.to_api_detection()
+     render_detection_bounding_boxes([d],im,thickness=lineWidth,expansion=expansion,
+                                     confidence_threshold=-10)
+     im.save(outputFileName)
+
+
+ def _detection_rect_to_rtree_rect(detection_rect):
+     """
+     We store detections as x/y/w/h; rtree and pyqtree use l/b/r/t. Converts from
+     our representation to rtree's.
+     """
+
+     l = detection_rect[0]
+     b = detection_rect[1]
+     r = detection_rect[0] + detection_rect[2]
+     t = detection_rect[1] + detection_rect[3]
+     return (l,b,r,t)
+
+
+ def _rtree_rect_to_detection_rect(rtree_rect):
+     """
+     We store detections as x/y/w/h; rtree and pyqtree use l/b/r/t. Converts from
+     rtree's representation to ours.
+     """
+
+     x = rtree_rect[0]
+     y = rtree_rect[1]
+     w = rtree_rect[2] - rtree_rect[0]
+     h = rtree_rect[3] - rtree_rect[1]
+     return (x,y,w,h)
+
+
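These two helpers exist because pyqtree queries use corner coordinates while detections use x/y/w/h. An illustrative round trip, using the module's private helpers and toy box values:

    index = pyqtree.Index(bbox=(-0.1, -0.1, 1.1, 1.1))

    box = [0.25, 0.25, 0.25, 0.25]                   # x/y/w/h
    rect = _detection_rect_to_rtree_rect(box)        # (0.25, 0.25, 0.5, 0.5) as l/b/r/t
    index.insert(item='some_detection', bbox=rect)

    # Any overlapping query rectangle finds the stored item
    query = _detection_rect_to_rtree_rect([0.3, 0.3, 0.1, 0.1])
    assert index.intersect(query) == ['some_detection']

    # The conversion round-trips (for values that are exact in floating point)
    assert _rtree_rect_to_detection_rect(rect) == tuple(box)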
+ def _sort_detections_for_directory(candidateDetections,options):
+     """
+     candidateDetections is a list of DetectionLocation objects. Sorts them to
+     put nearby detections next to each other, for easier visual review. Returns
+     a sorted copy of candidateDetections; does not sort in place.
+     """
+
+     if len(candidateDetections) <= 1 or options.smartSort is None:
+         return candidateDetections
+
+     # Just sort by the X location of each box
+     if options.smartSort == 'xsort':
+         candidateDetectionsSorted = sorted(candidateDetections,
+                                            key=lambda x: (
+                                                (x.bbox[0]) + (x.bbox[2]/2.0)
+                                            ))
+         return candidateDetectionsSorted
+
+     elif options.smartSort == 'clustersort':
+
+         cluster = sklearn.cluster.AgglomerativeClustering(
+             n_clusters=None,
+             distance_threshold=options.smartSortDistanceThreshold,
+             linkage='complete')
+
+         # Prepare a list of points to represent each box;
+         # that's what we'll use for clustering
+         points = []
+         for det in candidateDetections:
+             # To use the upper-left of the box as the clustering point
+             # points.append([det.bbox[0],det.bbox[1]])
+
+             # To use the center of the box as the clustering point
+             points.append([det.bbox[0]+det.bbox[2]/2.0,
+                            det.bbox[1]+det.bbox[3]/2.0])
+         X = np.array(points)
+
+         labels = cluster.fit_predict(X)
+         unique_labels = np.unique(labels)
+
+         # Labels *could* be any unique labels according to the docs, but in practice
+         # they are unique integers from 0:nClusters.
+         #
+         # Make sure the labels are unique incrementing integers.
+         for i_label in range(1,len(unique_labels)):
+             assert unique_labels[i_label] == 1 + unique_labels[i_label-1]
+
+         assert len(labels) == len(candidateDetections)
+
+         # Store the label assigned to each detection
+         for i_label,label in enumerate(labels):
+             candidateDetections[i_label].clusterLabel = label
+
+         # Now sort the clusters by their x coordinate, and re-assign labels
+         # so the labels are sortable
+         label_x_means = []
+
+         for label in unique_labels:
+             detections_this_label = [d for d in candidateDetections if (
+                 d.clusterLabel == label)]
+             points_this_label = [ [d.bbox[0],d.bbox[1]] for d in detections_this_label]
+             x = [p[0] for p in points_this_label]
+             y = [p[1] for p in points_this_label]
+
+             # Compute the centroid for debugging, but we're only going to use the x
+             # coordinate. This is the centroid of points used to represent detections,
+             # which may be box centers or box corners.
+             centroid = [ sum(x) / len(points_this_label), sum(y) / len(points_this_label) ]
+             label_xval = centroid[0]
+             label_x_means.append(label_xval)
+
+         old_cluster_label_to_new_cluster_label = {}
+         new_cluster_labels = np.argsort(label_x_means)
+         assert len(new_cluster_labels) == len(np.unique(new_cluster_labels))
+         for old_cluster_label in unique_labels:
+             old_cluster_label_to_new_cluster_label[old_cluster_label] =\
+                 np.where(new_cluster_labels==old_cluster_label)[0][0]
+
+         for i_cluster in range(0,len(unique_labels)):
+             old_label = unique_labels[i_cluster]
+             assert i_cluster == old_label
+             new_label = old_cluster_label_to_new_cluster_label[old_label]
+
+         for i_det,det in enumerate(candidateDetections):
+             old_label = det.clusterLabel
+             new_label = old_cluster_label_to_new_cluster_label[old_label]
+             det.clusterLabel = new_label
+
+         candidateDetectionsSorted = sorted(candidateDetections,
+                                            key=lambda x: (x.clusterLabel,x.id))
+
+         return candidateDetectionsSorted
+
+     else:
+         raise ValueError('Unrecognized sort method {}'.format(
+             options.smartSort))
+
+ # ...def _sort_detections_for_directory(...)
+
+
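The 'clustersort' branch leans entirely on scikit-learn's threshold-based agglomerative clustering; a toy sketch of the grouping step it performs on box centers (data made up):

    import numpy as np
    import sklearn.cluster

    # Centers of four hypothetical boxes: two near the left edge, two near the right
    centers = np.array([[0.10, 0.10], [0.11, 0.12],
                        [0.80, 0.50], [0.81, 0.52]])

    cluster = sklearn.cluster.AgglomerativeClustering(
        n_clusters=None, distance_threshold=0.1, linkage='complete')
    labels = cluster.fit_predict(centers)

    # Nearby centers share a label; distant ones don't
    assert labels[0] == labels[1] and labels[2] == labels[3]
    assert labels[0] != labels[2]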
+ def _find_matches_in_directory(dirNameAndRows, options):
+     """
+     dirNameAndRows is a tuple of (name,rows).
+
+     "name" is a location name, typically a folder name, though this may be an arbitrary
+     location identifier.
+
+     "rows" is a Pandas dataframe with one row per image in this location, with columns:
+
+     * 'file': relative file name
+     * 'detections': a list of MD detection objects, i.e. dicts with keys ['category','conf','bbox']
+     * 'max_detection_conf': maximum confidence of any detection, in any category
+
+     "rows" can also point to a .csv file, in which case the detection table will be read from that
+     .csv file, and results will be written to a .csv file rather than being returned.
+
+     Find all unique detections in this directory.
+
+     Returns a list of DetectionLocation objects.
+     """
+
+     if options.pbar is not None:
+         options.pbar.update()
+
+     # Create a tree to store candidate detections
+     candidateDetectionsIndex = pyqtree.Index(bbox=(-0.1,-0.1,1.1,1.1))
+
+     assert len(dirNameAndRows) == 2, 'find_matches_in_directory: invalid input'
+     assert isinstance(dirNameAndRows[0],str), 'find_matches_in_directory: invalid location name'
+     dirName = dirNameAndRows[0]
+     rows = dirNameAndRows[1]
+
+     detections_loaded_from_csv_file = None
+
+     if isinstance(rows,str):
+         detections_loaded_from_csv_file = rows
+         print('Loading results for location {} from {}'.format(
+             dirName,detections_loaded_from_csv_file))
+         rows = pd.read_csv(detections_loaded_from_csv_file)
+         # Pandas writes detections out as strings; convert them back to lists
+         rows['detections'] = rows['detections'].apply(lambda s: json.loads(s.replace('\'','"')))
+
+     if options.maxImagesPerFolder is not None and len(rows) > options.maxImagesPerFolder:
+         print('Ignoring directory {} because it has {} images (limit set to {})'.format(
+             dirName,len(rows),options.maxImagesPerFolder))
+         return []
+
+     if options.includeFolders is not None:
+         assert options.excludeFolders is None, 'Cannot specify include and exclude folder lists'
+         if dirName not in options.includeFolders:
+             print('Ignoring folder {}, not in inclusion list'.format(dirName))
+             return []
+
+     if options.excludeFolders is not None:
+         assert options.includeFolders is None, 'Cannot specify include and exclude folder lists'
+         if dirName in options.excludeFolders:
+             print('Ignoring folder {}, on exclusion list'.format(dirName))
+             return []
+
+     # For each image in this directory
+     #
+     # iDirectoryRow = 0; row = rows.iloc[iDirectoryRow]
+     #
+     # iDirectoryRow is a pandas index, so it may not start from zero;
+     # for debugging, we maintain i_iteration as a loop index.
+     i_iteration = -1
+     n_boxes_evaluated = 0
+
+     for iDirectoryRow, row in rows.iterrows():
+
+         i_iteration += 1
+         filename = row['file']
+         if not path_utils.is_image_file(filename):
+             continue
+
+         if 'max_detection_conf' not in row or 'detections' not in row or \
+            row['detections'] is None:
+             print('Skipping row {}'.format(iDirectoryRow))
+             continue
+
+         # Don't bother checking images with no detections above threshold
+         maxP = float(row['max_detection_conf'])
+         if maxP < options.confidenceMin:
+             continue
+
+         # Array of dicts, where each element is
+         # {
+         #   'category': '1', # str value, category ID
+         #   'conf': 0.926, # confidence of this detection
+         #
+         #   (x_min, y_min) is upper-left, all in relative coordinates
+         #   'bbox': [x_min, y_min, width_of_box, height_of_box]
+         #
+         # }
+         detections = row['detections']
+         if isinstance(detections,float):
+             assert isinstance(row['failure'],str), 'Expected failure indicator'
+             print('Skipping failed image {} ({})'.format(filename,row['failure']))
+             continue
+
+         assert len(detections) > 0
+
+         # For each detection in this image
+         for iDetection, detection in enumerate(detections):
+
+             n_boxes_evaluated += 1
+
+             if detection is None:
+                 print('Skipping detection {}'.format(iDetection))
+                 continue
+
+             assert 'category' in detection and \
+                    'conf' in detection and \
+                    'bbox' in detection, 'Illegal detection'
+
+             confidence = detection['conf']
+
+             # This is no longer strictly true; I sometimes run RDE in stages, so
+             # some probabilities have already been made negative
+             #
+             # assert confidence >= 0.0 and confidence <= 1.0
+
+             assert confidence >= -1.0 and confidence <= 1.0
+
+             if confidence < options.confidenceMin:
+                 continue
+             if confidence > options.confidenceMax:
+                 continue
+
+             # Optionally exclude some classes from consideration as suspicious
+             if (options.excludeClasses is not None) and (len(options.excludeClasses) > 0):
+                 iClass = int(detection['category'])
+                 if iClass in options.excludeClasses:
+                     continue
+
+             bbox = detection['bbox']
+             confidence = detection['conf']
+
+             # Is this detection too big or too small for consideration?
+             w, h = bbox[2], bbox[3]
+
+             if (w == 0 or h == 0):
+                 continue
+
+             area = h * w
+
+             if area < 0:
+                 print('Warning: negative-area bounding box for file {}'.format(filename))
+                 area = abs(area); h = abs(h); w = abs(w)
+
+             assert area >= 0.0 and area <= 1.0, \
+                 'Illegal bounding box area {} in image {}'.format(area,filename)
+
+             if area < options.minSuspiciousDetectionSize:
+                 continue
+
+             if area > options.maxSuspiciousDetectionSize:
+                 continue
+
+             category = detection['category']
+
+             instance = IndexedDetection(iDetection=iDetection,
+                                         filename=row['file'], bbox=bbox,
+                                         confidence=confidence, category=category)
+
+             bFoundSimilarDetection = False
+
+             rtree_rect = _detection_rect_to_rtree_rect(bbox)
+
+             # This will return candidates of all classes
+             overlappingCandidateDetections =\
+                 candidateDetectionsIndex.intersect(rtree_rect)
+
+             overlappingCandidateDetections.sort(
+                 key=lambda x: x.id, reverse=False)
+
+             # For each detection in our candidate list
+             for iCandidate, candidate in enumerate(
+                     overlappingCandidateDetections):
+
+                 # Don't match across categories
+                 if (candidate.category != category) and (not (options.categoryAgnosticComparisons)):
+                     continue
+
+                 # Is this a match?
+                 try:
+                     iou = ct_utils.get_iou(bbox, candidate.bbox)
+                 except Exception as e:
+                     print(\
+                         'Warning: IOU computation error on boxes ({},{},{},{}),({},{},{},{}): {}'.\
+                         format(
+                             bbox[0],bbox[1],bbox[2],bbox[3],
+                             candidate.bbox[0],candidate.bbox[1],
+                             candidate.bbox[2],candidate.bbox[3], str(e)))
+                     continue
+
+                 if iou >= options.iouThreshold:
+
+                     bFoundSimilarDetection = True
+
+                     # If so, add this example to the list for this detection
+                     candidate.instances.append(instance)
+
+                     # We *don't* break here; we allow this instance to possibly
+                     # match multiple candidates. There isn't an obvious right or
+                     # wrong here.
+
+             # ...for each detection on our candidate list
+
+             # If we found no matches, add this to the candidate list
+             if not bFoundSimilarDetection:
+
+                 candidate = DetectionLocation(instance=instance,
+                                               detection=detection, relativeDir=dirName,
+                                               category=category, id=i_iteration)
+
+                 # pyqtree
+                 candidateDetectionsIndex.insert(item=candidate,bbox=rtree_rect)
+
+         # ...for each detection
+
+     # ...for each row
+
+     # Get all candidate detections
+
+     candidateDetections = candidateDetectionsIndex.intersect([-100,-100,100,100])
+
+     # For debugging only, it's convenient to have these sorted
+     # as if they had never gone into a tree structure. Typically
+     # this is in practice a sort by filename.
+     candidateDetections.sort(
+         key=lambda x: x.id, reverse=False)
+
+     if detections_loaded_from_csv_file is not None:
+         location_results_file = \
+             os.path.splitext(detections_loaded_from_csv_file)[0] + \
+             '_results.json'
+         print('Writing results for location {} to {}'.format(
+             dirName,location_results_file))
+         s = jsonpickle.encode(candidateDetections,make_refs=False)
+         with open(location_results_file,'w') as f:
+             f.write(s)
+             # json.dump(candidateDetections,f,indent=1)
+         return location_results_file
+     else:
+         return candidateDetections
+
+ # ...def _find_matches_in_directory(...)
+
+
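The matching step above hinges on ct_utils.get_iou; a minimal standalone sketch of IoU for this module's [x, y, w, h] box format (an assumption about get_iou's behavior, not its actual implementation):

    def iou_xywh(box_a, box_b):
        # Convert to corner coordinates
        ax1, ay1 = box_a[0] + box_a[2], box_a[1] + box_a[3]
        bx1, by1 = box_b[0] + box_b[2], box_b[1] + box_b[3]
        # Intersection rectangle (zero if the boxes don't overlap)
        iw = max(0.0, min(ax1, bx1) - max(box_a[0], box_b[0]))
        ih = max(0.0, min(ay1, by1) - max(box_a[1], box_b[1]))
        intersection = iw * ih
        union = box_a[2] * box_a[3] + box_b[2] * box_b[3] - intersection
        return intersection / union if union > 0 else 0.0

    # Identical boxes have IoU 1.0; half-overlapping boxes fall well below the
    # default iouThreshold of 0.9, so they would start a new candidate location
    assert iou_xywh([0.0, 0.0, 0.5, 0.5], [0.0, 0.0, 0.5, 0.5]) == 1.0
    assert iou_xywh([0.0, 0.0, 0.5, 0.5], [0.25, 0.0, 0.5, 0.5]) < 0.9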
+ def _update_detection_table(repeatDetectionResults, options, outputFilename=None):
+     """
+     Changes confidence values in repeatDetectionResults.detectionResults so that detections
+     deemed to be possible false positives are given negative confidence values.
+
+     repeatDetectionResults is an object of type RepeatDetectionResults, with a pandas
+     dataframe (detectionResults) containing all the detections loaded from the .json file,
+     and a list of detections for each location (suspiciousDetections) that are deemed to
+     be suspicious.
+
+     Returns the modified pandas dataframe (repeatDetectionResults.detectionResults), but
+     also modifies it in place.
+     """
+
+     # This is the pandas dataframe that contains actual detection results.
+     #
+     # Has fields ['file', 'detections', 'failure'].
+     detectionResults = repeatDetectionResults.detectionResults
+
+     # An array of length nDirs, where each element is a list of DetectionLocation
+     # objects for that directory that have been flagged as suspicious
+     suspiciousDetectionsByDirectory = repeatDetectionResults.suspiciousDetections
+
+     nBboxChanges = 0
+
+     print('Updating output table')
+
+     # For each directory
+     for iDir, directoryEvents in enumerate(suspiciousDetectionsByDirectory):
+
+         # For each suspicious detection group in this directory
+         for iDetectionEvent, detectionEvent in enumerate(directoryEvents):
+
+             locationBbox = detectionEvent.bbox
+
+             # For each instance of this suspicious detection
+             for iInstance, instance in enumerate(detectionEvent.instances):
+
+                 instanceBbox = instance.bbox
+
+                 # This should match the bbox for the detection event
+                 iou = ct_utils.get_iou(instanceBbox, locationBbox)
+
+                 # The bbox for this instance should be almost the same as the bbox
+                 # for this detection group, where "almost" is defined by the IOU
+                 # threshold.
+                 assert iou >= options.iouThreshold
+                 # if iou < options.iouThreshold:
+                 #     print('IOU warning: {},{}'.format(iou,options.iouThreshold))
+
+                 assert instance.filename in repeatDetectionResults.filenameToRow
+                 iRow = repeatDetectionResults.filenameToRow[instance.filename]
+                 row = detectionResults.iloc[iRow]
+                 rowDetections = row['detections']
+                 detectionToModify = rowDetections[instance.iDetection]
+
+                 # Make sure the bounding box matches
+                 assert (instanceBbox[0:3] == detectionToModify['bbox'][0:3])
+
+                 # Make the probability negative, if it hasn't been switched by
+                 # another bounding box
+                 if detectionToModify['conf'] >= 0:
+                     detectionToModify['conf'] = -1 * detectionToModify['conf']
+                     nBboxChanges += 1
+
+             # ...for each instance
+
+         # ...for each detection
+
+     # ...for each directory
+
+     # Update maximum probabilities
+
+     # For each row...
+     nProbChanges = 0
+     nProbChangesToNegative = 0
+     nProbChangesAcrossThreshold = 0
+
+     for iRow, row in detectionResults.iterrows():
+
+         detections = row['detections']
+         if (detections is None) or isinstance(detections,float):
+             assert isinstance(row['failure'],str)
+             continue
+
+         if len(detections) == 0:
+             continue
+
+         maxPOriginal = float(row['max_detection_conf'])
+
+         # No longer strictly true; sometimes I run RDE on RDE output
+         # assert maxPOriginal >= 0
+         assert maxPOriginal >= -1.0
+
+         maxP = None
+         nNegative = 0
+
+         for iDetection, detection in enumerate(detections):
+
+             p = detection['conf']
+
+             if p < 0:
+                 nNegative += 1
+
+             if (maxP is None) or (p > maxP):
+                 maxP = p
+
+         # We should only be making detections *less* likely in this process
+         assert maxP <= maxPOriginal
+         detectionResults.at[iRow, 'max_detection_conf'] = maxP
+
+         # If there was a meaningful change, count it
+         if abs(maxP - maxPOriginal) > 1e-3:
+
+             assert maxP < maxPOriginal
+
+             nProbChanges += 1
+
+             if (maxP < 0) and (maxPOriginal >= 0):
+                 nProbChangesToNegative += 1
+
+             if (maxPOriginal >= options.confidenceMin) and (maxP < options.confidenceMin):
+                 nProbChangesAcrossThreshold += 1
+
+             # Negative probabilities should be the only reason maxP changed, so
+             # we should have found at least one negative value if we reached
+             # this point.
+             assert nNegative > 0
+
+         # ...if there was a meaningful change to the max probability for this row
+
+     # ...for each row
+
+     # If we're also writing output...
+     if outputFilename is not None and len(outputFilename) > 0:
+         write_api_results(detectionResults, repeatDetectionResults.otherFields,
+                           outputFilename)
+
+     print(
+         'Finished updating detection table\nChanged {} detections that impacted {} maxPs ({} to negative) ({} across confidence threshold)'.format(
+         nBboxChanges, nProbChanges, nProbChangesToNegative, nProbChangesAcrossThreshold))
+
+     return detectionResults
+
+ # ...def _update_detection_table(...)
+
+
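The key property of the confidence-negation scheme used above: downstream consumers that threshold on confidence silently drop repeat detections, with no change to the file structure. A toy illustration (values made up):

    detections = [
        {'category': '1', 'conf': 0.95, 'bbox': [0.1, 0.1, 0.2, 0.2]},  # real animal
        {'category': '1', 'conf': 0.90, 'bbox': [0.5, 0.5, 0.1, 0.1]},  # repeated rock
    ]

    # What _update_detection_table effectively does to a confirmed repeat
    detections[1]['conf'] = -1 * detections[1]['conf']

    # Any consumer filtering at a positive threshold now ignores it
    kept = [d for d in detections if d['conf'] >= 0.1]
    assert len(kept) == 1 and kept[0]['conf'] == 0.95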
952
+ def _render_sample_image_for_detection(detection,filteringDir,options):
953
+ """
954
+ Render a sample image for one unique detection, possibly containing lightly-colored
955
+ high-confidence detections from elsewhere in the sample image.
956
+
957
+ "detections" is a DetectionLocation object.
958
+
959
+ Depends on having already sorted instances within this detection by confidence, and
960
+ having already generated an output file name for this sample image.
961
+ """
962
+
963
+ # Confidence values should already have been sorted in the previous loop
964
+ instance_confidences = [instance.confidence for instance in detection.instances]
965
+ assert ct_utils.is_list_sorted(instance_confidences,reverse=True)
966
+
967
+ # Choose the highest-confidence index
968
+ instance = detection.instances[0]
969
+ relativePath = instance.filename
970
+
971
+ outputRelativePath = detection.sampleImageRelativeFileName
972
+ assert len(outputRelativePath) > 0
973
+
974
+ outputFullPath = os.path.join(filteringDir, outputRelativePath)
975
+
976
+ if is_sas_url(options.imageBase):
977
+ inputFullPath = relative_sas_url(options.imageBase, relativePath)
978
+ else:
979
+ inputFullPath = os.path.join(options.imageBase, relativePath)
980
+ assert (os.path.isfile(inputFullPath)), 'Not a file: {}'.\
981
+ format(inputFullPath)
982
+
983
+ try:
984
+
985
+ im = open_image(inputFullPath)
986
+
987
+ # Should we render (typically in a very light color) detections
988
+ # *other* than the one we're highlighting here?
989
+ if options.bRenderOtherDetections:
990
+
991
+ # Optionally resize the output image
992
+ if (options.maxOutputImageWidth is not None) and \
993
+ (im.size[0] > options.maxOutputImageWidth):
994
+ im = vis_utils.resize_image(im, options.maxOutputImageWidth,
995
+ target_height=-1)
996
+
997
+ assert detection.sampleImageDetections is not None
998
+
999
+ # At this point, suspicious detections have already been flipped
1000
+ # negative, which we don't want for rendering purposes
1001
+ rendered_detections = []
1002
+
1003
+ for det in detection.sampleImageDetections:
1004
+ rendered_det = copy.copy(det)
1005
+ rendered_det['conf'] = abs(rendered_det['conf'])
1006
+ rendered_detections.append(rendered_det)
1007
+
1008
+ # Render other detections first (typically in a thin+light box)
1009
+ render_detection_bounding_boxes(rendered_detections,
1010
+ im,
1011
+ label_map=None,
1012
+ thickness=options.otherDetectionsLineWidth,
1013
+ expansion=options.boxExpansion,
1014
+ colormap=options.otherDetectionsColors,
1015
+ confidence_threshold=options.otherDetectionsThreshold)
1016
+
1017
+ # Now render the example detection (on top of at least one
1018
+ # of the other detections)
1019
+
1020
+ # This converts the *first* instance to an API standard detection;
1021
+ # because we just sorted this list in descending order by confidence,
1022
+ # this is the highest-confidence detection.
1023
+ d = detection.to_api_detection()
1024
+
1025
+ render_detection_bounding_boxes([d],im,thickness=options.lineThickness,
1026
+ expansion=options.boxExpansion,
1027
+ confidence_threshold=-10)
1028
+
1029
+ im.save(outputFullPath)
1030
+
1031
+ else:
1032
+
1033
+ _render_bounding_box(detection, inputFullPath, outputFullPath,
1034
+ lineWidth=options.lineThickness, expansion=options.boxExpansion)
1035
+
1036
+ # ...if we are/aren't rendering other bounding boxes
1037
+
1038
+ # If we're rendering detection tiles, we'll re-load and re-write the image we
1039
+ # just wrote to outputFullPath
1040
+ if options.bRenderDetectionTiles:
1041
+
1042
+ assert not is_sas_url(options.imageBase), "Can't render detection tiles from SAS URLs"
1043
+
1044
+ if options.detectionTilesPrimaryImageWidth is not None:
1045
+ primaryImageWidth = options.detectionTilesPrimaryImageWidth
1046
+ else:
1047
+ # "im" may be a resized version of the original image, if we've already run
1048
+ # the code to render other bounding boxes.
1049
+ primaryImageWidth = im.size[0]
1050
+
1051
+ if options.detectionTilesCroppedGridWidth <= 1.0:
1052
+ croppedGridWidth = round(options.detectionTilesCroppedGridWidth * primaryImageWidth)
1053
+ else:
1054
+ croppedGridWidth = options.detectionTilesCroppedGridWidth
1055
+
+            secondaryImageFilenameList = []
+            secondaryImageBoundingBoxList = []
+
+            # Starting from index zero means we include the sample crop itself
+            for instance in detection.instances[0:]:
+                secondaryImageFilenameList.append(os.path.join(options.imageBase,
+                                                               instance.filename))
+                secondaryImageBoundingBoxList.append(instance.bbox)
+
+            # Optionally limit the number of crops we pass to the rendering function
+            if (options.detectionTilesMaxCrops is not None) and \
+               (len(detection.instances) > options.detectionTilesMaxCrops):
+                secondaryImageFilenameList = \
+                    secondaryImageFilenameList[0:options.detectionTilesMaxCrops]
+                secondaryImageBoundingBoxList = \
+                    secondaryImageBoundingBoxList[0:options.detectionTilesMaxCrops]
+
+            # This will over-write the image we've already written to outputFullPath
+            render_images_with_thumbnails.render_images_with_thumbnails(
+                primary_image_filename=outputFullPath,
+                primary_image_width=primaryImageWidth,
+                secondary_image_filename_list=secondaryImageFilenameList,
+                secondary_image_bounding_box_list=secondaryImageBoundingBoxList,
+                cropped_grid_width=croppedGridWidth,
+                output_image_filename=outputFullPath,
+                primary_image_location=options.detectionTilesPrimaryImageLocation)
+
+        # ...if we are/aren't rendering detection tiles
+
+    except Exception as e:
+
+        stack_trace = traceback.format_exc()
+        print('Warning: error rendering bounding box from {} to {}: {} ({})'.format(
+            inputFullPath,outputFullPath,e,stack_trace))
+        if options.bFailOnRenderError:
+            raise
+
+# ...def _render_sample_image_for_detection(...)
+
+
+#%% Main entry point
+
+def find_repeat_detections(inputFilename, outputFilename=None, options=None):
+    """
+    Find detections in a MD results file that occur repeatedly and are likely to be
+    rocks/sticks.
+
+    Args:
+        inputFilename (str): the MD results .json file to analyze
+        outputFilename (str, optional): the filename to which we should write results
+            with repeat detections removed; typically set to None during the first
+            part of the RDE process.
+        options (RepeatDetectionOptions): all the interesting options controlling this
+            process; see RepeatDetectionOptions for details.
+
+    Returns:
+        RepeatDetectionResults: results of the RDE process; see RepeatDetectionResults
+        for details.
+    """
1115
+
+    ##%% Input handling
+
+    if options is None:
+        options = RepeatDetectionOptions()
+
+    # Validate some options
+
+    if options.customDirNameFunction is not None:
+        assert options.nDirLevelsFromLeaf == 0, \
+            'Cannot mix custom dir name functions with nDirLevelsFromLeaf'
+
+    if options.nDirLevelsFromLeaf != 0:
+        assert options.customDirNameFunction is None, \
+            'Cannot mix custom dir name functions with nDirLevelsFromLeaf'
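+
+    # For illustration, a custom dir name function maps a relative image path to the
+    # location key used for grouping; e.g. (hypothetical) treating the top-level
+    # folder as the location:
+    #
+    # options.customDirNameFunction = \
+    #     lambda relative_path: relative_path.replace('\\','/').split('/')[0]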
1131
+
+    if options.filterFileToLoad is not None and len(options.filterFileToLoad) > 0:
+
+        print('Bypassing detection-finding, loading from {}'.format(options.filterFileToLoad))
+
+        # Load the filtering file
+        detectionIndexFileName = options.filterFileToLoad
+        with open(detectionIndexFileName, 'r') as f:
+            sIn = f.read()
+        detectionInfo = jsonpickle.decode(sIn)
+        filteringBaseDir = os.path.dirname(options.filterFileToLoad)
+        suspiciousDetections = detectionInfo['suspiciousDetections']
+
+        # Load the same options we used when finding repeat detections...
+        options = detectionInfo['options']
+
+        # ...except for things that explicitly tell this function not to
+        # find repeat detections.
+        options.filterFileToLoad = detectionIndexFileName
+        options.bWriteFilteringFolder = False
+
+    # ...if we're loading from an existing filtering file
+
+    toReturn = RepeatDetectionResults()
+
+    # Check early to avoid problems with the output folder
+    if options.bWriteFilteringFolder:
+        assert options.outputBase is not None and len(options.outputBase) > 0
+        os.makedirs(options.outputBase,exist_ok=True)
+
+    # Load the results file into a pandas DataFrame. This also populates
+    # 'max_detection_conf', even if it's not present in the .json file.
+    detectionResults, otherFields = load_api_results(inputFilename, normalize_paths=True,
+                                                     filename_replacements=options.filenameReplacements,
+                                                     force_forward_slashes=True)
+    toReturn.detectionResults = detectionResults
+    toReturn.otherFields = otherFields
+
+    # detectionResults[detectionResults['failure'].notna()]
+
+    # Before doing any real work, make sure we can *probably* access images.
+    # This is just a cursory check on the first image, but it heads off most
+    # problems related to incorrect mount points, etc. Better to do this before
+    # spending 20 minutes finding repeat detections.
+    if options.bWriteFilteringFolder:
+
+        if not is_sas_url(options.imageBase):
+
+            row = detectionResults.iloc[0]
+            relativePath = row['file']
+            if options.filenameReplacements is not None:
+                for s in options.filenameReplacements.keys():
+                    relativePath = relativePath.replace(s,options.filenameReplacements[s])
+            absolutePath = os.path.join(options.imageBase,relativePath)
+            assert os.path.isfile(absolutePath), 'Could not find file {}'.format(absolutePath)
+
+
+    ##%% Separate files into locations
+
+    # This will be a map from a directory name to smaller data frames
+    rowsByDirectory = {}
+
+    # This is a mapping back into the rows of the original table
+    filenameToRow = {}
+
+    print('Separating images into locations...')
+
+    nCustomDirReplacements = 0
+
+    # iRow = 0; row = detectionResults.iloc[0]
+    for iRow, row in tqdm(detectionResults.iterrows(),total=len(detectionResults)):
+
+        relativePath = row['file']
+
+        if options.customDirNameFunction is not None:
+            basicDirName = os.path.dirname(relativePath.replace('\\','/'))
+            dirName = options.customDirNameFunction(relativePath)
+            if basicDirName != dirName:
+                nCustomDirReplacements += 1
+        else:
+            dirName = os.path.dirname(relativePath)
+
+        if len(dirName) == 0:
+            assert options.nDirLevelsFromLeaf == 0, \
+                "Can't use the dirLevelsFromLeaf option with flat filenames"
+        else:
+            if options.nDirLevelsFromLeaf > 0:
+                iLevel = 0
+                while (iLevel < options.nDirLevelsFromLeaf):
+                    iLevel += 1
+                    dirName = os.path.dirname(dirName)
+                assert len(dirName) > 0
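+
+                # e.g. with nDirLevelsFromLeaf = 1, 'site01/cam03/2021' would be grouped
+                # under 'site01/cam03'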
1226
+
+        if dirName not in rowsByDirectory:
+            # Create a new DataFrame with just this row
+            # rowsByDirectory[dirName] = pd.DataFrame(row)
+            rowsByDirectory[dirName] = []
+
+        rowsByDirectory[dirName].append(row)
+
+        assert relativePath not in filenameToRow
+        filenameToRow[relativePath] = iRow
+
+    # ...for each image
+
+    if options.customDirNameFunction is not None:
+        print('Custom dir name function made {} replacements (of {} images)'.format(
+            nCustomDirReplacements,len(detectionResults)))
+
+    # Convert lists of rows to proper DataFrames
+    dirs = list(rowsByDirectory.keys())
+    for d in dirs:
+        rowsByDirectory[d] = pd.DataFrame(rowsByDirectory[d])
+
+    toReturn.rowsByDirectory = rowsByDirectory
+    toReturn.filenameToRow = filenameToRow
+
+    print('Finished separating {} files into {} locations'.format(len(detectionResults),
+                                                                  len(rowsByDirectory)))
+
+
+    ##%% Look for repeat detections (or load them from file)
+
+    dirsToSearch = list(rowsByDirectory.keys())
+    if options.debugMaxDir > 0:
+        dirsToSearch = dirsToSearch[0:options.debugMaxDir]
+
+    # Map numeric directory indices to names (we'll write this out to the detection
+    # index .json file)
+    dirIndexToName = {}
+    for iDir, dirName in enumerate(dirsToSearch):
+        dirIndexToName[iDir] = dirName
+
+    # Are we actually looking for matches, or just loading from a file?
+    if len(options.filterFileToLoad) == 0:
+
+        # length-nDirs list of lists of DetectionLocation objects
+        suspiciousDetections = [None] * len(dirsToSearch)
+
+        # We're actually looking for matches...
+        print('Finding similar detections...')
+
+        dirNameAndRows = []
+        for dirName in dirsToSearch:
+            rowsThisDirectory = rowsByDirectory[dirName]
+            dirNameAndRows.append((dirName,rowsThisDirectory))
+
+        allCandidateDetections = [None] * len(dirsToSearch)
+
+        # If we serialize results to intermediate files, we need to remove slashes from
+        # location names; we store the mappings here.
+        normalized_location_name_to_location_name = None
+        location_name_to_normalized_location_name = None
+
+        if not options.bParallelizeComparisons:
+
+            options.pbar = None
+            for iDir, dirName in tqdm(enumerate(dirsToSearch)):
+                dirNameAndRow = dirNameAndRows[iDir]
+                assert dirNameAndRow[0] == dirName
+                print('Processing dir {} of {}: {}'.format(iDir,len(dirsToSearch),dirName))
+                allCandidateDetections[iDir] = \
+                    _find_matches_in_directory(dirNameAndRow, options)
+
+        else:
+
+            n_workers = options.nWorkers
+            if n_workers > len(dirNameAndRows):
+                print('Pool of {} requested, but only {} folders available, reducing pool to {}'.\
+                      format(n_workers,len(dirNameAndRows),len(dirNameAndRows)))
+                n_workers = len(dirNameAndRows)
+
+            if options.parallelizationUsesThreads:
+                pool = ThreadPool(n_workers); poolstring = 'threads'
+            else:
+                pool = Pool(n_workers); poolstring = 'processes'
+
+            print('Starting comparison pool with {} {}'.format(n_workers,poolstring))
+
+            assert options.pass_detections_to_processes_method in ('file','memory'), \
+                'Unrecognized IPC mechanism: {}'.format(options.pass_detections_to_processes_method)
+
+            # ** Experimental **
+            #
+            # Rather than passing detections and results around in memory, write detections
+            # and results for each worker to intermediate files. May improve performance for
+            # very large result sets that exceed working memory.
+            if options.pass_detections_to_processes_method == 'file':
+
+                ##%% Convert location names to normalized names we can write to files
+
+                normalized_location_name_to_location_name = {}
+                for dir_name in dirsToSearch:
+                    normalized_location_name = flatten_path(dir_name)
+                    assert normalized_location_name not in normalized_location_name_to_location_name, \
+                        "Redundant location name {}, can't serialize to intermediate files".format(
+                            dir_name)
+                    normalized_location_name_to_location_name[normalized_location_name] = dir_name
+
+                location_name_to_normalized_location_name = \
+                    invert_dictionary(normalized_location_name_to_location_name)
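+
+                # e.g. flatten_path might turn 'site01/cam03' into something like
+                # 'site01~cam03'; the exact separator doesn't matter here, only that
+                # the flattened names are unique and filesystem-safe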
1333
+
+
+                ##%% Write results to files for each location
+
+                print('Writing results to intermediate files')
+
+                intermediate_json_file_folder = os.path.join(options.outputBase,'intermediate_results')
+                os.makedirs(intermediate_json_file_folder,exist_ok=True)
+
+                dirNameAndIntermediateFile = []
+
+                # i_location = 0; location_info = dirNameAndRows[i_location]
+                for i_location, location_info in tqdm(enumerate(dirNameAndRows)):
+
+                    location_name = location_info[0]
+                    assert location_name in location_name_to_normalized_location_name
+                    normalized_location_name = location_name_to_normalized_location_name[location_name]
+                    intermediate_results_file = os.path.join(intermediate_json_file_folder,
+                                                             normalized_location_name + '.csv')
+                    detections_table_this_location = location_info[1]
+                    detections_table_this_location.to_csv(intermediate_results_file,header=True,index=False)
+                    dirNameAndIntermediateFile.append((location_name,intermediate_results_file))
+
+
+                ##%% Find detections in each directory
+
+                options.pbar = None
+                allCandidateDetectionFiles = list(pool.imap(
+                    partial(_find_matches_in_directory,options=options), dirNameAndIntermediateFile))
+
+
+                ##%% Load into a combined list of candidate detections
+
+                allCandidateDetections = []
+
+                # candidate_detection_file = allCandidateDetectionFiles[0]
+                for candidate_detection_file in allCandidateDetectionFiles:
+                    with open(candidate_detection_file, 'r') as f:
+                        s = f.read()
+                    candidate_detections_this_file = jsonpickle.decode(s)
+                    allCandidateDetections.append(candidate_detections_this_file)
+
+
+                ##%% Clean up intermediate files
+
+                shutil.rmtree(intermediate_json_file_folder)
+
+            # If we're passing things around in memory, rather than via intermediate files
+            else:
+
+                # We get slightly nicer progress bar behavior using threads, by passing a pbar
+                # object and letting it get updated. We can't serialize this object across
+                # processes.
+                if options.parallelizationUsesThreads:
+                    options.pbar = tqdm(total=len(dirNameAndRows))
+                    allCandidateDetections = list(pool.imap(
+                        partial(_find_matches_in_directory,options=options), dirNameAndRows))
+                else:
+                    options.pbar = None
+                    allCandidateDetections = list(tqdm(pool.imap(
+                        partial(_find_matches_in_directory,options=options), dirNameAndRows)))
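+
+                # functools.partial binds the options argument here, so each worker
+                # call reduces to _find_matches_in_directory(dirNameAndRow, options=options)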
1394
+
+        print('\nFinished looking for similar detections')
+
+
+        ##%% Mark suspicious locations based on match results
+
+        print('Marking repeat detections...')
+
+        nImagesWithSuspiciousDetections = 0
+        nSuspiciousDetections = 0
+
+        # For each directory
+        #
+        # iDir = 51
+        for iDir in range(len(dirsToSearch)):
+
+            # A list of DetectionLocation objects
+            suspiciousDetectionsThisDir = []
+
+            # A list of DetectionLocation objects
+            candidateDetectionsThisDir = allCandidateDetections[iDir]
+
+            for iLocation, candidateLocation in enumerate(candidateDetectionsThisDir):
+
+                # "instances" is a list of file/detection pairs
+                nOccurrences = len(candidateLocation.instances)
+
+                if nOccurrences < options.occurrenceThreshold:
+                    continue
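+
+                # e.g. with occurrenceThreshold = 10, a detection has to show up at
+                # (nearly) the same location in at least 10 images before we flag it
+                # as suspicious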
1423
+
+                nImagesWithSuspiciousDetections += nOccurrences
+                nSuspiciousDetections += 1
+
+                suspiciousDetectionsThisDir.append(candidateLocation)
+
+            suspiciousDetections[iDir] = suspiciousDetectionsThisDir
+
+            # Sort the above-threshold detections for easier review
+            if options.smartSort is not None:
+                suspiciousDetections[iDir] = _sort_detections_for_directory(
+                    suspiciousDetections[iDir],options)
+
+            print('Found {} suspicious detections in directory {} ({})'.format(
+                len(suspiciousDetections[iDir]),iDir,dirsToSearch[iDir]))
+
+        # ...for each directory
+
+        print('Finished marking repeat detections')
+
+        print('Found {} unique detections on {} images that are suspicious'.format(
+            nSuspiciousDetections, nImagesWithSuspiciousDetections))
+
+    # If we're just loading detections from a file...
+    else:
+
+        assert len(suspiciousDetections) == len(dirsToSearch)
+
+        nDetectionsRemoved = 0
+        nDetectionsLoaded = 0
+
+        # We're skipping detection-finding, but to see which images are actually legitimate
+        # false positives, we may be looking for physical files or loading from a text file.
+        fileList = None
+        if options.filteredFileListToLoad is not None:
+            with open(options.filteredFileListToLoad) as f:
+                fileList = f.readlines()
+            fileList = [x.strip() for x in fileList]
+            nSuspiciousDetections = sum([len(x) for x in suspiciousDetections])
+            print('Loaded false positive list from file, will remove {} of {} suspicious detections'.format(
+                len(fileList), nSuspiciousDetections))
+
+        # For each directory
+        # iDir = 0; detections = suspiciousDetections[0]
+        #
+        # suspiciousDetections is a list of lists of DetectionLocation objects,
+        # one list per directory.
+        for iDir, detections in enumerate(suspiciousDetections):
+
+            bValidDetection = [True] * len(detections)
+            nDetectionsLoaded += len(detections)
+
+            # For each detection that was present before filtering
+            # iDetection = 0; detection = detections[iDetection]
+            for iDetection, detection in enumerate(detections):
+
+                # Are we checking the directory to see whether detections were actually false
+                # positives, or reading from a list?
+                if fileList is None:
+
+                    # Is the image still there?
+                    imageFullPath = os.path.join(filteringBaseDir,
+                                                 detection.sampleImageRelativeFileName)
+
+                    # If not, remove this from the list of suspicious detections
+                    if not os.path.isfile(imageFullPath):
+                        nDetectionsRemoved += 1
+                        bValidDetection[iDetection] = False
+
+                else:
+
+                    if detection.sampleImageRelativeFileName not in fileList:
+                        nDetectionsRemoved += 1
+                        bValidDetection[iDetection] = False
+
+            # ...for each detection
+
+            nRemovedThisDir = len(bValidDetection) - sum(bValidDetection)
+            if nRemovedThisDir > 0:
+                print('Removed {} of {} detections from directory {}'.\
+                      format(nRemovedThisDir,len(detections), iDir))
+
+            detectionsFiltered = list(compress(detections, bValidDetection))
+            suspiciousDetections[iDir] = detectionsFiltered
+
+        # ...for each directory
+
+        print('Removed {} of {} total detections via manual filtering'.\
+              format(nDetectionsRemoved, nDetectionsLoaded))
+
+    # ...if we are/aren't finding detections (vs. loading from file)
+
+    toReturn.suspiciousDetections = suspiciousDetections
+
+    toReturn.allRowsFiltered = _update_detection_table(toReturn, options, outputFilename)
+
+
+    ##%% Create filtering directory
+
+    if options.bWriteFilteringFolder:
+
+        print('Creating filtering folder...')
+
+        dateString = datetime.now().strftime('%Y.%m.%d.%H.%M.%S')
+        filteringDir = os.path.join(options.outputBase, 'filtering_' + dateString)
+        os.makedirs(filteringDir, exist_ok=True)
+
+        # Take a first loop over every suspicious detection, and do the things that make
+        # sense to do in a serial loop:
+        #
+        # * Generate file names (which requires an index variable)
+        # * Sort instances by confidence
+        # * Look up detections for each sample image in the big table (so we don't have
+        #   to pass the table to workers)
+        for iDir, suspiciousDetectionsThisDir in enumerate(tqdm(suspiciousDetections)):
+
+            for iDetection, detection in enumerate(suspiciousDetectionsThisDir):
+
+                # Sort instances in descending order by confidence
+                detection.instances.sort(key=attrgetter('confidence'),reverse=True)
+
+                if detection.clusterLabel is not None:
+                    clusterString = '_c{:0>4d}'.format(detection.clusterLabel)
+                else:
+                    clusterString = ''
+
+                # Choose the highest-confidence instance
+                instance = detection.instances[0]
+                relativePath = instance.filename
+
+                outputRelativePath = 'dir{:0>4d}_det{:0>4d}{}_n{:0>4d}.jpg'.format(
+                    iDir, iDetection, clusterString, len(detection.instances))
+                detection.sampleImageRelativeFileName = outputRelativePath
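+
+                # e.g. the third detection in the second directory, cluster 7, with 25
+                # instances, gets the sample image name 'dir0001_det0002_c0007_n0025.jpg'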
1557
+
+                iRow = filenameToRow[relativePath]
+                row = detectionResults.iloc[iRow]
+                detection.sampleImageDetections = row['detections']
+
+            # ...for each suspicious detection in this folder
+
+        # ...for each folder
+
+        # Collapse suspicious detections into a flat list
+        allSuspiciousDetections = []
+
+        # iDir = 0; suspiciousDetectionsThisDir = suspiciousDetections[iDir]
+        for iDir, suspiciousDetectionsThisDir in enumerate(tqdm(suspiciousDetections)):
+            for iDetection, detection in enumerate(suspiciousDetectionsThisDir):
+                allSuspiciousDetections.append(detection)
+
+        # Render suspicious detections
+        if options.bParallelizeRendering:
+
+            n_workers = options.nWorkers
+
+            if options.parallelizationUsesThreads:
+                pool = ThreadPool(n_workers); poolstring = 'threads'
+            else:
+                pool = Pool(n_workers); poolstring = 'processes'
+
+            print('Starting rendering pool with {} {}'.format(n_workers,poolstring))
+
+            # We get slightly nicer progress bar behavior using threads, by passing a pbar
+            # object and letting it get updated. We can't serialize this object across
+            # processes.
+            if options.parallelizationUsesThreads:
+                options.pbar = tqdm(total=len(allSuspiciousDetections))
+                allCandidateDetections = list(pool.imap(
+                    partial(_render_sample_image_for_detection,filteringDir=filteringDir,
+                            options=options), allSuspiciousDetections))
+            else:
+                options.pbar = None
+                allCandidateDetections = list(tqdm(pool.imap(
+                    partial(_render_sample_image_for_detection,filteringDir=filteringDir,
+                            options=options), allSuspiciousDetections)))
+
+        else:
+
+            # Serial loop over detections
+            for detection in allSuspiciousDetections:
+                _render_sample_image_for_detection(detection,filteringDir,options)
+
+        # Delete (large) temporary data from the list of suspicious detections
+        for detection in allSuspiciousDetections:
+            detection.sampleImageDetections = None
+
+        # Write out the detection index
+        detectionIndexFileName = os.path.join(filteringDir, detection_index_file_name_base)
+
+        # Prepare the data we're going to write to the detection index file
+        detectionInfo = {}
+
+        detectionInfo['suspiciousDetections'] = suspiciousDetections
+        detectionInfo['dirIndexToName'] = dirIndexToName
+
+        # Remove the one non-serializable object from the options struct before
+        # serializing to .json
+        options.pbar = None
+        detectionInfo['options'] = options
+
+        s = jsonpickle.encode(detectionInfo,make_refs=False)
+        with open(detectionIndexFileName, 'w') as f:
+            f.write(s)
+        toReturn.filterFile = detectionIndexFileName
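+
+        # jsonpickle lets us round-trip the options object and the DetectionLocation
+        # objects: calling jsonpickle.decode() on this file restores them, which is
+        # exactly what the filterFileToLoad path at the top of this function does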
1628
+
+        print('Done')
+
+    # ...if we're writing filtering info
+
+    return toReturn
+
+# ...def find_repeat_detections()