megadetector 5.0.9__py3-none-any.whl → 5.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (226)
  1. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/LICENSE +0 -0
  2. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/METADATA +12 -11
  3. megadetector-5.0.11.dist-info/RECORD +5 -0
  4. megadetector-5.0.11.dist-info/top_level.txt +1 -0
  5. api/__init__.py +0 -0
  6. api/batch_processing/__init__.py +0 -0
  7. api/batch_processing/api_core/__init__.py +0 -0
  8. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  9. api/batch_processing/api_core/batch_service/score.py +0 -439
  10. api/batch_processing/api_core/server.py +0 -294
  11. api/batch_processing/api_core/server_api_config.py +0 -98
  12. api/batch_processing/api_core/server_app_config.py +0 -55
  13. api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  14. api/batch_processing/api_core/server_job_status_table.py +0 -152
  15. api/batch_processing/api_core/server_orchestration.py +0 -360
  16. api/batch_processing/api_core/server_utils.py +0 -92
  17. api/batch_processing/api_core_support/__init__.py +0 -0
  18. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  19. api/batch_processing/api_support/__init__.py +0 -0
  20. api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  21. api/batch_processing/data_preparation/__init__.py +0 -0
  22. api/batch_processing/data_preparation/manage_local_batch.py +0 -2391
  23. api/batch_processing/data_preparation/manage_video_batch.py +0 -327
  24. api/batch_processing/integration/digiKam/setup.py +0 -6
  25. api/batch_processing/integration/digiKam/xmp_integration.py +0 -465
  26. api/batch_processing/integration/eMammal/test_scripts/config_template.py +0 -5
  27. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -126
  28. api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +0 -55
  29. api/batch_processing/postprocessing/__init__.py +0 -0
  30. api/batch_processing/postprocessing/add_max_conf.py +0 -64
  31. api/batch_processing/postprocessing/categorize_detections_by_size.py +0 -163
  32. api/batch_processing/postprocessing/combine_api_outputs.py +0 -249
  33. api/batch_processing/postprocessing/compare_batch_results.py +0 -958
  34. api/batch_processing/postprocessing/convert_output_format.py +0 -397
  35. api/batch_processing/postprocessing/load_api_results.py +0 -195
  36. api/batch_processing/postprocessing/md_to_coco.py +0 -310
  37. api/batch_processing/postprocessing/md_to_labelme.py +0 -330
  38. api/batch_processing/postprocessing/merge_detections.py +0 -401
  39. api/batch_processing/postprocessing/postprocess_batch_results.py +0 -1904
  40. api/batch_processing/postprocessing/remap_detection_categories.py +0 -170
  41. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +0 -661
  42. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +0 -211
  43. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +0 -82
  44. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +0 -1631
  45. api/batch_processing/postprocessing/separate_detections_into_folders.py +0 -731
  46. api/batch_processing/postprocessing/subset_json_detector_output.py +0 -696
  47. api/batch_processing/postprocessing/top_folders_to_bottom.py +0 -223
  48. api/synchronous/__init__.py +0 -0
  49. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  50. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -152
  51. api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -266
  52. api/synchronous/api_core/animal_detection_api/config.py +0 -35
  53. api/synchronous/api_core/animal_detection_api/data_management/annotations/annotation_constants.py +0 -47
  54. api/synchronous/api_core/animal_detection_api/detection/detector_training/copy_checkpoints.py +0 -43
  55. api/synchronous/api_core/animal_detection_api/detection/detector_training/model_main_tf2.py +0 -114
  56. api/synchronous/api_core/animal_detection_api/detection/process_video.py +0 -543
  57. api/synchronous/api_core/animal_detection_api/detection/pytorch_detector.py +0 -304
  58. api/synchronous/api_core/animal_detection_api/detection/run_detector.py +0 -627
  59. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +0 -1029
  60. api/synchronous/api_core/animal_detection_api/detection/run_inference_with_yolov5_val.py +0 -581
  61. api/synchronous/api_core/animal_detection_api/detection/run_tiled_inference.py +0 -754
  62. api/synchronous/api_core/animal_detection_api/detection/tf_detector.py +0 -165
  63. api/synchronous/api_core/animal_detection_api/detection/video_utils.py +0 -495
  64. api/synchronous/api_core/animal_detection_api/md_utils/azure_utils.py +0 -174
  65. api/synchronous/api_core/animal_detection_api/md_utils/ct_utils.py +0 -262
  66. api/synchronous/api_core/animal_detection_api/md_utils/directory_listing.py +0 -251
  67. api/synchronous/api_core/animal_detection_api/md_utils/matlab_porting_tools.py +0 -97
  68. api/synchronous/api_core/animal_detection_api/md_utils/path_utils.py +0 -416
  69. api/synchronous/api_core/animal_detection_api/md_utils/process_utils.py +0 -110
  70. api/synchronous/api_core/animal_detection_api/md_utils/sas_blob_utils.py +0 -509
  71. api/synchronous/api_core/animal_detection_api/md_utils/string_utils.py +0 -59
  72. api/synchronous/api_core/animal_detection_api/md_utils/url_utils.py +0 -144
  73. api/synchronous/api_core/animal_detection_api/md_utils/write_html_image_list.py +0 -226
  74. api/synchronous/api_core/animal_detection_api/md_visualization/visualization_utils.py +0 -841
  75. api/synchronous/api_core/tests/__init__.py +0 -0
  76. api/synchronous/api_core/tests/load_test.py +0 -110
  77. classification/__init__.py +0 -0
  78. classification/aggregate_classifier_probs.py +0 -108
  79. classification/analyze_failed_images.py +0 -227
  80. classification/cache_batchapi_outputs.py +0 -198
  81. classification/create_classification_dataset.py +0 -627
  82. classification/crop_detections.py +0 -516
  83. classification/csv_to_json.py +0 -226
  84. classification/detect_and_crop.py +0 -855
  85. classification/efficientnet/__init__.py +0 -9
  86. classification/efficientnet/model.py +0 -415
  87. classification/efficientnet/utils.py +0 -610
  88. classification/evaluate_model.py +0 -520
  89. classification/identify_mislabeled_candidates.py +0 -152
  90. classification/json_to_azcopy_list.py +0 -63
  91. classification/json_validator.py +0 -695
  92. classification/map_classification_categories.py +0 -276
  93. classification/merge_classification_detection_output.py +0 -506
  94. classification/prepare_classification_script.py +0 -194
  95. classification/prepare_classification_script_mc.py +0 -228
  96. classification/run_classifier.py +0 -286
  97. classification/save_mislabeled.py +0 -110
  98. classification/train_classifier.py +0 -825
  99. classification/train_classifier_tf.py +0 -724
  100. classification/train_utils.py +0 -322
  101. data_management/__init__.py +0 -0
  102. data_management/annotations/__init__.py +0 -0
  103. data_management/annotations/annotation_constants.py +0 -34
  104. data_management/camtrap_dp_to_coco.py +0 -238
  105. data_management/cct_json_utils.py +0 -395
  106. data_management/cct_to_md.py +0 -176
  107. data_management/cct_to_wi.py +0 -289
  108. data_management/coco_to_labelme.py +0 -272
  109. data_management/coco_to_yolo.py +0 -662
  110. data_management/databases/__init__.py +0 -0
  111. data_management/databases/add_width_and_height_to_db.py +0 -33
  112. data_management/databases/combine_coco_camera_traps_files.py +0 -206
  113. data_management/databases/integrity_check_json_db.py +0 -477
  114. data_management/databases/subset_json_db.py +0 -115
  115. data_management/generate_crops_from_cct.py +0 -149
  116. data_management/get_image_sizes.py +0 -188
  117. data_management/importers/add_nacti_sizes.py +0 -52
  118. data_management/importers/add_timestamps_to_icct.py +0 -79
  119. data_management/importers/animl_results_to_md_results.py +0 -158
  120. data_management/importers/auckland_doc_test_to_json.py +0 -372
  121. data_management/importers/auckland_doc_to_json.py +0 -200
  122. data_management/importers/awc_to_json.py +0 -189
  123. data_management/importers/bellevue_to_json.py +0 -273
  124. data_management/importers/cacophony-thermal-importer.py +0 -796
  125. data_management/importers/carrizo_shrubfree_2018.py +0 -268
  126. data_management/importers/carrizo_trail_cam_2017.py +0 -287
  127. data_management/importers/cct_field_adjustments.py +0 -57
  128. data_management/importers/channel_islands_to_cct.py +0 -913
  129. data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  130. data_management/importers/eMammal/eMammal_helpers.py +0 -249
  131. data_management/importers/eMammal/make_eMammal_json.py +0 -223
  132. data_management/importers/ena24_to_json.py +0 -275
  133. data_management/importers/filenames_to_json.py +0 -385
  134. data_management/importers/helena_to_cct.py +0 -282
  135. data_management/importers/idaho-camera-traps.py +0 -1407
  136. data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  137. data_management/importers/jb_csv_to_json.py +0 -150
  138. data_management/importers/mcgill_to_json.py +0 -250
  139. data_management/importers/missouri_to_json.py +0 -489
  140. data_management/importers/nacti_fieldname_adjustments.py +0 -79
  141. data_management/importers/noaa_seals_2019.py +0 -181
  142. data_management/importers/pc_to_json.py +0 -365
  143. data_management/importers/plot_wni_giraffes.py +0 -123
  144. data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
  145. data_management/importers/prepare_zsl_imerit.py +0 -131
  146. data_management/importers/rspb_to_json.py +0 -356
  147. data_management/importers/save_the_elephants_survey_A.py +0 -320
  148. data_management/importers/save_the_elephants_survey_B.py +0 -332
  149. data_management/importers/snapshot_safari_importer.py +0 -758
  150. data_management/importers/snapshot_safari_importer_reprise.py +0 -665
  151. data_management/importers/snapshot_serengeti_lila.py +0 -1067
  152. data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  153. data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  154. data_management/importers/sulross_get_exif.py +0 -65
  155. data_management/importers/timelapse_csv_set_to_json.py +0 -490
  156. data_management/importers/ubc_to_json.py +0 -399
  157. data_management/importers/umn_to_json.py +0 -507
  158. data_management/importers/wellington_to_json.py +0 -263
  159. data_management/importers/wi_to_json.py +0 -441
  160. data_management/importers/zamba_results_to_md_results.py +0 -181
  161. data_management/labelme_to_coco.py +0 -548
  162. data_management/labelme_to_yolo.py +0 -272
  163. data_management/lila/__init__.py +0 -0
  164. data_management/lila/add_locations_to_island_camera_traps.py +0 -97
  165. data_management/lila/add_locations_to_nacti.py +0 -147
  166. data_management/lila/create_lila_blank_set.py +0 -557
  167. data_management/lila/create_lila_test_set.py +0 -151
  168. data_management/lila/create_links_to_md_results_files.py +0 -106
  169. data_management/lila/download_lila_subset.py +0 -177
  170. data_management/lila/generate_lila_per_image_labels.py +0 -515
  171. data_management/lila/get_lila_annotation_counts.py +0 -170
  172. data_management/lila/get_lila_image_counts.py +0 -111
  173. data_management/lila/lila_common.py +0 -300
  174. data_management/lila/test_lila_metadata_urls.py +0 -132
  175. data_management/ocr_tools.py +0 -874
  176. data_management/read_exif.py +0 -681
  177. data_management/remap_coco_categories.py +0 -84
  178. data_management/remove_exif.py +0 -66
  179. data_management/resize_coco_dataset.py +0 -189
  180. data_management/wi_download_csv_to_coco.py +0 -246
  181. data_management/yolo_output_to_md_output.py +0 -441
  182. data_management/yolo_to_coco.py +0 -676
  183. detection/__init__.py +0 -0
  184. detection/detector_training/__init__.py +0 -0
  185. detection/detector_training/model_main_tf2.py +0 -114
  186. detection/process_video.py +0 -703
  187. detection/pytorch_detector.py +0 -337
  188. detection/run_detector.py +0 -779
  189. detection/run_detector_batch.py +0 -1219
  190. detection/run_inference_with_yolov5_val.py +0 -917
  191. detection/run_tiled_inference.py +0 -935
  192. detection/tf_detector.py +0 -188
  193. detection/video_utils.py +0 -606
  194. docs/source/conf.py +0 -43
  195. md_utils/__init__.py +0 -0
  196. md_utils/azure_utils.py +0 -174
  197. md_utils/ct_utils.py +0 -612
  198. md_utils/directory_listing.py +0 -246
  199. md_utils/md_tests.py +0 -968
  200. md_utils/path_utils.py +0 -1044
  201. md_utils/process_utils.py +0 -157
  202. md_utils/sas_blob_utils.py +0 -509
  203. md_utils/split_locations_into_train_val.py +0 -228
  204. md_utils/string_utils.py +0 -92
  205. md_utils/url_utils.py +0 -323
  206. md_utils/write_html_image_list.py +0 -225
  207. md_visualization/__init__.py +0 -0
  208. md_visualization/plot_utils.py +0 -293
  209. md_visualization/render_images_with_thumbnails.py +0 -275
  210. md_visualization/visualization_utils.py +0 -1537
  211. md_visualization/visualize_db.py +0 -551
  212. md_visualization/visualize_detector_output.py +0 -406
  213. megadetector-5.0.9.dist-info/RECORD +0 -224
  214. megadetector-5.0.9.dist-info/top_level.txt +0 -8
  215. taxonomy_mapping/__init__.py +0 -0
  216. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +0 -491
  217. taxonomy_mapping/map_new_lila_datasets.py +0 -154
  218. taxonomy_mapping/prepare_lila_taxonomy_release.py +0 -142
  219. taxonomy_mapping/preview_lila_taxonomy.py +0 -591
  220. taxonomy_mapping/retrieve_sample_image.py +0 -71
  221. taxonomy_mapping/simple_image_download.py +0 -218
  222. taxonomy_mapping/species_lookup.py +0 -834
  223. taxonomy_mapping/taxonomy_csv_checker.py +0 -159
  224. taxonomy_mapping/taxonomy_graph.py +0 -346
  225. taxonomy_mapping/validate_lila_category_mappings.py +0 -83
  226. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/WHEEL +0 -0
api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py
@@ -1,1631 +0,0 @@
- """
-
- repeat_detections_core.py
-
- Core utilities shared by find_repeat_detections and remove_repeat_detections.
-
- Nothing in this file (in fact nothing in this subpackage) will make sense until you read
- the RDE user's guide:
-
- https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing/postprocessing/repeat_detection_elimination
-
- """
-
- #%% Imports and environment
-
- import os
- import copy
- import warnings
- import sklearn.cluster
- import numpy as np
- import jsonpickle
- import traceback
- import pandas as pd
- import json
- import shutil
-
- from tqdm import tqdm
- from operator import attrgetter
- from datetime import datetime
- from itertools import compress
-
- import pyqtree
-
- from multiprocessing.pool import ThreadPool
- from multiprocessing.pool import Pool
- from functools import partial
-
- from md_utils import path_utils
- from md_utils import ct_utils
- from api.batch_processing.postprocessing.load_api_results import load_api_results, write_api_results
- from api.batch_processing.postprocessing.postprocess_batch_results import is_sas_url
- from api.batch_processing.postprocessing.postprocess_batch_results import relative_sas_url
- from md_visualization.visualization_utils import open_image, render_detection_bounding_boxes
- from md_visualization import render_images_with_thumbnails
- from md_visualization import visualization_utils as vis_utils
- from md_utils.path_utils import flatten_path
- from md_utils.ct_utils import invert_dictionary
-
- # "PIL cannot read EXIF metainfo for the images"
- warnings.filterwarnings('ignore', '(Possibly )?corrupt EXIF data', UserWarning)
-
- # "Metadata Warning, tag 256 had too many entries: 42, expected 1"
- warnings.filterwarnings('ignore', 'Metadata warning', UserWarning)
-
- jsonpickle.set_encoder_options('json', sort_keys=True, indent=1)
-
-
- #%% Constants
-
- detection_index_file_name_base = 'detectionIndex.json'
-
-
- #%% Classes
-
- class RepeatDetectionOptions:
-     """
-     Options that control the behavior of repeat detection elimination
-     """
-
-     #: Folder where images live; filenames in the MD results .json file should
-     #: be relative to this folder.
-     #:
-     #: imageBase can also be a SAS URL, in which case some error-checking is
-     #: disabled.
-     imageBase = ''
-
-     #: Folder where we should write temporary output.
-     outputBase = ''
-
-     #: Don't consider detections with confidence lower than this as suspicious
-     confidenceMin = 0.1
-
-     #: Don't consider detections with confidence higher than this as suspicious
-     confidenceMax = 1.0
-
-     #: What's the IOU threshold for considering two boxes the same?
-     iouThreshold = 0.9
-
-     #: How many occurrences of a single location (as defined by the IOU threshold)
-     #: are required before we declare it suspicious?
-     occurrenceThreshold = 20
-
-     #: Ignore "suspicious" detections smaller than some size
-     minSuspiciousDetectionSize = 0.0
-
-     #: Ignore "suspicious" detections larger than some size; these are often animals
-     #: taking up the whole image. This is expressed as a fraction of the image size.
-     maxSuspiciousDetectionSize = 0.2
-
-     #: Ignore folders with more than this many images in them
-     maxImagesPerFolder = None
-
-     #: A list of category IDs (ints) that we don't want to consider as candidate repeat detections.
-     #:
-     #: Typically used to say, e.g., "don't bother analyzing people or vehicles for repeat
-     #: detections", which you could do by saying excludeClasses = [2,3].
-     excludeClasses = []
-
-     #: For very large sets of results, passing chunks of results to and from workers as
-     #: parameters ('memory') can be memory-intensive, so we can serialize to intermediate
-     #: files instead ('file').
-     #:
-     #: The use of 'file' here is still experimental.
-     pass_detections_to_processes_method = 'memory'
-
-     #: Number of workers to use for parallel operations
-     nWorkers = 10
-
-     #: Should we use threads (True) or processes (False) for parallelization?
-     #:
-     #: Not relevant if nWorkers <= 1, or if bParallelizeComparisons and
-     #: bParallelizeRendering are both False.
-     parallelizationUsesThreads = True
-
-     #: If this is not empty, we'll load detections from a filter file rather than finding them
-     #: from the detector output. This should be a .json file containing detections; generally this
-     #: is the detectionIndex.json file in the filtering_* folder produced by find_repeat_detections().
-     filterFileToLoad = ''
-
-     #: (optional) List of filenames remaining after deletion of identified
-     #: repeated detections that are actually animals. This should be a flat
-     #: text file, one relative filename per line.
-     #:
-     #: This is a pretty esoteric code path and a candidate for removal.
-     #:
-     #: The scenario where I see it being most useful is the very hypothetical one
-     #: where we use an external tool for image handling that allows us to do something
-     #: smarter and less destructive than deleting images to mark them as non-false-positives.
-     filteredFileListToLoad = None
-
-     #: Should we write the folder of images used to manually review repeat detections?
-     bWriteFilteringFolder = True
-
-     #: For debugging: limit comparisons to a specific number of folders
-     debugMaxDir = -1
-
-     #: For debugging: limit rendering to a specific number of folders
-     debugMaxRenderDir = -1
-
-     #: For debugging: limit comparisons to a specific number of detections
-     debugMaxRenderDetection = -1
-
-     #: For debugging: limit comparisons to a specific number of instances
-     debugMaxRenderInstance = -1
-
-     #: Should we parallelize (across cameras) comparisons to find repeat detections?
-     bParallelizeComparisons = True
-
-     #: Should we parallelize image rendering?
-     bParallelizeRendering = True
-
-     #: If this is False (default), a detection from class A is *not* considered to be "the same"
-     #: as a detection from class B, even if they're at the same location.
-     categoryAgnosticComparisons = False
-
-     #: Determines whether bounding-box rendering errors (typically network errors) should
-     #: be treated as failures
-     bFailOnRenderError = False
-
-     #: Should we print a warning if images referred to in the MD results file are missing?
-     bPrintMissingImageWarnings = True
-
-     #: If bPrintMissingImageWarnings is True, should we print a warning about missing images
-     #: just once ('once') or every time ('all')?
-     missingImageWarningType = 'once' # 'all'
-
-     #: Image width for rendered images (it's called "max" because we don't resize smaller images).
-     #:
-     #: Original size is preserved if this is None.
-     #:
-     #: This does *not* include the tile image grid.
-     maxOutputImageWidth = None
-
-     #: Line thickness (in pixels) for box rendering
-     lineThickness = 10
-
-     #: Box expansion (in pixels)
-     boxExpansion = 2
-
-     #: Progress bar used during comparisons and rendering. Do not set externally.
-     #:
-     #: :meta private:
-     pbar = None
-
-     #: Replace filename tokens after reading, useful when the directory structure
-     #: has changed relative to the structure the detector saw.
-     filenameReplacements = {}
-
-     #: How many folders up from the leaf nodes should we be going to aggregate images into
-     #: cameras?
-     #:
-     #: If this is zero, each leaf folder is treated as a camera.
-     nDirLevelsFromLeaf = 0
-
-     #: An optional function that takes a string (an image file name) and returns
-     #: a string (the corresponding folder ID), typically used when multiple folders
-     #: actually correspond to the same camera in a manufacturer-specific way (e.g.
-     #: a/b/c/RECONYX100 and a/b/c/RECONYX101 may really be the same camera).
-     #:
-     #: See ct_utils for a common replacement function that handles most common
-     #: manufacturer folder names.
-     customDirNameFunction = None
-
-     #: Include only specific folders, mutually exclusive with [excludeFolders]
-     includeFolders = None
-
-     #: Exclude specific folders, mutually exclusive with [includeFolders]
-     excludeFolders = None
-
-     #: Optionally show *other* detections (i.e., detections other than the
-     #: one the user is evaluating), typically in a light gray.
-     bRenderOtherDetections = False
-
-     #: Threshold to use for *other* detections
-     otherDetectionsThreshold = 0.2
-
-     #: Line width (in pixels) for *other* detections
-     otherDetectionsLineWidth = 1
-
-     #: Optionally show a grid that includes a sample image for the detection, plus
-     #: the top N additional detections
-     bRenderDetectionTiles = True
-
-     #: Width of the original image (within the larger output image) when bRenderDetectionTiles
-     #: is True.
-     #:
-     #: If this is None, we'll render the original image in the detection tile image
-     #: at its original width.
-     detectionTilesPrimaryImageWidth = None
-
-     #: Width to use for the grid of detection instances.
-     #:
-     #: Can be a width in pixels, or a number from 0 to 1 representing a fraction
-     #: of the primary image width.
-     #:
-     #: If you want to render the grid at exactly 1 pixel wide, I guess you're out
-     #: of luck.
-     detectionTilesCroppedGridWidth = 0.6
-
-     #: Location of the primary image within the mosaic ('right' or 'left')
-     detectionTilesPrimaryImageLocation = 'right'
-
-     #: Maximum number of individual detection instances to include in the mosaic
-     detectionTilesMaxCrops = 250
-
-     #: If bRenderOtherDetections is True, what color should we use to render the
-     #: (hopefully pretty subtle) non-target detections?
-     #:
-     #: In theory I'd like these "other detection" rectangles to be partially
-     #: transparent, but this is not straightforward, and the alpha is ignored
-     #: here. But maybe if I leave it here and wish hard enough, someday it
-     #: will work.
-     #:
-     #: otherDetectionsColors = ['dimgray']
-     otherDetectionsColors = [(105,105,105,100)]
-
-     #: Sort detections within a directory so nearby detections are adjacent
-     #: in the list, for faster review.
-     #:
-     #: Can be None, 'xsort', or 'clustersort'
-     #:
-     #: * None sorts detections chronologically by first occurrence
-     #: * 'xsort' sorts detections from left to right
-     #: * 'clustersort' clusters detections and sorts by cluster
-     smartSort = 'xsort'
-
-     #: Only relevant if smartSort == 'clustersort'
-     smartSortDistanceThreshold = 0.1
-
-
- class RepeatDetectionResults:
-     """
-     The results of an entire repeat detection analysis
-     """
-
-     #: The data table (Pandas DataFrame), as loaded from the input json file via
-     #: load_api_results(). Has columns ['file', 'detections','failure'].
-     detectionResults = None
-
-     #: The other fields in the input json file, loaded via load_api_results()
-     otherFields = None
-
-     #: The data table after modification
-     detectionResultsFiltered = None
-
-     #: dict mapping folder names to whole rows from the data table
-     rowsByDirectory = None
-
-     #: dict mapping filenames to rows in the master table
-     filenameToRow = None
-
-     #: An array of length nDirs, where each element is a list of DetectionLocation
-     #: objects for that directory that have been flagged as suspicious
-     suspiciousDetections = None
-
-     #: The location of the .json file written with information about the RDE
-     #: review images (typically detectionIndex.json)
-     filterFile = None
-
-
- class IndexedDetection:
-     """
-     A single detection event on a single image
-     """
-
-     def __init__(self, iDetection=-1, filename='', bbox=[], confidence=-1, category='unknown'):
-
-         assert isinstance(iDetection,int)
-         assert isinstance(filename,str)
-         assert isinstance(bbox,list)
-         assert isinstance(category,str)
-
-         #: index of this detection within all detections for this filename
-         self.iDetection = iDetection
-
-         #: path to the image corresponding to this detection
-         self.filename = filename
-
-         #: [x_min, y_min, width_of_box, height_of_box]
-         self.bbox = bbox
-
-         #: confidence value of this detection
-         self.confidence = confidence
-
-         #: category ID (not name) of this detection
-         self.category = category
-
-     def __repr__(self):
-         s = ct_utils.pretty_print_object(self, False)
-         return s
-
-
- class DetectionLocation:
-     """
-     A unique-ish detection location, meaningful in the context of one
-     directory. All detections within an IoU threshold of self.bbox
-     will be stored in IndexedDetection objects.
-     """
-
-     def __init__(self, instance, detection, relativeDir, category, id=None):
-
-         assert isinstance(detection,dict)
-         assert isinstance(instance,IndexedDetection)
-         assert isinstance(relativeDir,str)
-         assert isinstance(category,str)
-
-         #: list of IndexedDetections that match this detection
-         self.instances = [instance]
-
-         #: category ID (not name) for this detection
-         self.category = category
-
-         #: bbox as x,y,w,h
-         self.bbox = detection['bbox']
-
-         #: relative folder (i.e., camera name) in which this detection was found
-         self.relativeDir = relativeDir
-
-         #: relative path to the canonical image representing this detection
-         self.sampleImageRelativeFileName = ''
-
-         #: list of detections on that canonical image that match this detection
-         self.sampleImageDetections = None
-
-         #: ID for this detection; this ID is only guaranteed to be unique within a directory
-         self.id = id
-
-         #: only used when doing cluster-based sorting
-         self.clusterLabel = None
-
-     def __repr__(self):
-         s = ct_utils.pretty_print_object(self, False)
-         return s
-
-     def to_api_detection(self):
-         """
-         Converts this detection to a 'detection' dictionary, making the semi-arbitrary
-         assumption that the first instance is representative of confidence.
-
-         Returns:
-             dict: dictionary in the format used to store detections in MD results
-         """
-
-         # This is a bit of a hack right now, but for future-proofing, I don't want to call this
-         # to retrieve anything other than the highest-confidence detection, and I'm assuming this
-         # is already sorted, so assert() that.
-         confidences = [i.confidence for i in self.instances]
-         assert confidences[0] == max(confidences), \
-             'Cannot convert an unsorted DetectionLocation to an API detection'
-
-         # It's not clear whether it's better to use instances[0].bbox or self.bbox
-         # here... they should be very similar, unless iouThreshold is very low.
-         # self.bbox is a better representation of the overall DetectionLocation.
-         detection = {'conf':self.instances[0].confidence,
-                      'bbox':self.bbox,'category':self.instances[0].category}
-         return detection
-
-
- #%% Support functions
-
- def _render_bounding_box(detection, inputFileName, outputFileName, lineWidth=5,
-                          expansion=0):
-     """
-     Renders the detection [detection] on the image [inputFileName], writing the result
-     to [outputFileName].
-     """
-
-     im = open_image(inputFileName)
-     d = detection.to_api_detection()
-     render_detection_bounding_boxes([d],im,thickness=lineWidth,expansion=expansion,
-                                     confidence_threshold=-10)
-     im.save(outputFileName)
-
-
- def _detection_rect_to_rtree_rect(detection_rect):
-     """
-     We store detections as x/y/w/h; rtree and pyqtree use l/b/r/t. Convert from
-     our representation to rtree's.
-     """
-
-     l = detection_rect[0]
-     b = detection_rect[1]
-     r = detection_rect[0] + detection_rect[2]
-     t = detection_rect[1] + detection_rect[3]
-     return (l,b,r,t)
-
-
- def _rtree_rect_to_detection_rect(rtree_rect):
-     """
-     We store detections as x/y/w/h; rtree and pyqtree use l/b/r/t. Convert from
-     rtree's representation to ours.
-     """
-
-     x = rtree_rect[0]
-     y = rtree_rect[1]
-     w = rtree_rect[2] - rtree_rect[0]
-     h = rtree_rect[3] - rtree_rect[1]
-     return (x,y,w,h)
-
-
- def _sort_detections_for_directory(candidateDetections,options):
-     """
-     candidateDetections is a list of DetectionLocation objects. Sorts them to
-     put nearby detections next to each other, for easier visual review. Returns
-     a sorted copy of candidateDetections; does not sort in place.
-     """
-
-     if len(candidateDetections) <= 1 or options.smartSort is None:
-         return candidateDetections
-
-     # Just sort by the X location of each box
-     if options.smartSort == 'xsort':
-         candidateDetectionsSorted = sorted(candidateDetections,
-                                            key=lambda x: (
-                                                (x.bbox[0]) + (x.bbox[2]/2.0)
-                                            ))
-         return candidateDetectionsSorted
-
-     elif options.smartSort == 'clustersort':
-
-         cluster = sklearn.cluster.AgglomerativeClustering(
-             n_clusters=None,
-             distance_threshold=options.smartSortDistanceThreshold,
-             linkage='complete')
-
-         # Prepare a list of points to represent each box;
-         # that's what we'll use for clustering
-         points = []
-         for det in candidateDetections:
-             # To use the upper-left of the box as the clustering point
-             # points.append([det.bbox[0],det.bbox[1]])
-
-             # To use the center of the box as the clustering point
-             points.append([det.bbox[0]+det.bbox[2]/2.0,
-                            det.bbox[1]+det.bbox[3]/2.0])
-         X = np.array(points)
-
-         labels = cluster.fit_predict(X)
-         unique_labels = np.unique(labels)
-
-         # Labels *could* be any unique labels according to the docs, but in practice
-         # they are unique integers from 0:nClusters.
-         #
-         # Make sure the labels are unique incrementing integers.
-         for i_label in range(1,len(unique_labels)):
-             assert unique_labels[i_label] == 1 + unique_labels[i_label-1]
-
-         assert len(labels) == len(candidateDetections)
-
-         # Store the label assigned to each cluster
-         for i_label,label in enumerate(labels):
-             candidateDetections[i_label].clusterLabel = label
-
-         # Now sort the clusters by their x coordinate, and re-assign labels
-         # so the labels are sortable
-         label_x_means = []
-
-         for label in unique_labels:
-             detections_this_label = [d for d in candidateDetections if (
-                 d.clusterLabel == label)]
-             points_this_label = [ [d.bbox[0],d.bbox[1]] for d in detections_this_label]
-             x = [p[0] for p in points_this_label]
-             y = [p[1] for p in points_this_label]
-
-             # Compute the centroid for debugging, but we're only going to use the x
-             # coordinate. This is the centroid of points used to represent detections,
-             # which may be box centers or box corners.
-             centroid = [ sum(x) / len(points_this_label), sum(y) / len(points_this_label) ]
-             label_xval = centroid[0]
-             label_x_means.append(label_xval)
-
-         old_cluster_label_to_new_cluster_label = {}
-         new_cluster_labels = np.argsort(label_x_means)
-         assert len(new_cluster_labels) == len(np.unique(new_cluster_labels))
-         for old_cluster_label in unique_labels:
-             old_cluster_label_to_new_cluster_label[old_cluster_label] =\
-                 np.where(new_cluster_labels==old_cluster_label)[0][0]
-
-         for i_cluster in range(0,len(unique_labels)):
-             old_label = unique_labels[i_cluster]
-             assert i_cluster == old_label
-             new_label = old_cluster_label_to_new_cluster_label[old_label]
-
-         for i_det,det in enumerate(candidateDetections):
-             old_label = det.clusterLabel
-             new_label = old_cluster_label_to_new_cluster_label[old_label]
-             det.clusterLabel = new_label
-
-         candidateDetectionsSorted = sorted(candidateDetections,
-                                            key=lambda x: (x.clusterLabel,x.id))
-
-         return candidateDetectionsSorted
-
-     else:
-         raise ValueError('Unrecognized sort method {}'.format(
-             options.smartSort))
-
- # ...def _sort_detections_for_directory(...)
-
-
- def _find_matches_in_directory(dirNameAndRows, options):
-     """
-     dirNameAndRows is a tuple of (name,rows).
-
-     "name" is a location name, typically a folder name, though this may be an arbitrary
-     location identifier.
-
-     "rows" is a Pandas dataframe with one row per image in this location, with columns:
-
-     * 'file': relative file name
-     * 'detections': a list of MD detection objects, i.e. dicts with keys ['category','conf','bbox']
-     * 'max_detection_conf': maximum confidence of any detection, in any category
-
-     "rows" can also point to a .csv file, in which case the detection table will be read from that
-     .csv file, and results will be written to a .csv file rather than being returned.
-
-     Find all unique detections in this directory.
-
-     Returns a list of DetectionLocation objects.
-     """
-
-     if options.pbar is not None:
-         options.pbar.update()
-
-     # Create a tree to store candidate detections
-     candidateDetectionsIndex = pyqtree.Index(bbox=(-0.1,-0.1,1.1,1.1))
-
-     assert len(dirNameAndRows) == 2, 'find_matches_in_directory: invalid input'
-     assert isinstance(dirNameAndRows[0],str), 'find_matches_in_directory: invalid location name'
-     dirName = dirNameAndRows[0]
-     rows = dirNameAndRows[1]
-
-     detections_loaded_from_csv_file = None
-
-     if isinstance(rows,str):
-         detections_loaded_from_csv_file = rows
-         print('Loading results for location {} from {}'.format(
-             dirName,detections_loaded_from_csv_file))
-         rows = pd.read_csv(detections_loaded_from_csv_file)
-         # Pandas writes detections out as strings; convert them back to lists
-         rows['detections'] = rows['detections'].apply(lambda s: json.loads(s.replace('\'','"')))
-
-     if options.maxImagesPerFolder is not None and len(rows) > options.maxImagesPerFolder:
-         print('Ignoring directory {} because it has {} images (limit set to {})'.format(
-             dirName,len(rows),options.maxImagesPerFolder))
-         return []
-
-     if options.includeFolders is not None:
-         assert options.excludeFolders is None, 'Cannot specify include and exclude folder lists'
-         if dirName not in options.includeFolders:
-             print('Ignoring folder {}, not in inclusion list'.format(dirName))
-             return []
-
-     if options.excludeFolders is not None:
-         assert options.includeFolders is None, 'Cannot specify include and exclude folder lists'
-         if dirName in options.excludeFolders:
-             print('Ignoring folder {}, on exclusion list'.format(dirName))
-             return []
-
-     # For each image in this directory
-     #
-     # iDirectoryRow = 0; row = rows.iloc[iDirectoryRow]
-     #
-     # iDirectoryRow is a pandas index, so it may not start from zero;
-     # for debugging, we maintain i_iteration as a loop index.
-     i_iteration = -1
-     n_boxes_evaluated = 0
-
-     for iDirectoryRow, row in rows.iterrows():
-
-         i_iteration += 1
-         filename = row['file']
-         if not path_utils.is_image_file(filename):
-             continue
-
-         if 'max_detection_conf' not in row or 'detections' not in row or \
-            row['detections'] is None:
-             print('Skipping row {}'.format(iDirectoryRow))
-             continue
-
-         # Don't bother checking images with no detections above threshold
-         maxP = float(row['max_detection_conf'])
-         if maxP < options.confidenceMin:
-             continue
-
-         # Array of dicts, where each element is
-         # {
-         #   'category': '1', # str value, category ID
-         #   'conf': 0.926, # confidence of this detection
-         #
-         #   (x_min, y_min) is upper-left, all in relative coordinates
-         #   'bbox': [x_min, y_min, width_of_box, height_of_box]
-         #
-         # }
-         detections = row['detections']
-         if isinstance(detections,float):
-             assert isinstance(row['failure'],str), 'Expected failure indicator'
-             print('Skipping failed image {} ({})'.format(filename,row['failure']))
-             continue
-
-         assert len(detections) > 0
-
-         # For each detection in this image
-         for iDetection, detection in enumerate(detections):
-
-             n_boxes_evaluated += 1
-
-             if detection is None:
-                 print('Skipping detection {}'.format(iDetection))
-                 continue
-
-             assert 'category' in detection and \
-                    'conf' in detection and \
-                    'bbox' in detection, 'Illegal detection'
-
-             confidence = detection['conf']
-
-             # This is no longer strictly true; I sometimes run RDE in stages, so
-             # some probabilities have already been made negative
-             #
-             # assert confidence >= 0.0 and confidence <= 1.0
-
-             assert confidence >= -1.0 and confidence <= 1.0
-
-             if confidence < options.confidenceMin:
-                 continue
-             if confidence > options.confidenceMax:
-                 continue
-
-             # Optionally exclude some classes from consideration as suspicious
-             if (options.excludeClasses is not None) and (len(options.excludeClasses) > 0):
-                 iClass = int(detection['category'])
-                 if iClass in options.excludeClasses:
-                     continue
-
-             bbox = detection['bbox']
-             confidence = detection['conf']
-
-             # Is this detection too big or too small for consideration?
-             w, h = bbox[2], bbox[3]
-
-             if (w == 0 or h == 0):
-                 continue
-
-             area = h * w
-
-             if area < 0:
-                 print('Warning: negative-area bounding box for file {}'.format(filename))
-                 area = abs(area); h = abs(h); w = abs(w)
-
-             assert area >= 0.0 and area <= 1.0, \
-                 'Illegal bounding box area {} in image {}'.format(area,filename)
-
-             if area < options.minSuspiciousDetectionSize:
-                 continue
-
-             if area > options.maxSuspiciousDetectionSize:
-                 continue
-
-             category = detection['category']
-
-             instance = IndexedDetection(iDetection=iDetection,
-                                         filename=row['file'], bbox=bbox,
-                                         confidence=confidence, category=category)
-
-             bFoundSimilarDetection = False
-
-             rtree_rect = _detection_rect_to_rtree_rect(bbox)
-
-             # This will return candidates of all classes
-             overlappingCandidateDetections =\
-                 candidateDetectionsIndex.intersect(rtree_rect)
-
-             overlappingCandidateDetections.sort(
-                 key=lambda x: x.id, reverse=False)
-
-             # For each detection in our candidate list
-             for iCandidate, candidate in enumerate(
-                     overlappingCandidateDetections):
-
-                 # Don't match across categories
-                 if (candidate.category != category) and (not (options.categoryAgnosticComparisons)):
-                     continue
-
-                 # Is this a match?
-                 try:
-                     iou = ct_utils.get_iou(bbox, candidate.bbox)
-                 except Exception as e:
-                     print(\
-                         'Warning: IOU computation error on boxes ({},{},{},{}),({},{},{},{}): {}'.\
-                         format(
-                             bbox[0],bbox[1],bbox[2],bbox[3],
-                             candidate.bbox[0],candidate.bbox[1],
-                             candidate.bbox[2],candidate.bbox[3], str(e)))
-                     continue
-
-                 if iou >= options.iouThreshold:
-
-                     bFoundSimilarDetection = True
-
-                     # If so, add this example to the list for this detection
-                     candidate.instances.append(instance)
-
-                     # We *don't* break here; we allow this instance to possibly
-                     # match multiple candidates. There isn't an obvious right or
-                     # wrong here.
-
-             # ...for each detection on our candidate list
-
-             # If we found no matches, add this to the candidate list
-             if not bFoundSimilarDetection:
-
-                 candidate = DetectionLocation(instance=instance,
-                                               detection=detection, relativeDir=dirName,
-                                               category=category, id=i_iteration)
-
-                 # pyqtree
-                 candidateDetectionsIndex.insert(item=candidate,bbox=rtree_rect)
-
-         # ...for each detection
-
-     # ...for each row
-
-     # Get all candidate detections
-
-     candidateDetections = candidateDetectionsIndex.intersect([-100,-100,100,100])
-
-     # For debugging only, it's convenient to have these sorted
-     # as if they had never gone into a tree structure. Typically
-     # this is in practice a sort by filename.
-     candidateDetections.sort(
-         key=lambda x: x.id, reverse=False)
-
-     if detections_loaded_from_csv_file is not None:
-         location_results_file = \
-             os.path.splitext(detections_loaded_from_csv_file)[0] + \
-             '_results.json'
-         print('Writing results for location {} to {}'.format(
-             dirName,location_results_file))
-         s = jsonpickle.encode(candidateDetections,make_refs=False)
-         with open(location_results_file,'w') as f:
-             f.write(s)
-         # json.dump(candidateDetections,f,indent=1)
-         return location_results_file
-     else:
-         return candidateDetections
-
- # ...def _find_matches_in_directory(...)
-
-
- def _update_detection_table(repeatDetectionResults, options, outputFilename=None):
-     """
-     Changes confidence values in repeatDetectionResults.detectionResults so that detections
-     deemed to be possible false positives are given negative confidence values.
-
-     repeatDetectionResults is an object of type RepeatDetectionResults, with a pandas
-     dataframe (detectionResults) containing all the detections loaded from the .json file,
-     and a list of detections for each location (suspiciousDetections) that are deemed to
-     be suspicious.
-
-     Returns the modified pandas dataframe (repeatDetectionResults.detectionResults), but
-     also modifies it in place.
-     """
-
-     # This is the pandas dataframe that contains actual detection results.
-     #
-     # Has fields ['file', 'detections','failure'].
-     detectionResults = repeatDetectionResults.detectionResults
-
-     # An array of length nDirs, where each element is a list of DetectionLocation
-     # objects for that directory that have been flagged as suspicious
-     suspiciousDetectionsByDirectory = repeatDetectionResults.suspiciousDetections
-
-     nBboxChanges = 0
-
-     print('Updating output table')
-
-     # For each directory
-     for iDir, directoryEvents in enumerate(suspiciousDetectionsByDirectory):
-
-         # For each suspicious detection group in this directory
-         for iDetectionEvent, detectionEvent in enumerate(directoryEvents):
-
-             locationBbox = detectionEvent.bbox
-
-             # For each instance of this suspicious detection
-             for iInstance, instance in enumerate(detectionEvent.instances):
-
-                 instanceBbox = instance.bbox
-
-                 # This should match the bbox for the detection event
-                 iou = ct_utils.get_iou(instanceBbox, locationBbox)
-
-                 # The bbox for this instance should be almost the same as the bbox
-                 # for this detection group, where "almost" is defined by the IOU
-                 # threshold.
-                 assert iou >= options.iouThreshold
-                 # if iou < options.iouThreshold:
-                 #     print('IOU warning: {},{}'.format(iou,options.iouThreshold))
-
-                 assert instance.filename in repeatDetectionResults.filenameToRow
-                 iRow = repeatDetectionResults.filenameToRow[instance.filename]
-                 row = detectionResults.iloc[iRow]
-                 rowDetections = row['detections']
-                 detectionToModify = rowDetections[instance.iDetection]
-
-                 # Make sure the bounding box matches
-                 assert (instanceBbox[0:3] == detectionToModify['bbox'][0:3])
-
-                 # Make the probability negative, if it hasn't been switched by
-                 # another bounding box
-                 if detectionToModify['conf'] >= 0:
-                     detectionToModify['conf'] = -1 * detectionToModify['conf']
-                     nBboxChanges += 1
-
-             # ...for each instance
-
-         # ...for each detection
-
-     # ...for each directory
-
-     # Update maximum probabilities
-
-     # For each row...
-     nProbChanges = 0
-     nProbChangesToNegative = 0
-     nProbChangesAcrossThreshold = 0
-
-     for iRow, row in detectionResults.iterrows():
-
-         detections = row['detections']
-         if (detections is None) or isinstance(detections,float):
-             assert isinstance(row['failure'],str)
-             continue
-
-         if len(detections) == 0:
-             continue
-
-         maxPOriginal = float(row['max_detection_conf'])
-
-         # No longer strictly true; sometimes I run RDE on RDE output
-         # assert maxPOriginal >= 0
-         assert maxPOriginal >= -1.0
-
-         maxP = None
-         nNegative = 0
-
-         for iDetection, detection in enumerate(detections):
-
-             p = detection['conf']
-
-             if p < 0:
-                 nNegative += 1
-
-             if (maxP is None) or (p > maxP):
-                 maxP = p
-
-         # We should only be making detections *less* likely in this process
-         assert maxP <= maxPOriginal
-         detectionResults.at[iRow, 'max_detection_conf'] = maxP
-
-         # If there was a meaningful change, count it
-         if abs(maxP - maxPOriginal) > 1e-3:
-
-             assert maxP < maxPOriginal
-
-             nProbChanges += 1
-
-             if (maxP < 0) and (maxPOriginal >= 0):
-                 nProbChangesToNegative += 1
-
-             if (maxPOriginal >= options.confidenceMin) and (maxP < options.confidenceMin):
-                 nProbChangesAcrossThreshold += 1
-
-             # Negative probabilities should be the only reason maxP changed, so
-             # we should have found at least one negative value if we reached
-             # this point.
-             assert nNegative > 0
-
-         # ...if there was a meaningful change to the max probability for this row
-
-     # ...for each row
-
-     # If we're also writing output...
-     if outputFilename is not None and len(outputFilename) > 0:
-         write_api_results(detectionResults, repeatDetectionResults.otherFields,
-                           outputFilename)
-
-     print(
-         'Finished updating detection table\nChanged {} detections that impacted {} maxPs ({} to negative) ({} across confidence threshold)'.format(
-             nBboxChanges, nProbChanges, nProbChangesToNegative, nProbChangesAcrossThreshold))
-
-     return detectionResults
-
- # ...def _update_detection_table(...)
-
-
- def _render_sample_image_for_detection(detection,filteringDir,options):
-     """
-     Render a sample image for one unique detection, possibly containing lightly-colored
-     high-confidence detections from elsewhere in the sample image.
-
-     "detection" is a DetectionLocation object.
-
-     Depends on having already sorted instances within this detection by confidence, and
-     having already generated an output file name for this sample image.
-     """
-
-     # Confidence values should already have been sorted in the previous loop
-     instance_confidences = [instance.confidence for instance in detection.instances]
-     assert ct_utils.is_list_sorted(instance_confidences,reverse=True)
-
-     # Choose the highest-confidence index
-     instance = detection.instances[0]
-     relativePath = instance.filename
-
-     outputRelativePath = detection.sampleImageRelativeFileName
-     assert len(outputRelativePath) > 0
-
-     outputFullPath = os.path.join(filteringDir, outputRelativePath)
-
-     if is_sas_url(options.imageBase):
-         inputFullPath = relative_sas_url(options.imageBase, relativePath)
-     else:
-         inputFullPath = os.path.join(options.imageBase, relativePath)
-         assert (os.path.isfile(inputFullPath)), 'Not a file: {}'.\
-             format(inputFullPath)
-
-     try:
-
-         im = open_image(inputFullPath)
-
-         # Should we render (typically in a very light color) detections
-         # *other* than the one we're highlighting here?
-         if options.bRenderOtherDetections:
-
-             # Optionally resize the output image
-             if (options.maxOutputImageWidth is not None) and \
-                (im.size[0] > options.maxOutputImageWidth):
-                 im = vis_utils.resize_image(im, options.maxOutputImageWidth,
-                                             target_height=-1)
-
-             assert detection.sampleImageDetections is not None
-
-             # At this point, suspicious detections have already been flipped
-             # negative, which we don't want for rendering purposes
-             rendered_detections = []
-
-             for det in detection.sampleImageDetections:
-                 rendered_det = copy.copy(det)
-                 rendered_det['conf'] = abs(rendered_det['conf'])
-                 rendered_detections.append(rendered_det)
-
-             # Render other detections first (typically in a thin+light box)
-             render_detection_bounding_boxes(rendered_detections,
-                                             im,
-                                             label_map=None,
-                                             thickness=options.otherDetectionsLineWidth,
-                                             expansion=options.boxExpansion,
-                                             colormap=options.otherDetectionsColors,
-                                             confidence_threshold=options.otherDetectionsThreshold)
-
-             # Now render the example detection (on top of at least one
-             # of the other detections)
-
-             # This converts the *first* instance to an API standard detection;
-             # because we just sorted this list in descending order by confidence,
-             # this is the highest-confidence detection.
-             d = detection.to_api_detection()
-
-             render_detection_bounding_boxes([d],im,thickness=options.lineThickness,
-                                             expansion=options.boxExpansion,
-                                             confidence_threshold=-10)
-
-             im.save(outputFullPath)
-
-         else:
-
-             _render_bounding_box(detection, inputFullPath, outputFullPath,
-                                  lineWidth=options.lineThickness, expansion=options.boxExpansion)
-
-         # ...if we are/aren't rendering other bounding boxes
-
-         # If we're rendering detection tiles, we'll re-load and re-write the image we
-         # just wrote to outputFullPath
-         if options.bRenderDetectionTiles:
-
-             assert not is_sas_url(options.imageBase), "Can't render detection tiles from SAS URLs"
-
-             if options.detectionTilesPrimaryImageWidth is not None:
-                 primaryImageWidth = options.detectionTilesPrimaryImageWidth
-             else:
-                 # "im" may be a resized version of the original image, if we've already run
-                 # the code to render other bounding boxes.
-                 primaryImageWidth = im.size[0]
-
-             if options.detectionTilesCroppedGridWidth <= 1.0:
-                 croppedGridWidth = round(options.detectionTilesCroppedGridWidth * primaryImageWidth)
-             else:
-                 croppedGridWidth = options.detectionTilesCroppedGridWidth
-
-             secondaryImageFilenameList = []
-             secondaryImageBoundingBoxList = []
-
-             # If we start from zero, we include the sample crop
-             for instance in detection.instances[0:]:
-                 secondaryImageFilenameList.append(os.path.join(options.imageBase,
-                                                                instance.filename))
-                 secondaryImageBoundingBoxList.append(instance.bbox)
-
-             # Optionally limit the number of crops we pass to the rendering function
-             if (options.detectionTilesMaxCrops is not None) and \
-                (len(detection.instances) > options.detectionTilesMaxCrops):
-                 secondaryImageFilenameList = \
-                     secondaryImageFilenameList[0:options.detectionTilesMaxCrops]
-                 secondaryImageBoundingBoxList = \
-                     secondaryImageBoundingBoxList[0:options.detectionTilesMaxCrops]
-
-             # This will over-write the image we've already written to outputFullPath
-             render_images_with_thumbnails.render_images_with_thumbnails(
-                 primary_image_filename=outputFullPath,
-                 primary_image_width=primaryImageWidth,
-                 secondary_image_filename_list=secondaryImageFilenameList,
-                 secondary_image_bounding_box_list=secondaryImageBoundingBoxList,
-                 cropped_grid_width=croppedGridWidth,
-                 output_image_filename=outputFullPath,
-                 primary_image_location=options.detectionTilesPrimaryImageLocation)
-
-         # ...if we are/aren't rendering detection tiles
-
-     except Exception as e:
-
-         stack_trace = traceback.format_exc()
-         print('Warning: error rendering bounding box from {} to {}: {} ({})'.format(
-             inputFullPath,outputFullPath,e,stack_trace))
-         if options.bFailOnRenderError:
-             raise
-
- # ...def _render_sample_image_for_detection(...)
-
-
- #%% Main entry point
1093
-
1094
- def find_repeat_detections(inputFilename, outputFilename=None, options=None):
1095
- """
1096
- Find detections in a MD results file that occur repeatedly and are likely to be
1097
- rocks/sticks.
1098
-
1099
- Args:
1100
- inputFilename (str): the MD results .json file to analyze
1101
- outputFilename (str, optional): the filename to which we should write results
1102
- with repeat detections removed, typically set to None during the first
1103
- part of the RDE process.
1104
- options (RepeatDetectionOptions): all the interesting options controlling this
1105
- process; see RepeatDetectionOptions for details.
1106
-
1107
- Returns:
1108
- RepeatDetectionResults: results of the RDE process; see RepeatDetectionResults
1109
- for details.
1110
- """
1111
-
1112
- ##%% Input handling
1113
-
1114
- if options is None:
1115
-
1116
- options = RepeatDetectionOptions()
1117
-
1118
- # Validate some options
1119
-
1120
- if options.customDirNameFunction is not None:
1121
- assert options.nDirLevelsFromLeaf == 0, \
1122
- 'Cannot mix custom dir name functions with nDirLevelsFromLeaf'
1123
-
1124
- if options.nDirLevelsFromLeaf != 0:
1125
- assert options.customDirNameFunction is None, \
1126
- 'Cannot mix custom dir name functions with nDirLevelsFromLeaf'
1127
-
1128
- if options.filterFileToLoad is not None and len(options.filterFileToLoad) > 0:
1129
-
1130
- print('Bypassing detection-finding, loading from {}'.format(options.filterFileToLoad))
1131
-
1132
- # Load the filtering file
1133
- detectionIndexFileName = options.filterFileToLoad
1134
- sIn = open(detectionIndexFileName, 'r').read()
1135
- detectionInfo = jsonpickle.decode(sIn)
1136
- filteringBaseDir = os.path.dirname(options.filterFileToLoad)
1137
- suspiciousDetections = detectionInfo['suspiciousDetections']
1138
-
1139
- # Load the same options we used when finding repeat detections
1140
- options = detectionInfo['options']
1141
-
1142
- # ...except for things that explicitly tell this function not to
1143
- # find repeat detections.
1144
- options.filterFileToLoad = detectionIndexFileName
1145
- options.bWriteFilteringFolder = False
1146
-
1147
- # ...if we're loading from an existing filtering file
1148
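For orientation, here is a minimal sketch (not part of the package) of reading a detection index file back in outside this function, assuming the structure serialized at the end of this function ('suspiciousDetections', 'dirIndexToName', 'options'); the path is hypothetical.

import jsonpickle

# Hypothetical path to a detection index written by a previous RDE pass
index_file = '/data/rde_output/filtering_2024.01.01.00.00.00/detectionIndex.json'
with open(index_file, 'r') as f:
    detection_info = jsonpickle.decode(f.read())

# One list of DetectionLocation objects per directory
for detections_this_dir in detection_info['suspiciousDetections']:
    for d in detections_this_dir:
        print('{} ({} instances)'.format(d.sampleImageRelativeFileName, len(d.instances)))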
-
1149
- toReturn = RepeatDetectionResults()
1150
-
1151
-
1152
- # Check early to avoid problems with the output folder
1153
-
1154
- if options.bWriteFilteringFolder:
1155
- assert options.outputBase is not None and len(options.outputBase) > 0
1156
- os.makedirs(options.outputBase,exist_ok=True)
1157
-
1158
-
1159
- # Load file to a pandas dataframe. Also populates 'max_detection_conf', even if it's
1160
- # not present in the .json file.
1161
- detectionResults, otherFields = load_api_results(inputFilename, normalize_paths=True,
1162
- filename_replacements=options.filenameReplacements,
1163
- force_forward_slashes=True)
1164
- toReturn.detectionResults = detectionResults
1165
- toReturn.otherFields = otherFields
1166
-
1167
- # detectionResults[detectionResults['failure'].notna()]
1168
-
1169
- # Before doing any real work, make sure we can *probably* access images
1170
- # This is just a cursory check on the first image, but it heads off most
1171
- # problems related to incorrect mount points, etc. Better to do this before
1172
- # spending 20 minutes finding repeat detections.
1173
-
1174
- if options.bWriteFilteringFolder:
1175
-
1176
- if not is_sas_url(options.imageBase):
1177
-
1178
- row = detectionResults.iloc[0]
1179
- relativePath = row['file']
1180
- if options.filenameReplacements is not None:
1181
- for s in options.filenameReplacements.keys():
1182
- relativePath = relativePath.replace(s,options.filenameReplacements[s])
1183
- absolutePath = os.path.join(options.imageBase,relativePath)
1184
- assert os.path.isfile(absolutePath), 'Could not find file {}'.format(absolutePath)
1185
-
1186
-
1187
- ##%% Separate files into locations
1188
-
1189
- # This will be a map from a directory name to smaller data frames
1190
- rowsByDirectory = {}
1191
-
1192
- # This is a mapping back into the rows of the original table
1193
- filenameToRow = {}
1194
-
1195
- print('Separating images into locations...')
1196
-
1197
- nCustomDirReplacements = 0
1198
-
1199
- # iRow = 0; row = detectionResults.iloc[0]
1200
- for iRow, row in tqdm(detectionResults.iterrows(),total=len(detectionResults)):
1201
-
1202
- relativePath = row['file']
1203
-
1204
- if options.customDirNameFunction is not None:
1205
- basicDirName = os.path.dirname(relativePath.replace('\\','/'))
1206
- dirName = options.customDirNameFunction(relativePath)
1207
- if basicDirName != dirName:
1208
- nCustomDirReplacements += 1
1209
- else:
1210
- dirName = os.path.dirname(relativePath)
1211
-
1212
- if len(dirName) == 0:
1213
- assert options.nDirLevelsFromLeaf == 0, \
1214
- 'Can''t use the dirLevelsFromLeaf option with flat filenames'
1215
- else:
1216
- if options.nDirLevelsFromLeaf > 0:
1217
- iLevel = 0
1218
- while (iLevel < options.nDirLevelsFromLeaf):
1219
- iLevel += 1
1220
- dirName = os.path.dirname(dirName)
1221
- assert len(dirName) > 0
1222
-
1223
- if not dirName in rowsByDirectory:
1224
- # Create a new DataFrame with just this row
1225
- # rowsByDirectory[dirName] = pd.DataFrame(row)
1226
- rowsByDirectory[dirName] = []
1227
-
1228
- rowsByDirectory[dirName].append(row)
1229
-
1230
- assert relativePath not in filenameToRow
1231
- filenameToRow[relativePath] = iRow
1232
-
1233
- # ...for each unique detection
1234
-
1235
- if options.customDirNameFunction is not None:
1236
- print('Custom dir name function made {} replacements (of {} images)'.format(
1237
- nCustomDirReplacements,len(detectionResults)))
1238
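As a concrete illustration of the customDirNameFunction hook validated above, here is a hypothetical function that groups images by their top-level (site) folder rather than their immediate parent, so that multiple cameras at one site count as a single location:

def site_level_dir_name(relative_path):
    # Hypothetical example: 'site01/cam02/img0001.jpg' and
    # 'site01/cam03/img0002.jpg' both map to location 'site01'
    return relative_path.replace('\\', '/').split('/')[0]

# options.customDirNameFunction = site_level_dir_name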
-
1239
- # Convert lists of rows to proper DataFrames
1240
- dirs = list(rowsByDirectory.keys())
1241
- for d in dirs:
1242
- rowsByDirectory[d] = pd.DataFrame(rowsByDirectory[d])
1243
-
1244
- toReturn.rowsByDirectory = rowsByDirectory
1245
- toReturn.filenameToRow = filenameToRow
1246
-
1247
- print('Finished separating {} files into {} locations'.format(len(detectionResults),
1248
- len(rowsByDirectory)))
1249
-
1250
- ##% Look for repeat detections (or load them from file)
1251
-
1252
- dirsToSearch = list(rowsByDirectory.keys())
1253
- if options.debugMaxDir > 0:
1254
- dirsToSearch = dirsToSearch[0:options.debugMaxDir]
1255
-
1256
- # Map numeric directory indices to names (we'll write this out to the detection index .json file)
1257
- dirIndexToName = {}
1258
- for iDir, dirName in enumerate(dirsToSearch):
1259
- dirIndexToName[iDir] = dirName
1260
-
1261
- # Are we actually looking for matches, or just loading from a file?
1262
- if len(options.filterFileToLoad) == 0:
1263
-
1264
- # length-nDirs list of lists of DetectionLocation objects
1265
- suspiciousDetections = [None] * len(dirsToSearch)
1266
-
1267
- # We're actually looking for matches...
1268
- print('Finding similar detections...')
1269
-
1270
- dirNameAndRows = []
1271
- for dirName in dirsToSearch:
1272
- rowsThisDirectory = rowsByDirectory[dirName]
1273
- dirNameAndRows.append((dirName,rowsThisDirectory))
1274
-
1275
- allCandidateDetections = [None] * len(dirsToSearch)
1276
-
1277
- # If we serialize results to intermediate files, we need to remove slashes from
1278
- # location names; we store mappings here.
1279
- normalized_location_name_to_location_name = None
1280
- location_name_to_normalized_location_name = None
1281
-
1282
- if not options.bParallelizeComparisons:
1283
-
1284
- options.pbar = None
1285
- for iDir, dirName in tqdm(enumerate(dirsToSearch)):
1286
- dirNameAndRow = dirNameAndRows[iDir]
1287
- assert dirNameAndRow[0] == dirName
1288
- print('Processing dir {} of {}: {}'.format(iDir,len(dirsToSearch),dirName))
1289
- allCandidateDetections[iDir] = \
1290
- _find_matches_in_directory(dirNameAndRow, options)
1291
-
1292
- else:
1293
-
1294
- n_workers = options.nWorkers
1295
- if n_workers > len(dirNameAndRows):
1296
- print('Pool of {} requested, but only {} folders available, reducing pool to {}'.\
1297
- format(n_workers,len(dirNameAndRows),len(dirNameAndRows)))
1298
- n_workers = len(dirNameAndRows)
1299
-
1300
- if options.parallelizationUsesThreads:
1301
- pool = ThreadPool(n_workers); poolstring = 'threads'
1302
- else:
1303
- pool = Pool(n_workers); poolstring = 'processes'
1304
-
1305
- print('Starting comparison pool with {} {}'.format(n_workers,poolstring))
1306
-
1307
- assert options.pass_detections_to_processes_method in ('file','memory'), \
1308
- 'Unrecognized IPC mechanism: {}'.format(options.pass_detections_to_processes_method)
1309
-
1310
- # ** Experimental **
1311
- #
1312
- # Rather than passing detections and results around in memory, write detections and
1313
- # results for each worker to intermediate files. May improve performance for very large
1314
- # results sets that exceed working memory.
1315
- if options.pass_detections_to_processes_method == 'file':
1316
-
1317
- ##%% Convert location names to normalized names we can write to files
1318
-
1319
- normalized_location_name_to_location_name = {}
1320
- for dir_name in dirsToSearch:
1321
- normalized_location_name = flatten_path(dir_name)
1322
- assert normalized_location_name not in normalized_location_name_to_location_name, \
1323
- 'Redundant location name {}, can\'t serialize to intermediate files'.format(
1324
- dir_name)
1325
- normalized_location_name_to_location_name[normalized_location_name] = dir_name
1326
-
1327
- location_name_to_normalized_location_name = \
1328
- invert_dictionary(normalized_location_name_to_location_name)
1329
-
1330
-
1331
- ##%% Write results to files for each location
1332
-
1333
- print('Writing results to intermediate files')
1334
-
1335
- intermediate_json_file_folder = os.path.join(options.outputBase,'intermediate_results')
1336
- os.makedirs(intermediate_json_file_folder,exist_ok=True)
1337
-
1338
- # i_location = 0; location_info = dirNameAndRows[0]
1339
- dirNameAndIntermediateFile = []
1340
-
1341
- # i_location = 0; location_info = dirNameAndRows[i_location]
1342
- for i_location, location_info in tqdm(enumerate(dirNameAndRows)):
1343
-
1344
- location_name = location_info[0]
1345
- assert location_name in location_name_to_normalized_location_name
1346
- normalized_location_name = location_name_to_normalized_location_name[location_name]
1347
- intermediate_results_file = os.path.join(intermediate_json_file_folder,
1348
- normalized_location_name + '.csv')
1349
- detections_table_this_location = location_info[1]
1350
- detections_table_this_location.to_csv(intermediate_results_file,header=True,index=False)
1351
- dirNameAndIntermediateFile.append((location_name,intermediate_results_file))
1352
-
1353
-
1354
- ##%% Find detections in each directory
1355
-
1356
- options.pbar = None
1357
- allCandidateDetectionFiles = list(pool.imap(
1358
- partial(_find_matches_in_directory,options=options), dirNameAndIntermediateFile))
1359
-
1360
-
1361
- ##%% Load into a combined list of candidate detections
1362
-
1363
- allCandidateDetections = []
1364
-
1365
- # candidate_detection_file = allCandidateDetectionFiles[0]
1366
- for candidate_detection_file in allCandidateDetectionFiles:
1367
- s = open(candidate_detection_file, 'r').read()
1368
- candidate_detections_this_file = jsonpickle.decode(s)
1369
- allCandidateDetections.append(candidate_detections_this_file)
1370
-
1371
-
1372
- ##%% Clean up intermediate files
1373
-
1374
- shutil.rmtree(intermediate_json_file_folder)
1375
-
1376
- # If we're passing things around in memory, rather than via intermediate files
1377
- else:
1378
-
1379
- # We get slightly nicer progress bar behavior using threads, by passing a pbar
1380
- # object and letting it get updated. We can't serialize this object across
1381
- # processes.
1382
- if options.parallelizationUsesThreads:
1383
- options.pbar = tqdm(total=len(dirNameAndRows))
1384
- allCandidateDetections = list(pool.imap(
1385
- partial(_find_matches_in_directory,options=options), dirNameAndRows))
1386
- else:
1387
- options.pbar = None
1388
- allCandidateDetections = list(tqdm(pool.imap(
1389
- partial(_find_matches_in_directory,options=options), dirNameAndRows)))
1390
-
1391
- print('\nFinished looking for similar detections')
1392
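The 'file' IPC path above depends on every location name flattening to a unique, filesystem-safe token; a toy sketch of that contract follows (flatten_path_sketch is a stand-in for the flatten_path helper this module imports, whose exact behavior is an assumption here):

def flatten_path_sketch(p):
    # Stand-in for the module's flatten_path helper; assumes it replaces
    # path separators with a filesystem-safe character
    return p.replace('\\', '/').replace('/', '~')

locations = ['site01/cam02', 'site01/cam03']
normalized_to_original = {}
for loc in locations:
    flat = flatten_path_sketch(loc)
    assert flat not in normalized_to_original, 'Redundant location name {}'.format(loc)
    normalized_to_original[flat] = loc
print(sorted(normalized_to_original.keys()))  # ['site01~cam02', 'site01~cam03']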
-
1393
-
1394
- ##%% Mark suspicious locations based on match results
1395
-
1396
- print('Marking repeat detections...')
1397
-
1398
- nImagesWithSuspiciousDetections = 0
1399
- nSuspiciousDetections = 0
1400
-
1401
- # For each directory
1402
- #
1403
- # iDir = 51
1404
- for iDir in range(len(dirsToSearch)):
1405
-
1406
- # A list of DetectionLocation objects
1407
- suspiciousDetectionsThisDir = []
1408
-
1409
- # A list of DetectionLocation objects
1410
- candidateDetectionsThisDir = allCandidateDetections[iDir]
1411
-
1412
- for iLocation, candidateLocation in enumerate(candidateDetectionsThisDir):
1413
-
1414
- # occurrenceList is a list of file/detection pairs
1415
- nOccurrences = len(candidateLocation.instances)
1416
-
1417
- if nOccurrences < options.occurrenceThreshold:
1418
- continue
1419
-
1420
- nImagesWithSuspiciousDetections += nOccurrences
1421
- nSuspiciousDetections += 1
1422
-
1423
- suspiciousDetectionsThisDir.append(candidateLocation)
1424
-
1425
- suspiciousDetections[iDir] = suspiciousDetectionsThisDir
1426
-
1427
- # Sort the above-threshold detections for easier review
1428
- if options.smartSort is not None:
1429
- suspiciousDetections[iDir] = _sort_detections_for_directory(
1430
- suspiciousDetections[iDir],options)
1431
-
1432
- print('Found {} suspicious detections in directory {} ({})'.format(
1433
- len(suspiciousDetections[iDir]),iDir,dirsToSearch[iDir]))
1434
-
1435
- # ...for each directory
1436
-
1437
- print('Finished marking repeat detections')
1438
-
1439
- print('Found {} unique detections on {} images that are suspicious'.format(
1440
- nSuspiciousDetections, nImagesWithSuspiciousDetections))
1441
-
1442
- # If we're just loading detections from a file...
1443
- else:
1444
-
1445
- assert len(suspiciousDetections) == len(dirsToSearch)
1446
-
1447
- nDetectionsRemoved = 0
1448
- nDetectionsLoaded = 0
1449
-
1450
- # We're skipping detection-finding, but to see which images are actually legit false
1451
- # positives, we may be looking for physical files or loading from a text file.
1452
- fileList = None
1453
- if options.filteredFileListToLoad is not None:
1454
- with open(options.filteredFileListToLoad) as f:
1455
- fileList = f.readlines()
1456
- fileList = [x.strip() for x in fileList]
1457
- nSuspiciousDetections = sum([len(x) for x in suspiciousDetections])
1458
- print('Loaded false positive list from file ' + \
1459
- 'will remove {} of {} suspicious detections'.format(
1460
- len(fileList), nSuspiciousDetections))
1461
-
1462
- # For each directory
1463
- # iDir = 0; detections = suspiciousDetections[0]
1464
- #
1465
- # suspiciousDetections is an array of DetectionLocation objects,
1466
- # one per directory.
1467
- for iDir, detections in enumerate(suspiciousDetections):
1468
-
1469
- bValidDetection = [True] * len(detections)
1470
- nDetectionsLoaded += len(detections)
1471
-
1472
- # For each detection that was present before filtering
1473
- # iDetection = 0; detection = detections[iDetection]
1474
- for iDetection, detection in enumerate(detections):
1475
-
1476
- # Are we checking the directory to see whether detections were actually false
1477
- # positives, or reading from a list?
1478
- if fileList is None:
1479
-
1480
- # Is the image still there?
1481
- imageFullPath = os.path.join(filteringBaseDir,
1482
- detection.sampleImageRelativeFileName)
1483
-
1484
- # If not, remove this from the list of suspicious detections
1485
- if not os.path.isfile(imageFullPath):
1486
- nDetectionsRemoved += 1
1487
- bValidDetection[iDetection] = False
1488
-
1489
- else:
1490
-
1491
- if detection.sampleImageRelativeFileName not in fileList:
1492
- nDetectionsRemoved += 1
1493
- bValidDetection[iDetection] = False
1494
-
1495
- # ...for each detection
1496
-
1497
- nRemovedThisDir = len(bValidDetection) - sum(bValidDetection)
1498
- if nRemovedThisDir > 0:
1499
- print('Removed {} of {} detections from directory {}'.\
1500
- format(nRemovedThisDir,len(detections), iDir))
1501
-
1502
- detectionsFiltered = list(compress(detections, bValidDetection))
1503
- suspiciousDetections[iDir] = detectionsFiltered
1504
-
1505
- # ...for each directory
1506
-
1507
- print('Removed {} of {} total detections via manual filtering'.\
1508
- format(nDetectionsRemoved, nDetectionsLoaded))
1509
-
1510
- # ...if we are/aren't finding detections (vs. loading from file)
1511
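To make the thresholding rule above concrete, a toy example with made-up per-location instance counts: with occurrenceThreshold = 10, only candidate locations whose detection recurs on at least 10 images are flagged.

occurrence_threshold = 10  # i.e., options.occurrenceThreshold
candidate_instance_counts = [3, 12, 47, 9]  # made-up counts, one per candidate location

n_suspicious = sum(1 for n in candidate_instance_counts if n >= occurrence_threshold)
n_images = sum(n for n in candidate_instance_counts if n >= occurrence_threshold)
print('Found {} unique detections on {} images that are suspicious'.format(
    n_suspicious, n_images))  # Found 2 unique detections on 59 images that are suspicious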
-
1512
- toReturn.suspiciousDetections = suspiciousDetections
1513
-
1514
- toReturn.allRowsFiltered = _update_detection_table(toReturn, options, outputFilename)
1515
-
1516
-
1517
- ##%% Create filtering directory
1518
-
1519
- if options.bWriteFilteringFolder:
1520
-
1521
- print('Creating filtering folder...')
1522
-
1523
- dateString = datetime.now().strftime('%Y.%m.%d.%H.%M.%S')
1524
- filteringDir = os.path.join(options.outputBase, 'filtering_' + dateString)
1525
- os.makedirs(filteringDir, exist_ok=True)
1526
-
1527
- # Take a first loop over every suspicious detection, and do the things that make
1528
- # sense to do in a serial sampleImageDetectionsloop:
1529
- #
1530
- # * Generate file names (which requires an index variable)
1531
- # * Sort instances by confidence
1532
- # * Look up detections for each sample image in the big table (so we don't have to pass the
1533
- # table to workers)
1534
- for iDir, suspiciousDetectionsThisDir in enumerate(tqdm(suspiciousDetections)):
1535
-
1536
- for iDetection, detection in enumerate(suspiciousDetectionsThisDir):
1537
-
1538
- # Sort instances in descending order by confidence
1539
- detection.instances.sort(key=attrgetter('confidence'),reverse=True)
1540
-
1541
- if detection.clusterLabel is not None:
1542
- clusterString = '_c{:0>4d}'.format(detection.clusterLabel)
1543
- else:
1544
- clusterString = ''
1545
-
1546
- # Choose the highest-confidence index
1547
- instance = detection.instances[0]
1548
- relativePath = instance.filename
1549
-
1550
- outputRelativePath = 'dir{:0>4d}_det{:0>4d}{}_n{:0>4d}.jpg'.format(
1551
- iDir, iDetection, clusterString, len(detection.instances))
1552
- detection.sampleImageRelativeFileName = outputRelativePath
1553
-
1554
- iRow = filenameToRow[relativePath]
1555
- row = detectionResults.iloc[iRow]
1556
- detection.sampleImageDetections = row['detections']
1557
-
1558
- # ...for each suspicious detection in this folder
1559
-
1560
- # ...for each folder
1561
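For reference, the naming scheme above encodes the directory index, the detection index, an optional cluster label, and the instance count; a quick check with hypothetical values:

iDir, iDetection, clusterString, nInstances = 3, 12, '_c0007', 45
print('dir{:0>4d}_det{:0>4d}{}_n{:0>4d}.jpg'.format(
    iDir, iDetection, clusterString, nInstances))
# dir0003_det0012_c0007_n0045.jpg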
-
1562
- # Collapse suspicious detections into a flat list
1563
- allSuspiciousDetections = []
1564
-
1565
- # iDir = 0; suspiciousDetectionsThisDir = suspiciousDetections[iDir]
1566
- for iDir, suspiciousDetectionsThisDir in enumerate(tqdm(suspiciousDetections)):
1567
- for iDetection, detection in enumerate(suspiciousDetectionsThisDir):
1568
- allSuspiciousDetections.append(detection)
1569
-
1570
- # Render suspicious detections
1571
- if options.bParallelizeRendering:
1572
-
1573
- n_workers = options.nWorkers
1574
-
1575
- if options.parallelizationUsesThreads:
1576
- pool = ThreadPool(n_workers); poolstring = 'threads'
1577
- else:
1578
- pool = Pool(n_workers); poolstring = 'processes'
1579
-
1580
- print('Starting rendering pool with {} {}'.format(n_workers,poolstring))
1581
-
1582
- # We get slightly nicer progress bar behavior using threads, by passing a pbar
1583
- # object and letting it get updated. We can't serialize this object across
1584
- # processes.
1585
- if options.parallelizationUsesThreads:
1586
- options.pbar = tqdm(total=len(allSuspiciousDetections))
1587
- allCandidateDetections = list(pool.imap(
1588
- partial(_render_sample_image_for_detection,filteringDir=filteringDir,
1589
- options=options), allSuspiciousDetections))
1590
- else:
1591
- options.pbar = None
1592
- allCandidateDetections = list(tqdm(pool.imap(
1593
- partial(_render_sample_image_for_detection,filteringDir=filteringDir,
1594
- options=options), allSuspiciousDetections)))
1595
-
1596
- else:
1597
-
1598
- # Serial loop over detections
1599
- for detection in allSuspiciousDetections:
1600
- _render_sample_image_for_detection(detection,filteringDir,options)
1601
-
1602
- # Delete (large) temporary data from the list of suspicious detections
1603
- for detection in allSuspiciousDetections:
1604
- detection.sampleImageDetections = None
1605
-
1606
- # Write out the detection index
1607
- detectionIndexFileName = os.path.join(filteringDir, detection_index_file_name_base)
1608
-
1609
- # Prepare the data we're going to write to the detection index file
1610
- detectionInfo = {}
1611
-
1612
- detectionInfo['suspiciousDetections'] = suspiciousDetections
1613
- detectionInfo['dirIndexToName'] = dirIndexToName
1614
-
1615
- # Remove the one non-serializable object from the options struct before serializing
1616
- # to .json
1617
- options.pbar = None
1618
- detectionInfo['options'] = options
1619
-
1620
- s = jsonpickle.encode(detectionInfo,make_refs=False)
1621
- with open(detectionIndexFileName, 'w') as f:
1622
- f.write(s)
1623
- toReturn.filterFile = detectionIndexFileName
1624
-
1625
- print('Done')
1626
-
1627
- # ...if we're writing filtering info
1628
-
1629
- return toReturn
1630
-
1631
- # ...def find_repeat_detections()
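A minimal usage sketch for the function above; the paths are hypothetical, and the option fields are the ones referenced in this function (see RepeatDetectionOptions for the full set):

# Hypothetical first pass of the RDE process
options = RepeatDetectionOptions()
options.imageBase = '/data/camera_traps'   # hypothetical image root
options.outputBase = '/data/rde_output'    # hypothetical scratch folder
options.occurrenceThreshold = 10
options.bWriteFilteringFolder = True

results = find_repeat_detections('/data/md_results.json', options=options)
print('Detection index written to {}'.format(results.filterFile))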