megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- api/__init__.py +0 -0
- api/batch_processing/__init__.py +0 -0
- api/batch_processing/api_core/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/score.py +0 -1
- api/batch_processing/api_core/server_job_status_table.py +0 -1
- api/batch_processing/api_core_support/__init__.py +0 -0
- api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
- api/batch_processing/api_support/__init__.py +0 -0
- api/batch_processing/api_support/summarize_daily_activity.py +0 -1
- api/batch_processing/data_preparation/__init__.py +0 -0
- api/batch_processing/data_preparation/manage_local_batch.py +93 -79
- api/batch_processing/data_preparation/manage_video_batch.py +8 -8
- api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
- api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
- api/batch_processing/postprocessing/__init__.py +0 -0
- api/batch_processing/postprocessing/add_max_conf.py +12 -12
- api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
- api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
- api/batch_processing/postprocessing/compare_batch_results.py +114 -44
- api/batch_processing/postprocessing/convert_output_format.py +62 -19
- api/batch_processing/postprocessing/load_api_results.py +17 -20
- api/batch_processing/postprocessing/md_to_coco.py +31 -21
- api/batch_processing/postprocessing/md_to_labelme.py +165 -68
- api/batch_processing/postprocessing/merge_detections.py +40 -15
- api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
- api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
- api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
- api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
- api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
- api/synchronous/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
- api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
- api/synchronous/api_core/animal_detection_api/config.py +35 -35
- api/synchronous/api_core/tests/__init__.py +0 -0
- api/synchronous/api_core/tests/load_test.py +109 -109
- classification/__init__.py +0 -0
- classification/aggregate_classifier_probs.py +21 -24
- classification/analyze_failed_images.py +11 -13
- classification/cache_batchapi_outputs.py +51 -51
- classification/create_classification_dataset.py +69 -68
- classification/crop_detections.py +54 -53
- classification/csv_to_json.py +97 -100
- classification/detect_and_crop.py +105 -105
- classification/evaluate_model.py +43 -42
- classification/identify_mislabeled_candidates.py +47 -46
- classification/json_to_azcopy_list.py +10 -10
- classification/json_validator.py +72 -71
- classification/map_classification_categories.py +44 -43
- classification/merge_classification_detection_output.py +68 -68
- classification/prepare_classification_script.py +157 -154
- classification/prepare_classification_script_mc.py +228 -228
- classification/run_classifier.py +27 -26
- classification/save_mislabeled.py +30 -30
- classification/train_classifier.py +20 -20
- classification/train_classifier_tf.py +21 -22
- classification/train_utils.py +10 -10
- data_management/__init__.py +0 -0
- data_management/annotations/__init__.py +0 -0
- data_management/annotations/annotation_constants.py +18 -31
- data_management/camtrap_dp_to_coco.py +238 -0
- data_management/cct_json_utils.py +107 -59
- data_management/cct_to_md.py +176 -158
- data_management/cct_to_wi.py +247 -219
- data_management/coco_to_labelme.py +272 -0
- data_management/coco_to_yolo.py +86 -62
- data_management/databases/__init__.py +0 -0
- data_management/databases/add_width_and_height_to_db.py +20 -16
- data_management/databases/combine_coco_camera_traps_files.py +35 -31
- data_management/databases/integrity_check_json_db.py +130 -83
- data_management/databases/subset_json_db.py +25 -16
- data_management/generate_crops_from_cct.py +27 -45
- data_management/get_image_sizes.py +188 -144
- data_management/importers/add_nacti_sizes.py +8 -8
- data_management/importers/add_timestamps_to_icct.py +78 -78
- data_management/importers/animl_results_to_md_results.py +158 -160
- data_management/importers/auckland_doc_test_to_json.py +9 -9
- data_management/importers/auckland_doc_to_json.py +8 -8
- data_management/importers/awc_to_json.py +7 -7
- data_management/importers/bellevue_to_json.py +15 -15
- data_management/importers/cacophony-thermal-importer.py +13 -13
- data_management/importers/carrizo_shrubfree_2018.py +8 -8
- data_management/importers/carrizo_trail_cam_2017.py +8 -8
- data_management/importers/cct_field_adjustments.py +9 -9
- data_management/importers/channel_islands_to_cct.py +10 -10
- data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
- data_management/importers/ena24_to_json.py +7 -7
- data_management/importers/filenames_to_json.py +8 -8
- data_management/importers/helena_to_cct.py +7 -7
- data_management/importers/idaho-camera-traps.py +7 -7
- data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
- data_management/importers/jb_csv_to_json.py +9 -9
- data_management/importers/mcgill_to_json.py +8 -8
- data_management/importers/missouri_to_json.py +18 -18
- data_management/importers/nacti_fieldname_adjustments.py +10 -10
- data_management/importers/noaa_seals_2019.py +8 -8
- data_management/importers/pc_to_json.py +7 -7
- data_management/importers/plot_wni_giraffes.py +7 -7
- data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
- data_management/importers/prepare_zsl_imerit.py +7 -7
- data_management/importers/rspb_to_json.py +8 -8
- data_management/importers/save_the_elephants_survey_A.py +8 -8
- data_management/importers/save_the_elephants_survey_B.py +9 -9
- data_management/importers/snapshot_safari_importer.py +26 -26
- data_management/importers/snapshot_safari_importer_reprise.py +665 -665
- data_management/importers/snapshot_serengeti_lila.py +14 -14
- data_management/importers/sulross_get_exif.py +8 -9
- data_management/importers/timelapse_csv_set_to_json.py +11 -11
- data_management/importers/ubc_to_json.py +13 -13
- data_management/importers/umn_to_json.py +7 -7
- data_management/importers/wellington_to_json.py +8 -8
- data_management/importers/wi_to_json.py +9 -9
- data_management/importers/zamba_results_to_md_results.py +181 -181
- data_management/labelme_to_coco.py +309 -159
- data_management/labelme_to_yolo.py +103 -60
- data_management/lila/__init__.py +0 -0
- data_management/lila/add_locations_to_island_camera_traps.py +9 -9
- data_management/lila/add_locations_to_nacti.py +147 -147
- data_management/lila/create_lila_blank_set.py +114 -31
- data_management/lila/create_lila_test_set.py +8 -8
- data_management/lila/create_links_to_md_results_files.py +106 -106
- data_management/lila/download_lila_subset.py +92 -90
- data_management/lila/generate_lila_per_image_labels.py +56 -43
- data_management/lila/get_lila_annotation_counts.py +18 -15
- data_management/lila/get_lila_image_counts.py +11 -11
- data_management/lila/lila_common.py +103 -70
- data_management/lila/test_lila_metadata_urls.py +132 -116
- data_management/ocr_tools.py +173 -128
- data_management/read_exif.py +161 -99
- data_management/remap_coco_categories.py +84 -0
- data_management/remove_exif.py +58 -62
- data_management/resize_coco_dataset.py +32 -44
- data_management/wi_download_csv_to_coco.py +246 -0
- data_management/yolo_output_to_md_output.py +86 -73
- data_management/yolo_to_coco.py +535 -95
- detection/__init__.py +0 -0
- detection/detector_training/__init__.py +0 -0
- detection/process_video.py +85 -33
- detection/pytorch_detector.py +43 -25
- detection/run_detector.py +157 -72
- detection/run_detector_batch.py +189 -114
- detection/run_inference_with_yolov5_val.py +118 -51
- detection/run_tiled_inference.py +113 -42
- detection/tf_detector.py +51 -28
- detection/video_utils.py +606 -521
- docs/source/conf.py +43 -0
- md_utils/__init__.py +0 -0
- md_utils/azure_utils.py +9 -9
- md_utils/ct_utils.py +249 -70
- md_utils/directory_listing.py +59 -64
- md_utils/md_tests.py +968 -862
- md_utils/path_utils.py +655 -155
- md_utils/process_utils.py +157 -133
- md_utils/sas_blob_utils.py +20 -20
- md_utils/split_locations_into_train_val.py +45 -32
- md_utils/string_utils.py +33 -10
- md_utils/url_utils.py +208 -27
- md_utils/write_html_image_list.py +51 -35
- md_visualization/__init__.py +0 -0
- md_visualization/plot_utils.py +102 -109
- md_visualization/render_images_with_thumbnails.py +34 -34
- md_visualization/visualization_utils.py +908 -311
- md_visualization/visualize_db.py +109 -58
- md_visualization/visualize_detector_output.py +61 -42
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
- megadetector-5.0.9.dist-info/RECORD +224 -0
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
- taxonomy_mapping/__init__.py +0 -0
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
- taxonomy_mapping/map_new_lila_datasets.py +154 -154
- taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
- taxonomy_mapping/preview_lila_taxonomy.py +591 -591
- taxonomy_mapping/retrieve_sample_image.py +12 -12
- taxonomy_mapping/simple_image_download.py +11 -11
- taxonomy_mapping/species_lookup.py +10 -10
- taxonomy_mapping/taxonomy_csv_checker.py +18 -18
- taxonomy_mapping/taxonomy_graph.py +47 -47
- taxonomy_mapping/validate_lila_category_mappings.py +83 -76
- data_management/cct_json_to_filename_json.py +0 -89
- data_management/cct_to_csv.py +0 -140
- data_management/databases/remove_corrupted_images_from_db.py +0 -191
- detection/detector_training/copy_checkpoints.py +0 -43
- md_visualization/visualize_megadb.py +0 -183
- megadetector-5.0.7.dist-info/RECORD +0 -202
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
|
@@ -1,10 +1,15 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
repeat_detections_core.py
|
|
4
|
+
|
|
5
|
+
Core utilities shared by find_repeat_detections and remove_repeat_detections.
|
|
6
|
+
|
|
7
|
+
Nothing in this file (in fact nothing in this subpackage) will make sense until you read
|
|
8
|
+
the RDE user's guide:
|
|
9
|
+
|
|
10
|
+
https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing/postprocessing/repeat_detection_elimination
|
|
11
|
+
|
|
12
|
+
"""
|
|
8
13
|
|
|
9
14
|
#%% Imports and environment
|
|
10
15
|
|
|
@@ -62,161 +67,214 @@ class RepeatDetectionOptions:
|
|
|
62
67
|
Options that control the behavior of repeat detection elimination
|
|
63
68
|
"""
|
|
64
69
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
70
|
+
#: Folder where images live; filenames in the MD results .json file should
|
|
71
|
+
#: be relative to this folder.
|
|
72
|
+
#:
|
|
73
|
+
#: imageBase can also be a SAS URL, in which case some error-checking is
|
|
74
|
+
#: disabled.
|
|
69
75
|
imageBase = ''
|
|
76
|
+
|
|
77
|
+
#: Folder where we should write temporary output.
|
|
70
78
|
outputBase = ''
|
|
71
79
|
|
|
72
|
-
|
|
80
|
+
#: Don't consider detections with confidence lower than this as suspicious
|
|
73
81
|
confidenceMin = 0.1
|
|
74
82
|
|
|
75
|
-
|
|
83
|
+
#: Don't consider detections with confidence higher than this as suspicious
|
|
76
84
|
confidenceMax = 1.0
|
|
77
85
|
|
|
78
|
-
|
|
86
|
+
#: What's the IOU threshold for considering two boxes the same?
|
|
79
87
|
iouThreshold = 0.9
|
|
80
88
|
|
|
81
|
-
|
|
82
|
-
|
|
89
|
+
#: How many occurrences of a single location (as defined by the IOU threshold)
|
|
90
|
+
#: are required before we declare it suspicious?
|
|
83
91
|
occurrenceThreshold = 20
|
|
84
92
|
|
|
85
|
-
|
|
93
|
+
#: Ignore "suspicious" detections smaller than some size
|
|
86
94
|
minSuspiciousDetectionSize = 0.0
|
|
87
95
|
|
|
88
|
-
|
|
89
|
-
|
|
96
|
+
#: Ignore "suspicious" detections larger than some size; these are often animals
|
|
97
|
+
#: taking up the whole image. This is expressed as a fraction of the image size.
|
|
90
98
|
maxSuspiciousDetectionSize = 0.2
|
|
91
99
|
|
|
92
|
-
|
|
100
|
+
#: Ignore folders with more than this many images in them
|
|
93
101
|
maxImagesPerFolder = None
|
|
94
102
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
+
#: A list of category IDs (ints) that we don't want to consider as candidate repeat detections.
|
|
104
|
+
#:
|
|
105
|
+
#: Typically used to say, e.g., "don't bother analyzing people or vehicles for repeat
|
|
106
|
+
#: detections", which you could do by saying excludeClasses = [2,3].
|
|
107
|
+
excludeClasses = []
|
|
108
|
+
|
|
109
|
+
#: For very large sets of results, passing chunks of results to and from workers as
|
|
110
|
+
#: parameters ('memory') can be memory-intensive, so we can serialize to intermediate
|
|
111
|
+
#: files instead ('file').
|
|
112
|
+
#:
|
|
113
|
+
#: The use of 'file' here is still experimental.
|
|
103
114
|
pass_detections_to_processes_method = 'memory'
|
|
104
115
|
|
|
116
|
+
#: Number of workers to use for parallel operations
|
|
105
117
|
nWorkers = 10
|
|
106
118
|
|
|
107
|
-
|
|
119
|
+
#: Should we use threads (True) or processes (False) for parallelization?
|
|
120
|
+
#:
|
|
121
|
+
#: Not relevant if nWorkers <= 1, or if bParallelizeComparisons and
|
|
122
|
+
#: bParallelizeRendering are both False.
|
|
108
123
|
parallelizationUsesThreads = True
|
|
109
124
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
# the filtering_* folder produced in the first pass
|
|
125
|
+
#: If this is not empty, we'll load detections from a filter file rather than finding them
|
|
126
|
+
#: from the detector output. This should be a .json file containing detections, generally this
|
|
127
|
+
#: is the detectionIndex.json file in the filtering_* folder produced by find_repeat_detections().
|
|
114
128
|
filterFileToLoad = ''
|
|
115
129
|
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
130
|
+
#: (optional) List of filenames remaining after deletion of identified
|
|
131
|
+
#: repeated detections that are actually animals. This should be a flat
|
|
132
|
+
#: text file, one relative filename per line.
|
|
133
|
+
#:
|
|
134
|
+
#: This is a pretty esoteric code path and a candidate for removal.
|
|
135
|
+
#:
|
|
136
|
+
#: The scenario where I see it being most useful is the very hypothetical one
|
|
137
|
+
#: where we use an external tool for image handling that allows us to do something
|
|
138
|
+
#: smarter and less destructive than deleting images to mark them as non-false-positives.
|
|
125
139
|
filteredFileListToLoad = None
|
|
126
140
|
|
|
127
|
-
|
|
141
|
+
#: Should we write the folder of images used to manually review repeat detections?
|
|
128
142
|
bWriteFilteringFolder = True
|
|
129
143
|
|
|
144
|
+
#: For debugging: limit comparisons to a specific number of folders
|
|
130
145
|
debugMaxDir = -1
|
|
146
|
+
|
|
147
|
+
#: For debugging: limit rendering to a specific number of folders
|
|
131
148
|
debugMaxRenderDir = -1
|
|
149
|
+
|
|
150
|
+
#: For debugging: limit comparisons to a specific number of detections
|
|
132
151
|
debugMaxRenderDetection = -1
|
|
152
|
+
|
|
153
|
+
#: For debugging: limit comparisons to a specific number of instances
|
|
133
154
|
debugMaxRenderInstance = -1
|
|
155
|
+
|
|
156
|
+
#: Should we parallelize (across cameras) comparisons to find repeat detections?
|
|
134
157
|
bParallelizeComparisons = True
|
|
158
|
+
|
|
159
|
+
#: Should we parallelize image rendering?
|
|
135
160
|
bParallelizeRendering = True
|
|
136
161
|
|
|
137
|
-
|
|
138
|
-
|
|
162
|
+
#: If this is False (default), a detection from class A is *not* considered to be "the same"
|
|
163
|
+
#: as a detection from class B, even if they're at the same location.
|
|
139
164
|
categoryAgnosticComparisons = False
|
|
140
165
|
|
|
141
|
-
|
|
142
|
-
|
|
166
|
+
#: Determines whether bounding-box rendering errors (typically network errors) should
|
|
167
|
+
#: be treated as failures
|
|
143
168
|
bFailOnRenderError = False
|
|
144
169
|
|
|
170
|
+
#: Should we print a warning if images referred to in the MD results file are missing?
|
|
145
171
|
bPrintMissingImageWarnings = True
|
|
172
|
+
|
|
173
|
+
#: If bPrintMissingImageWarnings is True, should we print a warning about missing images
|
|
174
|
+
#: just once ('once') or every time ('all')?
|
|
146
175
|
missingImageWarningType = 'once' # 'all'
|
|
147
176
|
|
|
148
|
-
|
|
177
|
+
#: Image width for rendered images (it's called "max" because we don't resize smaller images).
|
|
178
|
+
#:
|
|
179
|
+
#: Original size is preserved if this is None.
|
|
180
|
+
#:
|
|
181
|
+
#: This does *not* include the tile image grid.
|
|
149
182
|
maxOutputImageWidth = None
|
|
150
183
|
|
|
151
|
-
|
|
184
|
+
#: Line thickness (in pixels) for box rendering
|
|
152
185
|
lineThickness = 10
|
|
186
|
+
|
|
187
|
+
#: Box expansion (in pixels)
|
|
153
188
|
boxExpansion = 2
|
|
154
189
|
|
|
155
|
-
|
|
190
|
+
#: Progress bar used during comparisons and rendering. Do not set externally.
|
|
191
|
+
#:
|
|
192
|
+
#: :meta private:
|
|
156
193
|
pbar = None
|
|
157
194
|
|
|
158
|
-
|
|
159
|
-
|
|
195
|
+
#: Replace filename tokens after reading, useful when the directory structure
|
|
196
|
+
#: has changed relative to the structure the detector saw.
|
|
160
197
|
filenameReplacements = {}
|
|
161
198
|
|
|
162
|
-
|
|
199
|
+
#: How many folders up from the leaf nodes should we be going to aggregate images into
|
|
200
|
+
#: cameras?
|
|
201
|
+
#:
|
|
202
|
+
#: If this is zero, each leaf folder is treated as a camera.
|
|
163
203
|
nDirLevelsFromLeaf = 0
|
|
164
204
|
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
205
|
+
#: An optional function that takes a string (an image file name) and returns
|
|
206
|
+
#: a string (the corresponding folder ID), typically used when multiple folders
|
|
207
|
+
#: actually correspond to the same camera in a manufacturer-specific way (e.g.
|
|
208
|
+
#: a/b/c/RECONYX100 and a/b/c/RECONYX101 may really be the same camera).
|
|
209
|
+
#:
|
|
210
|
+
#: See ct_utils for a common replacement function that handles most common
|
|
211
|
+
#: manufacturer folder names.
|
|
169
212
|
customDirNameFunction = None
|
|
170
213
|
|
|
171
|
-
|
|
172
|
-
# specified; "including" folders includes *only* those folders.
|
|
214
|
+
#: Include only specific folders, mutually exclusive with [excludeFolders]
|
|
173
215
|
includeFolders = None
|
|
216
|
+
|
|
217
|
+
#: Exclude specific folders, mutually exclusive with [includeFolders]
|
|
174
218
|
excludeFolders = None
|
|
175
219
|
|
|
176
|
-
|
|
177
|
-
|
|
220
|
+
#: Optionally show *other* detections (i.e., detections other than the
|
|
221
|
+
#: one the user is evaluating), typically in a light gray.
|
|
178
222
|
bRenderOtherDetections = False
|
|
223
|
+
|
|
224
|
+
#: Threshold to use for *other* detections
|
|
179
225
|
otherDetectionsThreshold = 0.2
|
|
226
|
+
|
|
227
|
+
#: Line width (in pixels) for *other* detections
|
|
180
228
|
otherDetectionsLineWidth = 1
|
|
181
229
|
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
bRenderDetectionTiles =
|
|
230
|
+
#: Optionally show a grid that includes a sample image for the detection, plus
|
|
231
|
+
#: the top N additional detections
|
|
232
|
+
bRenderDetectionTiles = True
|
|
185
233
|
|
|
186
|
-
|
|
234
|
+
#: Width of the original image (within the larger output image) when bRenderDetectionTiles
|
|
235
|
+
#: is True.
|
|
236
|
+
#:
|
|
237
|
+
#: If this is None, we'll render the original image in the detection tile image
|
|
238
|
+
#: at its original width.
|
|
187
239
|
detectionTilesPrimaryImageWidth = None
|
|
188
240
|
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
241
|
+
#: Width to use for the grid of detection instances.
|
|
242
|
+
#:
|
|
243
|
+
#: Can be a width in pixels, or a number from 0 to 1 representing a fraction
|
|
244
|
+
#: of the primary image width.
|
|
245
|
+
#:
|
|
246
|
+
#: If you want to render the grid at exactly 1 pixel wide, I guess you're out
|
|
247
|
+
#: of luck.
|
|
194
248
|
detectionTilesCroppedGridWidth = 0.6
|
|
195
|
-
detectionTilesPrimaryImageLocation='right'
|
|
196
|
-
detectionTilesMaxCrops = None
|
|
197
249
|
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
250
|
+
#: Location of the primary image within the mosaic ('right' or 'left')
|
|
251
|
+
detectionTilesPrimaryImageLocation = 'right'
|
|
252
|
+
|
|
253
|
+
#: Maximum number of individual detection instances to include in the mosaic
|
|
254
|
+
detectionTilesMaxCrops = 250
|
|
255
|
+
|
|
256
|
+
#: If bRenderOtherDetections is True, what color should we use to render the
|
|
257
|
+
#: (hopefully pretty subtle) non-target detections?
|
|
258
|
+
#:
|
|
259
|
+
#: In theory I'd like these "other detection" rectangles to be partially
|
|
260
|
+
#: transparent, but this is not straightforward, and the alpha is ignored
|
|
261
|
+
#: here. But maybe if I leave it here and wish hard enough, someday it
|
|
262
|
+
#: will work.
|
|
263
|
+
#:
|
|
264
|
+
#: otherDetectionsColors = ['dimgray']
|
|
207
265
|
otherDetectionsColors = [(105,105,105,100)]
|
|
208
266
|
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
267
|
+
#: Sort detections within a directory so nearby detections are adjacent
|
|
268
|
+
#: in the list, for faster review.
|
|
269
|
+
#:
|
|
270
|
+
#: Can be None, 'xsort', or 'clustersort'
|
|
271
|
+
#:
|
|
272
|
+
#: * None sorts detections chronologically by first occurrence
|
|
273
|
+
#: * 'xsort' sorts detections from left to right
|
|
274
|
+
#: * 'clustersort' clusters detections and sorts by cluster
|
|
217
275
|
smartSort = 'xsort'
|
|
218
276
|
|
|
219
|
-
|
|
277
|
+
#: Only relevant if smartSort == 'clustersort'
|
|
220
278
|
smartSortDistanceThreshold = 0.1
|
|
221
279
|
|
|
222
280
|
|
|
@@ -225,26 +283,28 @@ class RepeatDetectionResults:
|
|
|
225
283
|
The results of an entire repeat detection analysis
|
|
226
284
|
"""
|
|
227
285
|
|
|
228
|
-
|
|
229
|
-
|
|
286
|
+
#: The data table (Pandas DataFrame), as loaded from the input json file via
|
|
287
|
+
#: load_api_results(). Has columns ['file', 'detections','failure'].
|
|
230
288
|
detectionResults = None
|
|
231
289
|
|
|
232
|
-
|
|
290
|
+
#: The other fields in the input json file, loaded via load_api_results()
|
|
233
291
|
otherFields = None
|
|
234
292
|
|
|
235
|
-
|
|
293
|
+
#: The data table after modification
|
|
236
294
|
detectionResultsFiltered = None
|
|
237
295
|
|
|
238
|
-
|
|
296
|
+
#: dict mapping folder names to whole rows from the data table
|
|
239
297
|
rowsByDirectory = None
|
|
240
298
|
|
|
241
|
-
|
|
299
|
+
#: dict mapping filenames to rows in the master table
|
|
242
300
|
filenameToRow = None
|
|
243
301
|
|
|
244
|
-
|
|
245
|
-
|
|
302
|
+
#: An array of length nDirs, where each element is a list of DetectionLocation
|
|
303
|
+
#: objects for that directory that have been flagged as suspicious
|
|
246
304
|
suspiciousDetections = None
|
|
247
305
|
|
|
306
|
+
#: The location of the .json file written with information about the RDE
|
|
307
|
+
#: review images (typically detectionIndex.json)
|
|
248
308
|
filterFile = None
|
|
249
309
|
|
|
250
310
|
|
|
@@ -254,21 +314,25 @@ class IndexedDetection:
|
|
|
254
314
|
"""
|
|
255
315
|
|
|
256
316
|
def __init__(self, iDetection=-1, filename='', bbox=[], confidence=-1, category='unknown'):
|
|
257
|
-
|
|
258
|
-
Args:
|
|
259
|
-
iDetection: order in API output file
|
|
260
|
-
filename: path to the image of this detection
|
|
261
|
-
bbox: [x_min, y_min, width_of_box, height_of_box]
|
|
262
|
-
"""
|
|
317
|
+
|
|
263
318
|
assert isinstance(iDetection,int)
|
|
264
319
|
assert isinstance(filename,str)
|
|
265
320
|
assert isinstance(bbox,list)
|
|
266
321
|
assert isinstance(category,str)
|
|
267
322
|
|
|
323
|
+
#: index of this detection within all detections for this filename
|
|
268
324
|
self.iDetection = iDetection
|
|
325
|
+
|
|
326
|
+
#: path to the image corresponding to this detection
|
|
269
327
|
self.filename = filename
|
|
328
|
+
|
|
329
|
+
#: [x_min, y_min, width_of_box, height_of_box]
|
|
270
330
|
self.bbox = bbox
|
|
331
|
+
|
|
332
|
+
#: confidence value of this detection
|
|
271
333
|
self.confidence = confidence
|
|
334
|
+
|
|
335
|
+
#: category ID (not name) of this detection
|
|
272
336
|
self.category = category
|
|
273
337
|
|
|
274
338
|
def __repr__(self):
|
|
@@ -280,7 +344,7 @@ class DetectionLocation:
|
|
|
280
344
|
"""
|
|
281
345
|
A unique-ish detection location, meaningful in the context of one
|
|
282
346
|
directory. All detections within an IoU threshold of self.bbox
|
|
283
|
-
will be stored in
|
|
347
|
+
will be stored in IndexedDetection objects.
|
|
284
348
|
"""
|
|
285
349
|
|
|
286
350
|
def __init__(self, instance, detection, relativeDir, category, id=None):
|
|
@@ -290,15 +354,28 @@ class DetectionLocation:
|
|
|
290
354
|
assert isinstance(relativeDir,str)
|
|
291
355
|
assert isinstance(category,str)
|
|
292
356
|
|
|
293
|
-
|
|
357
|
+
#: list of IndexedDetections that match this detection
|
|
358
|
+
self.instances = [instance]
|
|
359
|
+
|
|
360
|
+
#: category ID (not name) for this detection
|
|
294
361
|
self.category = category
|
|
362
|
+
|
|
363
|
+
#: bbox as x,y,w,h
|
|
295
364
|
self.bbox = detection['bbox']
|
|
365
|
+
|
|
366
|
+
#: relative folder (i.e., camera name) in which this detection was found
|
|
296
367
|
self.relativeDir = relativeDir
|
|
368
|
+
|
|
369
|
+
#: relative path to the canonical image representing this detection
|
|
297
370
|
self.sampleImageRelativeFileName = ''
|
|
371
|
+
|
|
372
|
+
#: list of detections on that canonical image that match this detection
|
|
298
373
|
self.sampleImageDetections = None
|
|
299
374
|
|
|
300
|
-
|
|
375
|
+
#: ID for this detection; this ID is only guaranteed to be unique within a directory
|
|
301
376
|
self.id = id
|
|
377
|
+
|
|
378
|
+
#: only used when doing cluster-based sorting
|
|
302
379
|
self.clusterLabel = None
|
|
303
380
|
|
|
304
381
|
def __repr__(self):
|
|
@@ -307,8 +384,11 @@ class DetectionLocation:
|
|
|
307
384
|
|
|
308
385
|
def to_api_detection(self):
|
|
309
386
|
"""
|
|
310
|
-
Converts to a 'detection' dictionary, making the semi-arbitrary
|
|
311
|
-
the first instance is representative of confidence.
|
|
387
|
+
Converts this detection to a 'detection' dictionary, making the semi-arbitrary
|
|
388
|
+
assumption that the first instance is representative of confidence.
|
|
389
|
+
|
|
390
|
+
Returns:
|
|
391
|
+
dict: dictionary in the format used to store detections in MD results
|
|
312
392
|
"""
|
|
313
393
|
|
|
314
394
|
# This is a bit of a hack right now, but for future-proofing, I don't want to call this
|
|
@@ -328,30 +408,13 @@ class DetectionLocation:
|
|
|
328
408
|
|
|
329
409
|
#%% Support functions
|
|
330
410
|
|
|
331
|
-
def
|
|
411
|
+
def _render_bounding_box(detection, inputFileName, outputFileName, lineWidth=5,
|
|
412
|
+
expansion=0):
|
|
332
413
|
"""
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
after removing true positives from the image directory.
|
|
336
|
-
|
|
337
|
-
Not used directly in this module, but provides a consistent way to enumerate
|
|
338
|
-
files in the format expected by this module.
|
|
414
|
+
Renders the detection [detection] on the image [inputFileName], writing the result
|
|
415
|
+
to [outputFileName].
|
|
339
416
|
"""
|
|
340
417
|
|
|
341
|
-
imageList = path_utils.find_images(dirName)
|
|
342
|
-
imageList = [os.path.basename(fn) for fn in imageList]
|
|
343
|
-
|
|
344
|
-
if outputFileName is not None:
|
|
345
|
-
with open(outputFileName,'w') as f:
|
|
346
|
-
for s in imageList:
|
|
347
|
-
f.write(s + '\n')
|
|
348
|
-
|
|
349
|
-
return imageList
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
def render_bounding_box(detection, inputFileName, outputFileName, lineWidth=5,
|
|
353
|
-
expansion=0):
|
|
354
|
-
|
|
355
418
|
im = open_image(inputFileName)
|
|
356
419
|
d = detection.to_api_detection()
|
|
357
420
|
render_detection_bounding_boxes([d],im,thickness=lineWidth,expansion=expansion,
|
|
@@ -359,8 +422,12 @@ def render_bounding_box(detection, inputFileName, outputFileName, lineWidth=5,
|
|
|
359
422
|
im.save(outputFileName)
|
|
360
423
|
|
|
361
424
|
|
|
362
|
-
def
|
|
363
|
-
|
|
425
|
+
def _detection_rect_to_rtree_rect(detection_rect):
|
|
426
|
+
"""
|
|
427
|
+
We store detections as x/y/w/h, rtree and pyqtree use l/b/r/t. Convert from
|
|
428
|
+
our representation to rtree's.
|
|
429
|
+
"""
|
|
430
|
+
|
|
364
431
|
l = detection_rect[0]
|
|
365
432
|
b = detection_rect[1]
|
|
366
433
|
r = detection_rect[0] + detection_rect[2]
|
|
@@ -368,8 +435,12 @@ def detection_rect_to_rtree_rect(detection_rect):
|
|
|
368
435
|
return (l,b,r,t)
|
|
369
436
|
|
|
370
437
|
|
|
371
|
-
def
|
|
372
|
-
|
|
438
|
+
def _rtree_rect_to_detection_rect(rtree_rect):
|
|
439
|
+
"""
|
|
440
|
+
We store detections as x/y/w/h, rtree and pyqtree use l/b/r/t. Convert from
|
|
441
|
+
rtree's representation to ours.
|
|
442
|
+
"""
|
|
443
|
+
|
|
373
444
|
x = rtree_rect[0]
|
|
374
445
|
y = rtree_rect[1]
|
|
375
446
|
w = rtree_rect[2] - rtree_rect[0]
|
|
@@ -377,7 +448,7 @@ def rtree_rect_to_detection_rect(rtree_rect):
|
|
|
377
448
|
return (x,y,w,h)
|
|
378
449
|
|
|
379
450
|
|
|
380
|
-
def
|
|
451
|
+
def _sort_detections_for_directory(candidateDetections,options):
|
|
381
452
|
"""
|
|
382
453
|
candidateDetections is a list of DetectionLocation objects. Sorts them to
|
|
383
454
|
put nearby detections next to each other, for easier visual review. Returns
|
|
@@ -474,14 +545,15 @@ def sort_detections_for_directory(candidateDetections,options):
|
|
|
474
545
|
raise ValueError('Unrecognized sort method {}'.format(
|
|
475
546
|
options.smartSort))
|
|
476
547
|
|
|
477
|
-
# ...def
|
|
548
|
+
# ...def _sort_detections_for_directory(...)
|
|
478
549
|
|
|
479
550
|
|
|
480
|
-
def
|
|
551
|
+
def _find_matches_in_directory(dirNameAndRows, options):
|
|
481
552
|
"""
|
|
482
553
|
dirNameAndRows is a tuple of (name,rows).
|
|
483
554
|
|
|
484
|
-
"name" is a location name, typically a folder name
|
|
555
|
+
"name" is a location name, typically a folder name, though this may be an arbitrary
|
|
556
|
+
location identifier.
|
|
485
557
|
|
|
486
558
|
"rows" is a Pandas dataframe with one row per image in this location, with columns:
|
|
487
559
|
|
|
@@ -548,7 +620,7 @@ def find_matches_in_directory(dirNameAndRows, options):
|
|
|
548
620
|
|
|
549
621
|
i_iteration += 1
|
|
550
622
|
filename = row['file']
|
|
551
|
-
if not
|
|
623
|
+
if not path_utils.is_image_file(filename):
|
|
552
624
|
continue
|
|
553
625
|
|
|
554
626
|
if 'max_detection_conf' not in row or 'detections' not in row or \
|
|
@@ -643,7 +715,7 @@ def find_matches_in_directory(dirNameAndRows, options):
|
|
|
643
715
|
|
|
644
716
|
bFoundSimilarDetection = False
|
|
645
717
|
|
|
646
|
-
rtree_rect = detection_rect_to_rtree_rect(bbox)
|
|
718
|
+
rtree_rect = _detection_rect_to_rtree_rect(bbox)
|
|
647
719
|
|
|
648
720
|
# This will return candidates of all classes
|
|
649
721
|
overlappingCandidateDetections =\
|
|
@@ -723,10 +795,10 @@ def find_matches_in_directory(dirNameAndRows, options):
|
|
|
723
795
|
else:
|
|
724
796
|
return candidateDetections
|
|
725
797
|
|
|
726
|
-
# ...def
|
|
798
|
+
# ...def _find_matches_in_directory(...)
|
|
727
799
|
|
|
728
800
|
|
|
729
|
-
def update_detection_table(repeatDetectionResults, options, outputFilename=None):
|
|
801
|
+
def _update_detection_table(repeatDetectionResults, options, outputFilename=None):
|
|
730
802
|
"""
|
|
731
803
|
Changes confidence values in repeatDetectionResults.detectionResults so that detections
|
|
732
804
|
deemed to be possible false positives are given negative confidence values.
|
|
@@ -870,10 +942,10 @@ def update_detection_table(repeatDetectionResults, options, outputFilename=None)
|
|
|
870
942
|
|
|
871
943
|
return detectionResults
|
|
872
944
|
|
|
873
|
-
# ...def
|
|
945
|
+
# ...def _update_detection_table(...)
|
|
874
946
|
|
|
875
947
|
|
|
876
|
-
def render_sample_image_for_detection(detection,filteringDir,options):
|
|
948
|
+
def _render_sample_image_for_detection(detection,filteringDir,options):
|
|
877
949
|
"""
|
|
878
950
|
Render a sample image for one unique detection, possibly containing lightly-colored
|
|
879
951
|
high-confidence detections from elsewhere in the sample image.
|
|
@@ -954,7 +1026,7 @@ def render_sample_image_for_detection(detection,filteringDir,options):
|
|
|
954
1026
|
|
|
955
1027
|
else:
|
|
956
1028
|
|
|
957
|
-
|
|
1029
|
+
_render_bounding_box(detection, inputFullPath, outputFullPath,
|
|
958
1030
|
lineWidth=options.lineThickness, expansion=options.boxExpansion)
|
|
959
1031
|
|
|
960
1032
|
# ...if we are/aren't rendering other bounding boxes
|
|
@@ -1003,11 +1075,7 @@ def render_sample_image_for_detection(detection,filteringDir,options):
|
|
|
1003
1075
|
cropped_grid_width=croppedGridWidth,
|
|
1004
1076
|
output_image_filename=outputFullPath,
|
|
1005
1077
|
primary_image_location=options.detectionTilesPrimaryImageLocation)
|
|
1006
|
-
|
|
1007
|
-
# bDetectionTilesPrimaryImageWidth = None
|
|
1008
|
-
# bDetectionTilesCroppedGridWidth = 0.6
|
|
1009
|
-
# bDetectionTilesPrimaryImageLocation='right'
|
|
1010
|
-
|
|
1078
|
+
|
|
1011
1079
|
# ...if we are/aren't rendering detection tiles
|
|
1012
1080
|
|
|
1013
1081
|
except Exception as e:
|
|
@@ -1018,12 +1086,28 @@ def render_sample_image_for_detection(detection,filteringDir,options):
|
|
|
1018
1086
|
if options.bFailOnRenderError:
|
|
1019
1087
|
raise
|
|
1020
1088
|
|
|
1021
|
-
# ...def
|
|
1089
|
+
# ...def _render_sample_image_for_detection(...)
|
|
1022
1090
|
|
|
1023
1091
|
|
|
1024
1092
|
#%% Main entry point
|
|
1025
1093
|
|
|
1026
1094
|
def find_repeat_detections(inputFilename, outputFilename=None, options=None):
|
|
1095
|
+
"""
|
|
1096
|
+
Find detections in a MD results file that occur repeatedly and are likely to be
|
|
1097
|
+
rocks/sticks.
|
|
1098
|
+
|
|
1099
|
+
Args:
|
|
1100
|
+
inputFilename (str): the MD results .json file to analyze
|
|
1101
|
+
outputFilename (str, optional): the filename to which we should write results
|
|
1102
|
+
with repeat detections removed, typically set to None during the first
|
|
1103
|
+
part of the RDE process.
|
|
1104
|
+
options (RepeatDetectionOptions): all the interesting options controlling this
|
|
1105
|
+
process; see RepeatDetectionOptions for details.
|
|
1106
|
+
|
|
1107
|
+
Returns:
|
|
1108
|
+
RepeatDetectionResults: results of the RDE process; see RepeatDetectionResults
|
|
1109
|
+
for details.
|
|
1110
|
+
"""
|
|
1027
1111
|
|
|
1028
1112
|
##%% Input handling
|
|
1029
1113
|
|
|
@@ -1203,7 +1287,7 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):
|
|
|
1203
1287
|
assert dirNameAndRow[0] == dirName
|
|
1204
1288
|
print('Processing dir {} of {}: {}'.format(iDir,len(dirsToSearch),dirName))
|
|
1205
1289
|
allCandidateDetections[iDir] = \
|
|
1206
|
-
|
|
1290
|
+
_find_matches_in_directory(dirNameAndRow, options)
|
|
1207
1291
|
|
|
1208
1292
|
else:
|
|
1209
1293
|
|
|
@@ -1271,7 +1355,7 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):
|
|
|
1271
1355
|
|
|
1272
1356
|
options.pbar = None
|
|
1273
1357
|
allCandidateDetectionFiles = list(pool.imap(
|
|
1274
|
-
partial(
|
|
1358
|
+
partial(_find_matches_in_directory,options=options), dirNameAndIntermediateFile))
|
|
1275
1359
|
|
|
1276
1360
|
|
|
1277
1361
|
##%% Load into a combined list of candidate detections
|
|
@@ -1298,11 +1382,11 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):
|
|
|
1298
1382
|
if options.parallelizationUsesThreads:
|
|
1299
1383
|
options.pbar = tqdm(total=len(dirNameAndRows))
|
|
1300
1384
|
allCandidateDetections = list(pool.imap(
|
|
1301
|
-
partial(
|
|
1385
|
+
partial(_find_matches_in_directory,options=options), dirNameAndRows))
|
|
1302
1386
|
else:
|
|
1303
1387
|
options.pbar = None
|
|
1304
1388
|
allCandidateDetections = list(tqdm(pool.imap(
|
|
1305
|
-
partial(
|
|
1389
|
+
partial(_find_matches_in_directory,options=options), dirNameAndRows)))
|
|
1306
1390
|
|
|
1307
1391
|
print('\nFinished looking for similar detections')
|
|
1308
1392
|
|
|
@@ -1342,7 +1426,7 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):
|
|
|
1342
1426
|
|
|
1343
1427
|
# Sort the above-threshold detections for easier review
|
|
1344
1428
|
if options.smartSort is not None:
|
|
1345
|
-
suspiciousDetections[iDir] = sort_detections_for_directory(
|
|
1429
|
+
suspiciousDetections[iDir] = _sort_detections_for_directory(
|
|
1346
1430
|
suspiciousDetections[iDir],options)
|
|
1347
1431
|
|
|
1348
1432
|
print('Found {} suspicious detections in directory {} ({})'.format(
|
|
@@ -1427,7 +1511,7 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):
|
|
|
1427
1511
|
|
|
1428
1512
|
toReturn.suspiciousDetections = suspiciousDetections
|
|
1429
1513
|
|
|
1430
|
-
toReturn.allRowsFiltered = update_detection_table(toReturn, options, outputFilename)
|
|
1514
|
+
toReturn.allRowsFiltered = _update_detection_table(toReturn, options, outputFilename)
|
|
1431
1515
|
|
|
1432
1516
|
|
|
1433
1517
|
##%% Create filtering directory
|
|
@@ -1501,19 +1585,19 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):
|
|
|
1501
1585
|
if options.parallelizationUsesThreads:
|
|
1502
1586
|
options.pbar = tqdm(total=len(allSuspiciousDetections))
|
|
1503
1587
|
allCandidateDetections = list(pool.imap(
|
|
1504
|
-
partial(
|
|
1588
|
+
partial(_render_sample_image_for_detection,filteringDir=filteringDir,
|
|
1505
1589
|
options=options), allSuspiciousDetections))
|
|
1506
1590
|
else:
|
|
1507
1591
|
options.pbar = None
|
|
1508
1592
|
allCandidateDetections = list(tqdm(pool.imap(
|
|
1509
|
-
partial(
|
|
1593
|
+
partial(_render_sample_image_for_detection,filteringDir=filteringDir,
|
|
1510
1594
|
options=options), allSuspiciousDetections)))
|
|
1511
1595
|
|
|
1512
1596
|
else:
|
|
1513
1597
|
|
|
1514
1598
|
# Serial loop over detections
|
|
1515
1599
|
for detection in allSuspiciousDetections:
|
|
1516
|
-
|
|
1600
|
+
_render_sample_image_for_detection(detection,filteringDir,options)
|
|
1517
1601
|
|
|
1518
1602
|
# Delete (large) temporary data from the list of suspicious detections
|
|
1519
1603
|
for detection in allSuspiciousDetections:
|