megadetector 5.0.5__py3-none-any.whl → 5.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of megadetector might be problematic.
- api/batch_processing/data_preparation/manage_local_batch.py +302 -263
- api/batch_processing/data_preparation/manage_video_batch.py +81 -2
- api/batch_processing/postprocessing/add_max_conf.py +1 -0
- api/batch_processing/postprocessing/categorize_detections_by_size.py +50 -19
- api/batch_processing/postprocessing/compare_batch_results.py +110 -60
- api/batch_processing/postprocessing/load_api_results.py +56 -70
- api/batch_processing/postprocessing/md_to_coco.py +1 -1
- api/batch_processing/postprocessing/md_to_labelme.py +2 -1
- api/batch_processing/postprocessing/postprocess_batch_results.py +240 -81
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +625 -0
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +227 -75
- api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
- api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
- api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +2 -2
- classification/prepare_classification_script.py +191 -191
- data_management/coco_to_yolo.py +68 -45
- data_management/databases/integrity_check_json_db.py +7 -5
- data_management/generate_crops_from_cct.py +3 -3
- data_management/get_image_sizes.py +8 -6
- data_management/importers/add_timestamps_to_icct.py +79 -0
- data_management/importers/animl_results_to_md_results.py +160 -0
- data_management/importers/auckland_doc_test_to_json.py +4 -4
- data_management/importers/auckland_doc_to_json.py +1 -1
- data_management/importers/awc_to_json.py +5 -5
- data_management/importers/bellevue_to_json.py +5 -5
- data_management/importers/carrizo_shrubfree_2018.py +5 -5
- data_management/importers/carrizo_trail_cam_2017.py +5 -5
- data_management/importers/cct_field_adjustments.py +2 -3
- data_management/importers/channel_islands_to_cct.py +4 -4
- data_management/importers/ena24_to_json.py +5 -5
- data_management/importers/helena_to_cct.py +10 -10
- data_management/importers/idaho-camera-traps.py +12 -12
- data_management/importers/idfg_iwildcam_lila_prep.py +8 -8
- data_management/importers/jb_csv_to_json.py +4 -4
- data_management/importers/missouri_to_json.py +1 -1
- data_management/importers/noaa_seals_2019.py +1 -1
- data_management/importers/pc_to_json.py +5 -5
- data_management/importers/prepare-noaa-fish-data-for-lila.py +4 -4
- data_management/importers/prepare_zsl_imerit.py +5 -5
- data_management/importers/rspb_to_json.py +4 -4
- data_management/importers/save_the_elephants_survey_A.py +5 -5
- data_management/importers/save_the_elephants_survey_B.py +6 -6
- data_management/importers/snapshot_safari_importer.py +9 -9
- data_management/importers/snapshot_serengeti_lila.py +9 -9
- data_management/importers/timelapse_csv_set_to_json.py +5 -7
- data_management/importers/ubc_to_json.py +4 -4
- data_management/importers/umn_to_json.py +4 -4
- data_management/importers/wellington_to_json.py +1 -1
- data_management/importers/wi_to_json.py +2 -2
- data_management/importers/zamba_results_to_md_results.py +181 -0
- data_management/labelme_to_coco.py +35 -7
- data_management/labelme_to_yolo.py +229 -0
- data_management/lila/add_locations_to_island_camera_traps.py +1 -1
- data_management/lila/add_locations_to_nacti.py +147 -0
- data_management/lila/create_lila_blank_set.py +474 -0
- data_management/lila/create_lila_test_set.py +2 -1
- data_management/lila/create_links_to_md_results_files.py +106 -0
- data_management/lila/download_lila_subset.py +46 -21
- data_management/lila/generate_lila_per_image_labels.py +23 -14
- data_management/lila/get_lila_annotation_counts.py +17 -11
- data_management/lila/lila_common.py +14 -11
- data_management/lila/test_lila_metadata_urls.py +116 -0
- data_management/ocr_tools.py +829 -0
- data_management/resize_coco_dataset.py +13 -11
- data_management/yolo_output_to_md_output.py +84 -12
- data_management/yolo_to_coco.py +38 -20
- detection/process_video.py +36 -14
- detection/pytorch_detector.py +23 -8
- detection/run_detector.py +76 -19
- detection/run_detector_batch.py +178 -63
- detection/run_inference_with_yolov5_val.py +326 -57
- detection/run_tiled_inference.py +153 -43
- detection/video_utils.py +34 -8
- md_utils/ct_utils.py +172 -1
- md_utils/md_tests.py +372 -51
- md_utils/path_utils.py +167 -39
- md_utils/process_utils.py +26 -7
- md_utils/split_locations_into_train_val.py +215 -0
- md_utils/string_utils.py +10 -0
- md_utils/url_utils.py +0 -2
- md_utils/write_html_image_list.py +9 -26
- md_visualization/plot_utils.py +12 -8
- md_visualization/visualization_utils.py +106 -7
- md_visualization/visualize_db.py +16 -8
- md_visualization/visualize_detector_output.py +208 -97
- {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/METADATA +3 -6
- {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/RECORD +98 -121
- {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/WHEEL +1 -1
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
- taxonomy_mapping/map_new_lila_datasets.py +43 -39
- taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
- taxonomy_mapping/preview_lila_taxonomy.py +27 -27
- taxonomy_mapping/species_lookup.py +33 -13
- taxonomy_mapping/taxonomy_csv_checker.py +7 -5
- api/synchronous/api_core/yolov5/detect.py +0 -252
- api/synchronous/api_core/yolov5/export.py +0 -607
- api/synchronous/api_core/yolov5/hubconf.py +0 -146
- api/synchronous/api_core/yolov5/models/__init__.py +0 -0
- api/synchronous/api_core/yolov5/models/common.py +0 -738
- api/synchronous/api_core/yolov5/models/experimental.py +0 -104
- api/synchronous/api_core/yolov5/models/tf.py +0 -574
- api/synchronous/api_core/yolov5/models/yolo.py +0 -338
- api/synchronous/api_core/yolov5/train.py +0 -670
- api/synchronous/api_core/yolov5/utils/__init__.py +0 -36
- api/synchronous/api_core/yolov5/utils/activations.py +0 -103
- api/synchronous/api_core/yolov5/utils/augmentations.py +0 -284
- api/synchronous/api_core/yolov5/utils/autoanchor.py +0 -170
- api/synchronous/api_core/yolov5/utils/autobatch.py +0 -66
- api/synchronous/api_core/yolov5/utils/aws/__init__.py +0 -0
- api/synchronous/api_core/yolov5/utils/aws/resume.py +0 -40
- api/synchronous/api_core/yolov5/utils/benchmarks.py +0 -148
- api/synchronous/api_core/yolov5/utils/callbacks.py +0 -71
- api/synchronous/api_core/yolov5/utils/dataloaders.py +0 -1087
- api/synchronous/api_core/yolov5/utils/downloads.py +0 -178
- api/synchronous/api_core/yolov5/utils/flask_rest_api/example_request.py +0 -19
- api/synchronous/api_core/yolov5/utils/flask_rest_api/restapi.py +0 -46
- api/synchronous/api_core/yolov5/utils/general.py +0 -1018
- api/synchronous/api_core/yolov5/utils/loggers/__init__.py +0 -187
- api/synchronous/api_core/yolov5/utils/loggers/wandb/__init__.py +0 -0
- api/synchronous/api_core/yolov5/utils/loggers/wandb/log_dataset.py +0 -27
- api/synchronous/api_core/yolov5/utils/loggers/wandb/sweep.py +0 -41
- api/synchronous/api_core/yolov5/utils/loggers/wandb/wandb_utils.py +0 -577
- api/synchronous/api_core/yolov5/utils/loss.py +0 -234
- api/synchronous/api_core/yolov5/utils/metrics.py +0 -355
- api/synchronous/api_core/yolov5/utils/plots.py +0 -489
- api/synchronous/api_core/yolov5/utils/torch_utils.py +0 -314
- api/synchronous/api_core/yolov5/val.py +0 -394
- md_utils/matlab_porting_tools.py +0 -97
- {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/LICENSE +0 -0
- {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/top_level.txt +0 -0
Diff shown below: api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py (+227 -75)

@@ -14,7 +14,10 @@ import warnings
 import sklearn.cluster
 import numpy as np
 import jsonpickle
+import traceback
 import pandas as pd
+import json
+import shutil

 from tqdm import tqdm
 from operator import attrgetter
@@ -35,6 +38,8 @@ from api.batch_processing.postprocessing.postprocess_batch_results import relati
 from md_visualization.visualization_utils import open_image, render_detection_bounding_boxes
 from md_visualization import render_images_with_thumbnails
 from md_visualization import visualization_utils as vis_utils
+from md_utils.path_utils import flatten_path
+from md_utils.ct_utils import invert_dictionary

 # "PIL cannot read EXIF metainfo for the images"
 warnings.filterwarnings('ignore', '(Possibly )?corrupt EXIF data', UserWarning)
@@ -42,10 +47,12 @@ warnings.filterwarnings('ignore', '(Possibly )?corrupt EXIF data', UserWarning)
 # "Metadata Warning, tag 256 had too many entries: 42, expected 1"
 warnings.filterwarnings('ignore', 'Metadata warning', UserWarning)

+jsonpickle.set_encoder_options('json', sort_keys=True, indent=1)
+

 #%% Constants

-
+detection_index_file_name_base = 'detectionIndex.json'


 #%% Classes
@@ -74,26 +81,32 @@ class RepeatDetectionOptions:
     # How many occurrences of a single location (as defined by the IOU threshold)
     # are required before we declare it suspicious?
     occurrenceThreshold = 20
+
+    # Ignore "suspicious" detections smaller than some size
+    minSuspiciousDetectionSize = 0.0

     # Ignore "suspicious" detections larger than some size; these are often animals
     # taking up the whole image. This is expressed as a fraction of the image size.
     maxSuspiciousDetectionSize = 0.2

-    # Ignore "suspicious" detections smaller than some size
-    minSuspiciousDetectionSize = 0.0
-
     # Ignore folders with more than this many images in them
     maxImagesPerFolder = None

     # A list of classes we don't want to treat as suspicious. Each element is an int.
     excludeClasses = [] # [annotation_constants.detector_bbox_category_name_to_id['person']]

+    # For very large sets of results, passing chunks of results to and from workers as
+    # parameters ('memory') can be memory-intensive, so we can serialize to intermediate
+    # files instead ('file').
+    #
+    # The use of 'file' here is still experimental.
+    pass_detections_to_processes_method = 'memory'
+
     nWorkers = 10

+    # Should we use threads or processes for parallelization?
     parallelizationUsesThreads = True

-    viz_target_width = 800
-
     # Load detections from a filter file rather than finding them from the detector output

     # .json file containing detections, generally this is the detectionIndex.json file in
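The new options above are plain class attributes, so enabling them is just assignment. A minimal configuration sketch; the field names are taken from this diff, the import path follows the package layout in the file list above, and the threshold values are illustrative:

from api.batch_processing.postprocessing.repeat_detection_elimination import \
    repeat_detections_core

options = repeat_detections_core.RepeatDetectionOptions()
options.occurrenceThreshold = 20          # boxes seen >= 20 times at one location are suspicious
options.minSuspiciousDetectionSize = 0.0  # ignore boxes below this fraction of image area
options.maxSuspiciousDetectionSize = 0.2  # ignore boxes above this fraction (often real animals)

# New in 5.0.7 and still experimental: serialize per-location work to
# intermediate files rather than passing DataFrames through the worker pool.
# Assumes options.outputBase points to a writable folder (see the 'file'
# branch later in this diff).
options.pass_detections_to_processes_method = 'file'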
@@ -121,6 +134,10 @@ class RepeatDetectionOptions:
     bParallelizeComparisons = True
     bParallelizeRendering = True

+    # If this is False (default), a detection from class A is not considered to be "the same"
+    # as a detection from class B, even if they're at the same location.
+    categoryAgnosticComparisons = False
+
     # Determines whether bounding-box rendering errors (typically network errors) should
     # be treated as failures
     bFailOnRenderError = False
@@ -209,7 +226,7 @@ class RepeatDetectionResults:
     """

     # The data table (Pandas DataFrame), as loaded from the input json file via
-    # load_api_results()
+    # load_api_results(). Has columns ['file', 'detections','failure'].
     detectionResults = None

     # The other fields in the input json file, loaded via load_api_results()
@@ -309,7 +326,7 @@ class DetectionLocation:
         return detection


-#%%
+#%% Support functions

 def enumerate_images(dirName,outputFileName=None):
     """
@@ -343,7 +360,7 @@ def render_bounding_box(detection, inputFileName, outputFileName, lineWidth=5,


 def detection_rect_to_rtree_rect(detection_rect):
-    # We store
+    # We store detections as x/y/w/h, rtree and pyqtree use l/b/r/t
     l = detection_rect[0]
     b = detection_rect[1]
     r = detection_rect[0] + detection_rect[2]
@@ -352,7 +369,7 @@ def detection_rect_to_rtree_rect(detection_rect):


 def rtree_rect_to_detection_rect(rtree_rect):
-    # We store
+    # We store detections as x/y/w/h, rtree and pyqtree use l/b/r/t
     x = rtree_rect[0]
     y = rtree_rect[1]
     w = rtree_rect[2] - rtree_rect[0]
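For reference, here is the coordinate round trip these two helpers perform, shown standalone. The visible l/b/r and x/y/w lines come from the diff; the t and h lines are the implied fourth components:

def detection_rect_to_rtree_rect(detection_rect):
    # MD detections are (x, y, w, h) in normalized coordinates;
    # rtree/pyqtree want (l, b, r, t)
    l = detection_rect[0]
    b = detection_rect[1]
    r = detection_rect[0] + detection_rect[2]
    t = detection_rect[1] + detection_rect[3]
    return (l, b, r, t)

def rtree_rect_to_detection_rect(rtree_rect):
    x = rtree_rect[0]
    y = rtree_rect[1]
    w = rtree_rect[2] - rtree_rect[0]
    h = rtree_rect[3] - rtree_rect[1]
    return (x, y, w, h)

# The two conversions are exact inverses
rect = (0.1, 0.2, 0.3, 0.4)
assert rtree_rect_to_detection_rect(detection_rect_to_rtree_rect(rect)) == rect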
@@ -360,12 +377,11 @@ def rtree_rect_to_detection_rect(rtree_rect):
     return (x,y,w,h)


-#%% Sort a list of candidate detections to make them visually easier to review
-
 def sort_detections_for_directory(candidateDetections,options):
     """
     candidateDetections is a list of DetectionLocation objects. Sorts them to
-    put nearby detections next to each other, for easier visual review.
+    put nearby detections next to each other, for easier visual review. Returns
+    a sorted copy of candidateDetections, does not sort in-place.
     """

     if len(candidateDetections) <= 1 or options.smartSort is None:
@@ -458,13 +474,24 @@ def sort_detections_for_directory(candidateDetections,options):
         raise ValueError('Unrecognized sort method {}'.format(
             options.smartSort))

-
-
+# ...def sort_detections_for_directory(...)
+

 def find_matches_in_directory(dirNameAndRows, options):
     """
     dirNameAndRows is a tuple of (name,rows).

+    "name" is a location name, typically a folder name.
+
+    "rows" is a Pandas dataframe with one row per image in this location, with columns:
+
+    * 'file': relative file name
+    * 'detections': a list of MD detection objects, i.e. dicts with keys ['category','conf','bbox']
+    * 'max_detection_conf': maximum confidence of any detection, in any category
+
+    "rows" can also point to a .csv file, in which case the detection table will be read from that
+    .csv file, and results will be written to a .csv file rather than being returned.
+
     Find all unique detections in this directory.

     Returns a list of DetectionLocation objects.
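The documented input schema is easy to mock up; a hypothetical two-image location built to match the docstring above (real tables come from load_api_results(), and also carry a 'failure' column for unreadable images):

import pandas as pd

rows = pd.DataFrame([
    {'file': 'location01/IMG_0001.JPG',
     'detections': [{'category': '1', 'conf': 0.93, 'bbox': [0.1, 0.2, 0.3, 0.4]}],
     'max_detection_conf': 0.93},
    {'file': 'location01/IMG_0002.JPG',
     'detections': [],
     'max_detection_conf': 0.0},
])

# find_matches_in_directory() takes a (location_name, rows) tuple...
dirNameAndRows = ('location01', rows)
# ...or, as of this version, a (location_name, csv_path) tuple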
@@ -476,11 +503,21 @@ def find_matches_in_directory(dirNameAndRows, options):
     # Create a tree to store candidate detections
     candidateDetectionsIndex = pyqtree.Index(bbox=(-0.1,-0.1,1.1,1.1))

-    assert len(dirNameAndRows) == 2
-    assert isinstance(dirNameAndRows[0],str)
-    dirName = dirNameAndRows[0]
+    assert len(dirNameAndRows) == 2, 'find_matches_in_directory: invalid input'
+    assert isinstance(dirNameAndRows[0],str), 'find_matches_in_directory: invalid location name'
+    dirName = dirNameAndRows[0]
     rows = dirNameAndRows[1]
-
+
+    detections_loaded_from_csv_file = None
+
+    if isinstance(rows,str):
+        detections_loaded_from_csv_file = rows
+        print('Loading results for location {} from {}'.format(
+            dirName,detections_loaded_from_csv_file))
+        rows = pd.read_csv(detections_loaded_from_csv_file)
+        # Pandas writes detections out as strings, convert them back to lists
+        rows['detections'] = rows['detections'].apply(lambda s: json.loads(s.replace('\'','"')))
+
     if options.maxImagesPerFolder is not None and len(rows) > options.maxImagesPerFolder:
         print('Ignoring directory {} because it has {} images (limit set to {})'.format(
             dirName,len(rows),options.maxImagesPerFolder))
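The quote-swapping lambda above works because pandas stores the list column as its Python repr (single quotes), which becomes valid JSON once the quotes are swapped; it assumes no string inside a detection contains an apostrophe, which holds for MD category IDs and numeric fields. The round trip in isolation:

import json
import pandas as pd

detections = [{'category': '1', 'conf': 0.9, 'bbox': [0.1, 0.2, 0.3, 0.4]}]
df = pd.DataFrame([{'file': 'a.jpg', 'detections': detections}])
df.to_csv('loc.csv', header=True, index=False)

df2 = pd.read_csv('loc.csv')
# The column comes back as the string "[{'category': '1', ...}]"
df2['detections'] = df2['detections'].apply(lambda s: json.loads(s.replace('\'', '"')))
assert df2['detections'].iloc[0] == detections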
@@ -535,7 +572,7 @@ def find_matches_in_directory(dirNameAndRows, options):
         # }
         detections = row['detections']
         if isinstance(detections,float):
-            assert isinstance(row['failure'],str)
+            assert isinstance(row['failure'],str), 'Expected failure indicator'
             print('Skipping failed image {} ({})'.format(filename,row['failure']))
             continue

@@ -550,8 +587,9 @@ def find_matches_in_directory(dirNameAndRows, options):
             print('Skipping detection {}'.format(iDetection))
             continue

-        assert 'category' in detection and \
-            'conf' in detection and 'bbox' in detection
+        assert 'category' in detection and \
+               'conf' in detection and \
+               'bbox' in detection, 'Illegal detection'

         confidence = detection['conf']

@@ -568,7 +606,7 @@ def find_matches_in_directory(dirNameAndRows, options):
             continue

         # Optionally exclude some classes from consideration as suspicious
-        if len(options.excludeClasses) > 0:
+        if (options.excludeClasses is not None) and (len(options.excludeClasses) > 0):
             iClass = int(detection['category'])
             if iClass in options.excludeClasses:
                 continue
@@ -584,8 +622,12 @@ def find_matches_in_directory(dirNameAndRows, options):

         area = h * w

+        if area < 0:
+            print('Warning: negative-area bounding box for file {}'.format(filename))
+            area = abs(area); h = abs(h); w = abs(w)
+
         assert area >= 0.0 and area <= 1.0, \
-            'Illegal bounding box area {}'.format(area)
+            'Illegal bounding box area {} in image {}'.format(area,filename)

         if area < options.minSuspiciousDetectionSize:
             continue
@@ -615,7 +657,7 @@ def find_matches_in_directory(dirNameAndRows, options):
             overlappingCandidateDetections):

             # Don't match across categories
-            if candidate.category != category:
+            if (candidate.category != category) and (not (options.categoryAgnosticComparisons)):
                 continue

             # Is this a match?
@@ -649,9 +691,7 @@ def find_matches_in_directory(dirNameAndRows, options):
         candidate = DetectionLocation(instance=instance,
             detection=detection, relativeDir=dirName,
             category=category, id=i_iteration)
-
-        # candidateDetections.append(candidate)
-
+
         # pyqtree
         candidateDetectionsIndex.insert(item=candidate,bbox=rtree_rect)

@@ -669,20 +709,45 @@ def find_matches_in_directory(dirNameAndRows, options):
     candidateDetections.sort(
         key=lambda x: x.id, reverse=False)

-
-
-
+    if detections_loaded_from_csv_file is not None:
+        location_results_file = \
+            os.path.splitext(detections_loaded_from_csv_file)[0] + \
+            '_results.json'
+        print('Writing results for location {} to {}'.format(
+            dirName,location_results_file))
+        s = jsonpickle.encode(candidateDetections,make_refs=False)
+        with open(location_results_file,'w') as f:
+            f.write(s)
+            # json.dump(candidateDetections,f,indent=1)
+        return location_results_file
+    else:
+        return candidateDetections

+# ...def find_matches_in_directory(...)

-#%% Update the detection table based on suspicious results, write .csv output

-def update_detection_table(RepeatDetectionResults, options, outputFilename=None):
+def update_detection_table(repeatDetectionResults, options, outputFilename=None):
+    """
+    Changes confidence values in repeatDetectionResults.detectionResults so that detections
+    deemed to be possible false positives are given negative confidence values.
+
+    repeatDetectionResults is an object of type RepeatDetectionResults, with a pandas
+    dataframe (detectionResults) containing all the detections loaded from the .json file,
+    and a list of detections for each location (suspiciousDetections) that are deemed to
+    be suspicious.
+
+    returns the modified pandas dataframe (repeatDetectionResults.detectionResults), but
+    also modifies it in place.
+    """

-
+    # This is the pandas dataframe that contains actual detection results.
+    #
+    # Has fields ['file', 'detections','failure'].
+    detectionResults = repeatDetectionResults.detectionResults

     # An array of length nDirs, where each element is a list of DetectionLocation
     # objects for that directory that have been flagged as suspicious
-    suspiciousDetectionsByDirectory = RepeatDetectionResults.suspiciousDetections
+    suspiciousDetectionsByDirectory = repeatDetectionResults.suspiciousDetections

     nBboxChanges = 0

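The negative-confidence convention described in the new docstring means downstream consumers can separate repeat detections from real ones with a simple sign test; a sketch of that contract (the detection dicts are illustrative):

detections = [
    {'category': '1', 'conf': 0.90, 'bbox': [0.10, 0.20, 0.30, 0.40]},   # kept
    {'category': '1', 'conf': -0.85, 'bbox': [0.50, 0.50, 0.20, 0.20]},  # flagged as a repeat
]

# Drop flagged detections...
kept = [d for d in detections if d['conf'] >= 0]
assert len(kept) == 1

# ...or restore a detection that was flagged incorrectly
restored = [dict(d, conf=abs(d['conf'])) for d in detections]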
@@ -711,8 +776,8 @@ def update_detection_table(RepeatDetectionResults, options, outputFilename=None)
             # if iou < options.iouThreshold:
             #     print('IOU warning: {},{}'.format(iou,options.iouThreshold))

-            assert instance.filename in RepeatDetectionResults.filenameToRow
-            iRow = RepeatDetectionResults.filenameToRow[instance.filename]
+            assert instance.filename in repeatDetectionResults.filenameToRow
+            iRow = repeatDetectionResults.filenameToRow[instance.filename]
             row = detectionResults.iloc[iRow]
             rowDetections = row['detections']
             detectionToModify = rowDetections[instance.iDetection]
@@ -796,7 +861,7 @@ def update_detection_table(RepeatDetectionResults, options, outputFilename=None)

     # If we're also writing output...
     if outputFilename is not None and len(outputFilename) > 0:
-        write_api_results(detectionResults,
+        write_api_results(detectionResults, repeatDetectionResults.otherFields,
             outputFilename)

     print(
@@ -805,7 +870,7 @@ def update_detection_table(RepeatDetectionResults, options, outputFilename=None)

     return detectionResults

-# ...def update_detection_table(
+# ...def update_detection_table(...)


 def render_sample_image_for_detection(detection,filteringDir,options):
@@ -841,12 +906,12 @@ def render_sample_image_for_detection(detection,filteringDir,options):

     try:

+        im = open_image(inputFullPath)
+
         # Should we render (typically in a very light color) detections
         # *other* than the one we're highlighting here?
         if options.bRenderOtherDetections:
-
-            im = open_image(inputFullPath)
-
+
             # Optionally resize the output image
             if (options.maxOutputImageWidth is not None) and \
                (im.size[0] > options.maxOutputImageWidth):
@@ -892,6 +957,10 @@ def render_sample_image_for_detection(detection,filteringDir,options):
             render_bounding_box(detection, inputFullPath, outputFullPath,
                 lineWidth=options.lineThickness, expansion=options.boxExpansion)

+        # ...if we are/aren't rendering other bounding boxes
+
+        # If we're rendering detection tiles, we'll re-load and re-write the image we
+        # just wrote to outputFullPath
         if options.bRenderDetectionTiles:

             assert not is_sas_url(options.imageBase), "Can't render detection tiles from SAS URLs"
@@ -899,6 +968,8 @@ def render_sample_image_for_detection(detection,filteringDir,options):
             if options.detectionTilesPrimaryImageWidth is not None:
                 primaryImageWidth = options.detectionTilesPrimaryImageWidth
             else:
+                # "im" may be a resized version of the original image, if we've already run
+                # the code to render other bounding boxes.
                 primaryImageWidth = im.size[0]

             if options.detectionTilesCroppedGridWidth <= 1.0:
@@ -922,7 +993,8 @@ def render_sample_image_for_detection(detection,filteringDir,options):
                 secondaryImageFilenameList[0:options.detectionTilesMaxCrops]
             secondaryImageBoundingBoxList = \
                 secondaryImageBoundingBoxList[0:options.detectionTilesMaxCrops]
-
+
+            # This will over-write the image we've already written to outputFullPath
             render_images_with_thumbnails.render_images_with_thumbnails(
                 primary_image_filename=outputFullPath,
                 primary_image_width=primaryImageWidth,
@@ -936,16 +1008,20 @@ def render_sample_image_for_detection(detection,filteringDir,options):
             # bDetectionTilesCroppedGridWidth = 0.6
             # bDetectionTilesPrimaryImageLocation='right'

-        # ...if we are/aren't rendering
+        # ...if we are/aren't rendering detection tiles

     except Exception as e:
-
-
+
+        stack_trace = traceback.format_exc()
+        print('Warning: error rendering bounding box from {} to {}: {} ({})'.format(
+            inputFullPath,outputFullPath,e,stack_trace))
         if options.bFailOnRenderError:
             raise
-

-
+# ...def render_sample_image_for_detection(...)
+
+
+#%% Main entry point

 def find_repeat_detections(inputFilename, outputFilename=None, options=None):

@@ -998,9 +1074,9 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):

     # Load file to a pandas dataframe. Also populates 'max_detection_conf', even if it's
     # not present in the .json file.
-
     detectionResults, otherFields = load_api_results(inputFilename, normalize_paths=True,
-        filename_replacements=options.filenameReplacements)
+        filename_replacements=options.filenameReplacements,
+        force_forward_slashes=True)
     toReturn.detectionResults = detectionResults
     toReturn.otherFields = otherFields

@@ -1024,7 +1100,7 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):
     assert os.path.isfile(absolutePath), 'Could not find file {}'.format(absolutePath)


-    ##%% Separate files into
+    ##%% Separate files into locations

     # This will be a map from a directory name to smaller data frames
     rowsByDirectory = {}
@@ -1032,12 +1108,12 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):
     # This is a mapping back into the rows of the original table
     filenameToRow = {}

-    print('Separating
+    print('Separating images into locations...')

     nCustomDirReplacements = 0

     # iRow = 0; row = detectionResults.iloc[0]
-    for iRow, row in detectionResults.iterrows():
+    for iRow, row in tqdm(detectionResults.iterrows(),total=len(detectionResults)):

         relativePath = row['file']

@@ -1075,7 +1151,7 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):
     if options.customDirNameFunction is not None:
         print('Custom dir name function made {} replacements (of {} images)'.format(
             nCustomDirReplacements,len(detectionResults)))
-
+
     # Convert lists of rows to proper DataFrames
     dirs = list(rowsByDirectory.keys())
     for d in dirs:
@@ -1084,11 +1160,10 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):
     toReturn.rowsByDirectory = rowsByDirectory
     toReturn.filenameToRow = filenameToRow

-    print('Finished separating {} files into {}
-
-
+    print('Finished separating {} files into {} locations'.format(len(detectionResults),
+        len(rowsByDirectory)))

-    ##% Look for
+    ##% Look for repeat detections (or load them from file)

     dirsToSearch = list(rowsByDirectory.keys())
     if options.debugMaxDir > 0:
@@ -1115,6 +1190,11 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):

     allCandidateDetections = [None] * len(dirsToSearch)

+    # If we serialize results to intermediate files, we need to remove slashes from
+    # location names; we store mappings here.
+    normalized_location_name_to_location_name = None
+    location_name_to_normalized_location_name = None
+
     if not options.bParallelizeComparisons:

         options.pbar = None
@@ -1132,7 +1212,7 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):
         print('Pool of {} requested, but only {} folders available, reducing pool to {}'.\
             format(n_workers,len(dirNameAndRows),len(dirNameAndRows)))
         n_workers = len(dirNameAndRows)
-
+
         if options.parallelizationUsesThreads:
             pool = ThreadPool(n_workers); poolstring = 'threads'
         else:
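ThreadPool and Pool expose the same imap() interface, which is why the code can pick one with a flag and consume it identically; threads also explain the pbar handling in the next hunk, since a shared tqdm object can be mutated by threads but not pickled across processes. A minimal sketch of the pattern (the worker and its inputs are stand-ins, not the module's functions):

from functools import partial
from multiprocessing.pool import Pool, ThreadPool

def process_location(dir_name_and_rows, options=None):
    # Stand-in for find_matches_in_directory; with processes it must be
    # importable at module level (and guarded by __main__ on spawn platforms)
    location_name, _ = dir_name_and_rows
    return location_name

use_threads = True  # mirrors options.parallelizationUsesThreads
n_workers = 4
pool = ThreadPool(n_workers) if use_threads else Pool(n_workers)
try:
    results = list(pool.imap(partial(process_location, options=None),
                             [('loc1', None), ('loc2', None)]))
finally:
    pool.close()
    pool.join()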
@@ -1140,24 +1220,96 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):

         print('Starting comparison pool with {} {}'.format(n_workers,poolstring))

-
-
-
-
-
-
-
-
+        assert options.pass_detections_to_processes_method in ('file','memory'), \
+            'Unrecognized IPC mechanism: {}'.format(options.pass_detections_to_processes_method)
+
+        # ** Experimental **
+        #
+        # Rather than passing detections and results around in memory, write detections and
+        # results for each worker to intermediate files. May improve performance for very large
+        # results sets that exceed working memory.
+        if options.pass_detections_to_processes_method == 'file':
+
+            ##%% Convert location names to normalized names we can write to files
+
+            normalized_location_name_to_location_name = {}
+            for dir_name in dirsToSearch:
+                normalized_location_name = flatten_path(dir_name)
+                assert normalized_location_name not in normalized_location_name_to_location_name, \
+                    'Redundant location name {}, can\'t serialize to intermediate files'.format(
+                        dir_name)
+                normalized_location_name_to_location_name[normalized_location_name] = dir_name
+
+            location_name_to_normalized_location_name = \
+                invert_dictionary(normalized_location_name_to_location_name)
+
+
+            ##%% Write results to files for each location
+
+            print('Writing results to intermediate files')
+
+            intermediate_json_file_folder = os.path.join(options.outputBase,'intermediate_results')
+            os.makedirs(intermediate_json_file_folder,exist_ok=True)
+
+            # i_location = 0; location_info = dirNameAndRows[0]
+            dirNameAndIntermediateFile = []
+
+            # i_location = 0; location_info = dirNameAndRows[i_location]
+            for i_location, location_info in tqdm(enumerate(dirNameAndRows)):
+
+                location_name = location_info[0]
+                assert location_name in location_name_to_normalized_location_name
+                normalized_location_name = location_name_to_normalized_location_name[location_name]
+                intermediate_results_file = os.path.join(intermediate_json_file_folder,
+                    normalized_location_name + '.csv')
+                detections_table_this_location = location_info[1]
+                detections_table_this_location.to_csv(intermediate_results_file,header=True,index=False)
+                dirNameAndIntermediateFile.append((location_name,intermediate_results_file))
+
+
+            ##%% Find detections in each directory
+
             options.pbar = None
-
-            partial(find_matches_in_directory,options=options),
+            allCandidateDetectionFiles = list(pool.imap(
+                partial(find_matches_in_directory,options=options), dirNameAndIntermediateFile))
+
+
+            ##%% Load into a combined list of candidate detections
+
+            allCandidateDetections = []
+
+            # candidate_detection_file = allCandidateDetectionFiles[0]
+            for candidate_detection_file in allCandidateDetectionFiles:
+                s = open(candidate_detection_file, 'r').read()
+                candidate_detections_this_file = jsonpickle.decode(s)
+                allCandidateDetections.append(candidate_detections_this_file)
+
+
+            ##%% Clean up intermediate files
+
+            shutil.rmtree(intermediate_json_file_folder)
+
+        # If we're passing things around in memory, rather than via intermediate files
+        else:
+
+            # We get slightly nicer progress bar behavior using threads, by passing a pbar
+            # object and letting it get updated. We can't serialize this object across
+            # processes.
+            if options.parallelizationUsesThreads:
+                options.pbar = tqdm(total=len(dirNameAndRows))
+                allCandidateDetections = list(pool.imap(
+                    partial(find_matches_in_directory,options=options), dirNameAndRows))
+            else:
+                options.pbar = None
+                allCandidateDetections = list(tqdm(pool.imap(
+                    partial(find_matches_in_directory,options=options), dirNameAndRows)))

     print('\nFinished looking for similar detections')


-    ##%%
+    ##%% Mark suspicious locations based on match results

-    print('
+    print('Marking repeat detections...')

     nImagesWithSuspiciousDetections = 0
     nSuspiciousDetections = 0
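The intermediate-file branch above hinges on two round trips: location names flattened into legal file names (via flatten_path, whose exact output format lives in md_utils.path_utils; a hypothetical stand-in is used here) and candidate detections serialized with jsonpickle, which can reconstruct arbitrary objects, unlike plain json. In isolation:

import jsonpickle

def flatten_path_standin(p):
    # Hypothetical stand-in for md_utils.path_utils.flatten_path
    return p.replace('\\', '~').replace('/', '~')

location_name = 'camera01/site_a'
results_file = flatten_path_standin(location_name) + '_results.json'

candidate_detections = [{'placeholder': 'DetectionLocation objects go here'}]
with open(results_file, 'w') as f:
    f.write(jsonpickle.encode(candidate_detections, make_refs=False))

with open(results_file, 'r') as f:
    assert jsonpickle.decode(f.read()) == candidate_detections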
@@ -1198,7 +1350,8 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):

     # ...for each directory

-    print('Finished
+    print('Finished marking repeat detections')
+
     print('Found {} unique detections on {} images that are suspicious'.format(
         nSuspiciousDetections, nImagesWithSuspiciousDetections))

@@ -1367,8 +1520,7 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):
         detection.sampleImageDetections = None

     # Write out the detection index
-    detectionIndexFileName = os.path.join(filteringDir, 'detectionIndex.json')
-    jsonpickle.set_encoder_options('json', sort_keys=True, indent=2)
+    detectionIndexFileName = os.path.join(filteringDir, detection_index_file_name_base)

     # Prepare the data we're going to write to the detection index file
     detectionInfo = {}
@@ -1392,4 +1544,4 @@ def find_repeat_detections(inputFilename, outputFilename=None, options=None):

     return toReturn

-# ...find_repeat_detections()
+# ...def find_repeat_detections()