megadetector 5.0.27__py3-none-any.whl → 5.0.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +23 -23
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -1
- megadetector/data_management/camtrap_dp_to_coco.py +45 -45
- megadetector/data_management/cct_json_utils.py +101 -101
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +189 -189
- megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
- megadetector/data_management/databases/integrity_check_json_db.py +202 -188
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +38 -38
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +130 -124
- megadetector/data_management/labelme_to_yolo.py +78 -72
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
- megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +70 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
- megadetector/data_management/mewc_to_md.py +339 -340
- megadetector/data_management/ocr_tools.py +258 -252
- megadetector/data_management/read_exif.py +232 -223
- megadetector/data_management/remap_coco_categories.py +26 -26
- megadetector/data_management/remove_exif.py +31 -20
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +41 -41
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +117 -120
- megadetector/data_management/yolo_to_coco.py +195 -188
- megadetector/detection/change_detection.py +831 -0
- megadetector/detection/process_video.py +341 -338
- megadetector/detection/pytorch_detector.py +308 -266
- megadetector/detection/run_detector.py +186 -166
- megadetector/detection/run_detector_batch.py +366 -364
- megadetector/detection/run_inference_with_yolov5_val.py +328 -325
- megadetector/detection/run_tiled_inference.py +312 -253
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +291 -283
- megadetector/postprocessing/add_max_conf.py +15 -11
- megadetector/postprocessing/categorize_detections_by_size.py +44 -44
- megadetector/postprocessing/classification_postprocessing.py +808 -311
- megadetector/postprocessing/combine_batch_outputs.py +20 -21
- megadetector/postprocessing/compare_batch_results.py +528 -517
- megadetector/postprocessing/convert_output_format.py +97 -97
- megadetector/postprocessing/create_crop_folder.py +220 -147
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -0
- megadetector/postprocessing/load_api_results.py +25 -22
- megadetector/postprocessing/md_to_coco.py +129 -98
- megadetector/postprocessing/md_to_labelme.py +89 -83
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +87 -114
- megadetector/postprocessing/postprocess_batch_results.py +319 -302
- megadetector/postprocessing/remap_detection_categories.py +36 -36
- megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -69
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +33 -33
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +11 -11
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/azure_utils.py +22 -22
- megadetector/utils/ct_utils.py +1019 -200
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +541 -518
- megadetector/utils/path_utils.py +1511 -406
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/sas_blob_utils.py +53 -49
- megadetector/utils/split_locations_into_train_val.py +73 -60
- megadetector/utils/string_utils.py +147 -26
- megadetector/utils/url_utils.py +463 -173
- megadetector/utils/wi_utils.py +2629 -2868
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +21 -21
- megadetector/visualization/render_images_with_thumbnails.py +37 -73
- megadetector/visualization/visualization_utils.py +424 -404
- megadetector/visualization/visualize_db.py +197 -190
- megadetector/visualization/visualize_detector_output.py +126 -98
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/METADATA +6 -3
- megadetector-5.0.29.dist-info/RECORD +163 -0
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector-5.0.27.dist-info/RECORD +0 -208
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
|
@@ -7,7 +7,7 @@ Does some integrity-checking and computes basic statistics on a COCO Camera Trap
|
|
|
7
7
|
* Verifies that required fields are present and have the right types
|
|
8
8
|
* Verifies that annotations refer to valid images
|
|
9
9
|
* Verifies that annotations refer to valid categories
|
|
10
|
-
* Verifies that image, category, and annotation IDs are unique
|
|
10
|
+
* Verifies that image, category, and annotation IDs are unique
|
|
11
11
|
* Optionally checks file existence
|
|
12
12
|
* Finds un-annotated images
|
|
13
13
|
* Finds unused categories
|
|
@@ -37,36 +37,39 @@ class IntegrityCheckOptions:
|
|
|
37
37
|
"""
|
|
38
38
|
Options for integrity_check_json_db()
|
|
39
39
|
"""
|
|
40
|
-
|
|
40
|
+
|
|
41
41
|
def __init__(self):
|
|
42
|
-
|
|
42
|
+
|
|
43
43
|
#: Image path; the filenames in the .json file should be relative to this folder
|
|
44
44
|
self.baseDir = ''
|
|
45
|
-
|
|
45
|
+
|
|
46
46
|
#: Should we validate the image sizes?
|
|
47
47
|
self.bCheckImageSizes = False
|
|
48
|
-
|
|
48
|
+
|
|
49
49
|
#: Should we check that all the images in the .json file exist on disk?
|
|
50
50
|
self.bCheckImageExistence = False
|
|
51
|
-
|
|
51
|
+
|
|
52
52
|
#: Should we search [baseDir] for images that are not used in the .json file?
|
|
53
53
|
self.bFindUnusedImages = False
|
|
54
|
-
|
|
54
|
+
|
|
55
55
|
#: Should we require that all images in the .json file have a 'location' field?
|
|
56
56
|
self.bRequireLocation = True
|
|
57
|
-
|
|
57
|
+
|
|
58
58
|
#: For debugging, limit the number of images we'll process
|
|
59
59
|
self.iMaxNumImages = -1
|
|
60
|
-
|
|
60
|
+
|
|
61
61
|
#: Number of threads to use for parallelization, set to <= 1 to disable parallelization
|
|
62
62
|
self.nThreads = 10
|
|
63
|
-
|
|
63
|
+
|
|
64
64
|
#: Enable additional debug output
|
|
65
65
|
self.verbose = True
|
|
66
|
-
|
|
66
|
+
|
|
67
67
|
#: Allow integer-valued image and annotation IDs (COCO uses this, CCT files use strings)
|
|
68
68
|
self.allowIntIDs = False
|
|
69
|
-
|
|
69
|
+
|
|
70
|
+
#: If True, error if the 'info' field is not present
|
|
71
|
+
self.requireInfo = False
|
|
72
|
+
|
|
70
73
|
# This is used in a medium-hacky way to share modified options across threads
|
|
71
74
|
default_options = IntegrityCheckOptions()
|
|
72
75
|
|
|
@@ -76,7 +79,7 @@ default_options = IntegrityCheckOptions()
|
|
|
76
79
|
def _check_image_existence_and_size(image,options=None):
|
|
77
80
|
"""
|
|
78
81
|
Validate the image represented in the CCT image dict [image], which should have fields:
|
|
79
|
-
|
|
82
|
+
|
|
80
83
|
* file_name
|
|
81
84
|
* width
|
|
82
85
|
* height
|
|
@@ -84,233 +87,239 @@ def _check_image_existence_and_size(image,options=None):
|
|
|
84
87
|
Args:
|
|
85
88
|
image (dict): image to validate
|
|
86
89
|
options (IntegrityCheckOptions): parameters impacting validation
|
|
87
|
-
|
|
90
|
+
|
|
88
91
|
Returns:
|
|
89
92
|
str: None if this image passes validation, otherwise an error string
|
|
90
93
|
"""
|
|
91
94
|
|
|
92
|
-
if options is None:
|
|
95
|
+
if options is None:
|
|
93
96
|
options = default_options
|
|
94
|
-
|
|
97
|
+
|
|
95
98
|
assert options.bCheckImageExistence
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
if not os.path.isfile(
|
|
99
|
-
s = 'Image path {} does not exist'.format(
|
|
99
|
+
|
|
100
|
+
file_path = os.path.join(options.baseDir,image['file_name'])
|
|
101
|
+
if not os.path.isfile(file_path):
|
|
102
|
+
s = 'Image path {} does not exist'.format(file_path)
|
|
100
103
|
return s
|
|
101
|
-
|
|
104
|
+
|
|
102
105
|
if options.bCheckImageSizes:
|
|
103
106
|
if not ('height' in image and 'width' in image):
|
|
104
|
-
s = 'Missing image size in {}'.format(
|
|
107
|
+
s = 'Missing image size in {}'.format(file_path)
|
|
105
108
|
return s
|
|
106
109
|
|
|
107
|
-
# width, height = Image.open(
|
|
108
|
-
pil_im = open_image(
|
|
110
|
+
# width, height = Image.open(file_path).size
|
|
111
|
+
pil_im = open_image(file_path)
|
|
109
112
|
width,height = pil_im.size
|
|
110
113
|
if (not (width == image['width'] and height == image['height'])):
|
|
111
114
|
s = 'Size mismatch for image {}: {} (reported {},{}, actual {},{})'.format(
|
|
112
|
-
image['id'],
|
|
115
|
+
image['id'], file_path, image['width'], image['height'], width, height)
|
|
113
116
|
return s
|
|
114
|
-
|
|
117
|
+
|
|
115
118
|
return None
|
|
116
119
|
|
|
117
|
-
|
|
118
|
-
def integrity_check_json_db(jsonFile, options=None):
|
|
120
|
+
|
|
121
|
+
def integrity_check_json_db(json_file, options=None):
|
|
119
122
|
"""
|
|
120
123
|
Does some integrity-checking and computes basic statistics on a COCO Camera Traps .json file; see
|
|
121
124
|
module header comment for a list of the validation steps.
|
|
122
|
-
|
|
125
|
+
|
|
123
126
|
Args:
|
|
124
|
-
|
|
125
|
-
|
|
127
|
+
json_file (str): filename to validate, or an already-loaded dict
|
|
128
|
+
|
|
126
129
|
Returns:
|
|
127
130
|
tuple: tuple containing:
|
|
128
|
-
- sorted_categories (dict): list of categories used in [
|
|
129
|
-
- data (dict): the data loaded from [
|
|
131
|
+
- sorted_categories (dict): list of categories used in [json_file], sorted by frequency
|
|
132
|
+
- data (dict): the data loaded from [json_file]
|
|
130
133
|
- error_info (dict): specific validation errors
|
|
131
134
|
"""
|
|
132
|
-
|
|
133
|
-
if options is None:
|
|
135
|
+
|
|
136
|
+
if options is None:
|
|
134
137
|
options = IntegrityCheckOptions()
|
|
135
|
-
|
|
136
|
-
if options.bCheckImageSizes:
|
|
138
|
+
|
|
139
|
+
if options.bCheckImageSizes:
|
|
137
140
|
options.bCheckImageExistence = True
|
|
138
|
-
|
|
141
|
+
|
|
139
142
|
if options.verbose:
|
|
140
143
|
print(options.__dict__)
|
|
141
|
-
|
|
144
|
+
|
|
142
145
|
if options.baseDir is None:
|
|
143
146
|
options.baseDir = ''
|
|
144
|
-
|
|
147
|
+
|
|
145
148
|
base_dir = options.baseDir
|
|
146
|
-
|
|
147
|
-
|
|
149
|
+
|
|
150
|
+
|
|
148
151
|
##%% Read .json file if necessary, integrity-check fields
|
|
149
|
-
|
|
150
|
-
if isinstance(
|
|
151
|
-
|
|
152
|
-
data =
|
|
153
|
-
|
|
154
|
-
elif isinstance(
|
|
155
|
-
|
|
156
|
-
assert os.path.isfile(
|
|
157
|
-
|
|
152
|
+
|
|
153
|
+
if isinstance(json_file,dict):
|
|
154
|
+
|
|
155
|
+
data = json_file
|
|
156
|
+
|
|
157
|
+
elif isinstance(json_file,str):
|
|
158
|
+
|
|
159
|
+
assert os.path.isfile(json_file), '.json file {} does not exist'.format(json_file)
|
|
160
|
+
|
|
158
161
|
if options.verbose:
|
|
159
162
|
print('Reading .json {} with base dir [{}]...'.format(
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
with open(
|
|
163
|
-
data = json.load(f)
|
|
164
|
-
|
|
163
|
+
json_file,base_dir))
|
|
164
|
+
|
|
165
|
+
with open(json_file,'r') as f:
|
|
166
|
+
data = json.load(f)
|
|
167
|
+
|
|
165
168
|
else:
|
|
166
|
-
|
|
167
|
-
raise ValueError('Illegal value for
|
|
168
|
-
|
|
169
|
+
|
|
170
|
+
raise ValueError('Illegal value for json_file')
|
|
171
|
+
|
|
169
172
|
images = data['images']
|
|
170
173
|
annotations = data['annotations']
|
|
171
174
|
categories = data['categories']
|
|
172
|
-
# info = data['info']
|
|
173
|
-
assert 'info' in data, 'No info struct in database'
|
|
174
175
|
|
|
175
|
-
if
|
|
176
|
-
assert
|
|
177
|
-
|
|
178
|
-
|
|
176
|
+
if options.requireInfo:
|
|
177
|
+
assert 'info' in data, 'No info struct in database'
|
|
178
|
+
|
|
179
|
+
if len(base_dir) > 0:
|
|
180
|
+
assert os.path.isdir(base_dir), \
|
|
181
|
+
'Base directory {} does not exist'.format(base_dir)
|
|
182
|
+
|
|
183
|
+
|
|
179
184
|
##%% Build dictionaries, checking ID uniqueness and internal validity as we go
|
|
180
|
-
|
|
185
|
+
|
|
181
186
|
image_id_to_image = {}
|
|
182
187
|
ann_id_to_ann = {}
|
|
183
188
|
category_id_to_category = {}
|
|
184
189
|
category_name_to_category = {}
|
|
185
190
|
image_location_set = set()
|
|
186
|
-
|
|
191
|
+
|
|
187
192
|
if options.verbose:
|
|
188
193
|
print('Checking categories...')
|
|
189
|
-
|
|
194
|
+
|
|
190
195
|
for cat in tqdm(categories):
|
|
191
|
-
|
|
196
|
+
|
|
192
197
|
# Confirm that required fields are present
|
|
193
198
|
assert 'name' in cat
|
|
194
199
|
assert 'id' in cat
|
|
195
|
-
|
|
196
|
-
assert isinstance(cat['id'],int),
|
|
197
|
-
|
|
198
|
-
|
|
200
|
+
|
|
201
|
+
assert isinstance(cat['id'],int), \
|
|
202
|
+
'Illegal category ID type: [{}]'.format(str(cat['id']))
|
|
203
|
+
assert isinstance(cat['name'],str), \
|
|
204
|
+
'Illegal category name type [{}]'.format(str(cat['name']))
|
|
205
|
+
|
|
199
206
|
category_id = cat['id']
|
|
200
207
|
category_name = cat['name']
|
|
201
|
-
|
|
208
|
+
|
|
202
209
|
# Confirm ID uniqueness
|
|
203
|
-
assert category_id not in category_id_to_category,
|
|
210
|
+
assert category_id not in category_id_to_category, \
|
|
211
|
+
'Category ID {} is used more than once'.format(category_id)
|
|
204
212
|
category_id_to_category[category_id] = cat
|
|
205
213
|
cat['_count'] = 0
|
|
206
|
-
|
|
207
|
-
assert category_name not in category_name_to_category,
|
|
208
|
-
|
|
209
|
-
|
|
214
|
+
|
|
215
|
+
assert category_name not in category_name_to_category, \
|
|
216
|
+
'Category name {} is used more than once'.format(category_name)
|
|
217
|
+
category_name_to_category[category_name] = cat
|
|
218
|
+
|
|
210
219
|
# ...for each category
|
|
211
|
-
|
|
220
|
+
|
|
212
221
|
if options.verbose:
|
|
213
222
|
print('\nChecking images...')
|
|
214
|
-
|
|
223
|
+
|
|
215
224
|
if options.iMaxNumImages > 0 and len(images) > options.iMaxNumImages:
|
|
216
|
-
|
|
225
|
+
|
|
217
226
|
if options.verbose:
|
|
218
227
|
print('Trimming image list to {}'.format(options.iMaxNumImages))
|
|
219
228
|
images = images[0:options.iMaxNumImages]
|
|
220
|
-
|
|
229
|
+
|
|
221
230
|
image_paths_in_json = set()
|
|
222
|
-
|
|
231
|
+
|
|
223
232
|
sequences = set()
|
|
224
|
-
|
|
233
|
+
|
|
225
234
|
# image = images[0]
|
|
226
235
|
for image in tqdm(images):
|
|
227
|
-
|
|
236
|
+
|
|
228
237
|
image['_count'] = 0
|
|
229
|
-
|
|
238
|
+
|
|
230
239
|
# Confirm that required fields are present
|
|
231
240
|
assert 'file_name' in image
|
|
232
241
|
assert 'id' in image
|
|
233
242
|
|
|
234
243
|
image['file_name'] = image['file_name'].replace('\\','/')
|
|
235
|
-
|
|
244
|
+
|
|
236
245
|
image_paths_in_json.add(image['file_name'])
|
|
237
|
-
|
|
246
|
+
|
|
238
247
|
assert isinstance(image['file_name'],str), 'Illegal image filename type'
|
|
239
|
-
|
|
248
|
+
|
|
240
249
|
if options.allowIntIDs:
|
|
241
250
|
assert isinstance(image['id'],str) or isinstance(image['id'],int), \
|
|
242
251
|
'Illegal image ID type'
|
|
243
252
|
else:
|
|
244
253
|
assert isinstance(image['id'],str), 'Illegal image ID type'
|
|
245
|
-
|
|
246
|
-
image_id = image['id']
|
|
247
|
-
|
|
254
|
+
|
|
255
|
+
image_id = image['id']
|
|
256
|
+
|
|
248
257
|
# Confirm ID uniqueness
|
|
249
258
|
assert image_id not in image_id_to_image, 'Duplicate image ID {}'.format(image_id)
|
|
250
|
-
|
|
259
|
+
|
|
251
260
|
image_id_to_image[image_id] = image
|
|
252
|
-
|
|
261
|
+
|
|
253
262
|
if 'height' in image:
|
|
254
263
|
assert 'width' in image, 'Image with height but no width: {}'.format(image['id'])
|
|
255
|
-
|
|
264
|
+
|
|
256
265
|
if 'width' in image:
|
|
257
266
|
assert 'height' in image, 'Image with width but no height: {}'.format(image['id'])
|
|
258
267
|
|
|
259
268
|
if options.bRequireLocation:
|
|
260
269
|
assert 'location' in image, 'No location available for: {}'.format(image['id'])
|
|
261
|
-
|
|
270
|
+
|
|
262
271
|
if 'location' in image:
|
|
263
272
|
# We previously supported ints here; this should be strings now
|
|
264
273
|
# assert isinstance(image['location'], str) or isinstance(image['location'], int), \
|
|
265
274
|
# 'Illegal image location type'
|
|
266
275
|
assert isinstance(image['location'], str)
|
|
267
276
|
image_location_set.add(image['location'])
|
|
268
|
-
|
|
277
|
+
|
|
269
278
|
if 'seq_id' in image:
|
|
270
279
|
sequences.add(image['seq_id'])
|
|
271
|
-
|
|
280
|
+
|
|
272
281
|
assert not ('sequence_id' in image or 'sequence' in image), 'Illegal sequence identifier'
|
|
273
|
-
|
|
282
|
+
|
|
274
283
|
unused_files = []
|
|
275
|
-
|
|
284
|
+
|
|
276
285
|
image_paths_relative = None
|
|
277
|
-
|
|
286
|
+
|
|
278
287
|
# Are we checking for unused images?
|
|
279
|
-
if (len(base_dir) > 0) and options.bFindUnusedImages:
|
|
280
|
-
|
|
288
|
+
if (len(base_dir) > 0) and options.bFindUnusedImages:
|
|
289
|
+
|
|
281
290
|
if options.verbose:
|
|
282
291
|
print('\nEnumerating images...')
|
|
283
|
-
|
|
292
|
+
|
|
284
293
|
image_paths_relative = find_images(base_dir,return_relative_paths=True,recursive=True)
|
|
285
|
-
|
|
294
|
+
|
|
286
295
|
for fn_relative in image_paths_relative:
|
|
287
296
|
if fn_relative not in image_paths_in_json:
|
|
288
297
|
unused_files.append(fn_relative)
|
|
289
|
-
|
|
298
|
+
|
|
290
299
|
# List of (filename,error_string) tuples
|
|
291
300
|
validation_errors = []
|
|
292
|
-
|
|
301
|
+
|
|
293
302
|
# If we're checking image existence but not image size, we don't need to read the images
|
|
294
303
|
if options.bCheckImageExistence and not options.bCheckImageSizes:
|
|
295
|
-
|
|
304
|
+
|
|
296
305
|
if image_paths_relative is None:
|
|
297
306
|
image_paths_relative = find_images(base_dir,return_relative_paths=True,recursive=True)
|
|
298
|
-
|
|
307
|
+
|
|
299
308
|
image_paths_relative_set = set(image_paths_relative)
|
|
300
|
-
|
|
309
|
+
|
|
301
310
|
for im in images:
|
|
302
|
-
if im['file_name'] not in image_paths_relative_set:
|
|
311
|
+
if im['file_name'] not in image_paths_relative_set:
|
|
303
312
|
validation_errors.append((im['file_name'],'not found in relative path list'))
|
|
304
|
-
|
|
313
|
+
|
|
305
314
|
# If we're checking image size, we need to read the images
|
|
306
315
|
if options.bCheckImageSizes:
|
|
307
|
-
|
|
316
|
+
|
|
308
317
|
if len(base_dir) == 0:
|
|
309
318
|
print('Warning: checking image sizes without a base directory, assuming "."')
|
|
310
|
-
|
|
319
|
+
|
|
311
320
|
if options.verbose:
|
|
312
321
|
print('Checking image existence and/or image sizes...')
|
|
313
|
-
|
|
322
|
+
|
|
314
323
|
if options.nThreads is not None and options.nThreads > 1:
|
|
315
324
|
if options.verbose:
|
|
316
325
|
print('Starting a pool of {} workers'.format(options.nThreads))
|
|
@@ -319,31 +328,36 @@ def integrity_check_json_db(jsonFile, options=None):
|
|
|
319
328
|
default_options.baseDir = options.baseDir
|
|
320
329
|
default_options.bCheckImageSizes = options.bCheckImageSizes
|
|
321
330
|
default_options.bCheckImageExistence = options.bCheckImageExistence
|
|
322
|
-
|
|
331
|
+
try:
|
|
332
|
+
results = tqdm(pool.imap(_check_image_existence_and_size, images), total=len(images))
|
|
333
|
+
finally:
|
|
334
|
+
pool.close()
|
|
335
|
+
pool.join()
|
|
336
|
+
print("Pool closed and joined for image size checks")
|
|
323
337
|
else:
|
|
324
338
|
results = []
|
|
325
|
-
for im in tqdm(images):
|
|
339
|
+
for im in tqdm(images):
|
|
326
340
|
results.append(_check_image_existence_and_size(im,options))
|
|
327
|
-
|
|
341
|
+
|
|
328
342
|
for i_image,result in enumerate(results):
|
|
329
343
|
if result is not None:
|
|
330
344
|
validation_errors.append((images[i_image]['file_name'],result))
|
|
331
|
-
|
|
345
|
+
|
|
332
346
|
# ...for each image
|
|
333
|
-
|
|
347
|
+
|
|
334
348
|
if options.verbose:
|
|
335
349
|
print('{} validation errors (of {})'.format(len(validation_errors),len(images)))
|
|
336
350
|
print('Checking annotations...')
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
351
|
+
|
|
352
|
+
n_boxes = 0
|
|
353
|
+
|
|
340
354
|
for ann in tqdm(annotations):
|
|
341
|
-
|
|
355
|
+
|
|
342
356
|
# Confirm that required fields are present
|
|
343
357
|
assert 'image_id' in ann
|
|
344
358
|
assert 'id' in ann
|
|
345
359
|
assert 'category_id' in ann
|
|
346
|
-
|
|
360
|
+
|
|
347
361
|
if options.allowIntIDs:
|
|
348
362
|
assert isinstance(ann['id'],str) or isinstance(ann['id'],int), \
|
|
349
363
|
'Illegal annotation ID type'
|
|
@@ -352,149 +366,149 @@ def integrity_check_json_db(jsonFile, options=None):
|
|
|
352
366
|
else:
|
|
353
367
|
assert isinstance(ann['id'],str), 'Illegal annotation ID type'
|
|
354
368
|
assert isinstance(ann['image_id'],str), 'Illegal annotation image ID type'
|
|
355
|
-
|
|
369
|
+
|
|
356
370
|
assert isinstance(ann['category_id'],int), 'Illegal annotation category ID type'
|
|
357
|
-
|
|
371
|
+
|
|
358
372
|
if 'bbox' in ann:
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
373
|
+
n_boxes += 1
|
|
374
|
+
|
|
375
|
+
ann_id = ann['id']
|
|
376
|
+
|
|
363
377
|
# Confirm ID uniqueness
|
|
364
|
-
assert
|
|
365
|
-
ann_id_to_ann[
|
|
366
|
-
|
|
378
|
+
assert ann_id not in ann_id_to_ann
|
|
379
|
+
ann_id_to_ann[ann_id] = ann
|
|
380
|
+
|
|
367
381
|
# Confirm validity
|
|
368
382
|
assert ann['category_id'] in category_id_to_category, \
|
|
369
383
|
'Category {} not found in category list'.format(ann['category_id'])
|
|
370
384
|
assert ann['image_id'] in image_id_to_image, \
|
|
371
385
|
'Image ID {} referred to by annotation {}, not available'.format(
|
|
372
386
|
ann['image_id'],ann['id'])
|
|
373
|
-
|
|
387
|
+
|
|
374
388
|
image_id_to_image[ann['image_id']]['_count'] += 1
|
|
375
|
-
category_id_to_category[ann['category_id']]['_count'] +=1
|
|
376
|
-
|
|
389
|
+
category_id_to_category[ann['category_id']]['_count'] +=1
|
|
390
|
+
|
|
377
391
|
# ...for each annotation
|
|
378
|
-
|
|
392
|
+
|
|
379
393
|
sorted_categories = sorted(categories, key=itemgetter('_count'), reverse=True)
|
|
380
|
-
|
|
381
|
-
|
|
394
|
+
|
|
395
|
+
|
|
382
396
|
##%% Print statistics
|
|
383
|
-
|
|
397
|
+
|
|
384
398
|
if options.verbose:
|
|
385
|
-
|
|
399
|
+
|
|
386
400
|
# Find un-annotated images and multi-annotation images
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
401
|
+
n_unannotated = 0
|
|
402
|
+
n_multi_annotated = 0
|
|
403
|
+
|
|
390
404
|
for image in images:
|
|
391
405
|
if image['_count'] == 0:
|
|
392
|
-
|
|
406
|
+
n_unannotated += 1
|
|
393
407
|
elif image['_count'] > 1:
|
|
394
|
-
|
|
395
|
-
|
|
408
|
+
n_multi_annotated += 1
|
|
409
|
+
|
|
396
410
|
print('\nFound {} unannotated images, {} images with multiple annotations'.format(
|
|
397
|
-
|
|
398
|
-
|
|
411
|
+
n_unannotated,n_multi_annotated))
|
|
412
|
+
|
|
399
413
|
if (len(base_dir) > 0) and options.bFindUnusedImages:
|
|
400
414
|
print('Found {} unused image files'.format(len(unused_files)))
|
|
401
|
-
|
|
415
|
+
|
|
402
416
|
n_unused_categories = 0
|
|
403
|
-
|
|
417
|
+
|
|
404
418
|
# Find unused categories
|
|
405
419
|
for cat in categories:
|
|
406
420
|
if cat['_count'] == 0:
|
|
407
421
|
print('Unused category: {}'.format(cat['name']))
|
|
408
422
|
n_unused_categories += 1
|
|
409
|
-
|
|
423
|
+
|
|
410
424
|
print('Found {} unused categories'.format(n_unused_categories))
|
|
411
|
-
|
|
412
|
-
|
|
425
|
+
|
|
426
|
+
sequence_string = 'no sequence info'
|
|
413
427
|
if len(sequences) > 0:
|
|
414
|
-
|
|
415
|
-
|
|
428
|
+
sequence_string = '{} sequences'.format(len(sequences))
|
|
429
|
+
|
|
416
430
|
print('\nDB contains {} images, {} annotations, {} bboxes, {} categories, {}\n'.format(
|
|
417
|
-
len(images),len(annotations),
|
|
418
|
-
|
|
431
|
+
len(images),len(annotations),n_boxes,len(categories),sequence_string))
|
|
432
|
+
|
|
419
433
|
if len(image_location_set) > 0:
|
|
420
434
|
print('DB contains images from {} locations\n'.format(len(image_location_set)))
|
|
421
|
-
|
|
435
|
+
|
|
422
436
|
print('Categories and annotation (not image) counts:\n')
|
|
423
|
-
|
|
437
|
+
|
|
424
438
|
for cat in sorted_categories:
|
|
425
439
|
print('{:6} {}'.format(cat['_count'],cat['name']))
|
|
426
|
-
|
|
440
|
+
|
|
427
441
|
print('')
|
|
428
|
-
|
|
442
|
+
|
|
429
443
|
error_info = {}
|
|
430
444
|
error_info['unused_files'] = unused_files
|
|
431
445
|
error_info['validation_errors'] = validation_errors
|
|
432
|
-
|
|
446
|
+
|
|
433
447
|
return sorted_categories, data, error_info
|
|
434
448
|
|
|
435
449
|
# ...def integrity_check_json_db()
|
|
436
|
-
|
|
450
|
+
|
|
437
451
|
|
|
438
452
|
#%% Command-line driver
|
|
439
|
-
|
|
440
|
-
def main():
|
|
441
|
-
|
|
453
|
+
|
|
454
|
+
def main(): # noqa
|
|
455
|
+
|
|
442
456
|
parser = argparse.ArgumentParser()
|
|
443
|
-
parser.add_argument('
|
|
457
|
+
parser.add_argument('json_file',type=str,
|
|
444
458
|
help='COCO-formatted .json file to validate')
|
|
445
|
-
parser.add_argument('--bCheckImageSizes', action='store_true',
|
|
459
|
+
parser.add_argument('--bCheckImageSizes', action='store_true',
|
|
446
460
|
help='Validate image size, requires baseDir to be specified. ' + \
|
|
447
461
|
'Implies existence checking.')
|
|
448
|
-
parser.add_argument('--bCheckImageExistence', action='store_true',
|
|
462
|
+
parser.add_argument('--bCheckImageExistence', action='store_true',
|
|
449
463
|
help='Validate image existence, requires baseDir to be specified')
|
|
450
|
-
parser.add_argument('--bFindUnusedImages', action='store_true',
|
|
464
|
+
parser.add_argument('--bFindUnusedImages', action='store_true',
|
|
451
465
|
help='Check for images in baseDir that aren\'t in the database, ' + \
|
|
452
466
|
'requires baseDir to be specified')
|
|
453
|
-
parser.add_argument('--baseDir', action='store', type=str, default='',
|
|
467
|
+
parser.add_argument('--baseDir', action='store', type=str, default='',
|
|
454
468
|
help='Base directory for images')
|
|
455
469
|
parser.add_argument('--bAllowNoLocation', action='store_true',
|
|
456
470
|
help='Disable errors when no location is specified for an image')
|
|
457
|
-
parser.add_argument('--iMaxNumImages', action='store', type=int, default=-1,
|
|
471
|
+
parser.add_argument('--iMaxNumImages', action='store', type=int, default=-1,
|
|
458
472
|
help='Cap on total number of images to check')
|
|
459
|
-
parser.add_argument('--nThreads', action='store', type=int, default=10,
|
|
473
|
+
parser.add_argument('--nThreads', action='store', type=int, default=10,
|
|
460
474
|
help='Number of threads (only relevant when verifying image ' + \
|
|
461
475
|
'sizes and/or existence)')
|
|
462
|
-
|
|
476
|
+
|
|
463
477
|
if len(sys.argv[1:])==0:
|
|
464
478
|
parser.print_help()
|
|
465
479
|
parser.exit()
|
|
466
|
-
|
|
480
|
+
|
|
467
481
|
args = parser.parse_args()
|
|
468
482
|
args.bRequireLocation = (not args.bAllowNoLocation)
|
|
469
483
|
options = IntegrityCheckOptions()
|
|
470
484
|
ct_utils.args_to_object(args, options)
|
|
471
|
-
integrity_check_json_db(args.
|
|
485
|
+
integrity_check_json_db(args.json_file,options)
|
|
472
486
|
|
|
473
|
-
if __name__ == '__main__':
|
|
487
|
+
if __name__ == '__main__':
|
|
474
488
|
main()
|
|
475
489
|
|
|
476
490
|
|
|
477
491
|
#%% Interactive driver(s)
|
|
478
492
|
|
|
479
493
|
if False:
|
|
480
|
-
|
|
494
|
+
|
|
481
495
|
#%%
|
|
482
496
|
|
|
483
|
-
"""
|
|
497
|
+
"""
|
|
484
498
|
python integrity_check_json_db.py ~/data/ena24.json --baseDir ~/data/ENA24 --bAllowNoLocation
|
|
485
499
|
"""
|
|
486
|
-
|
|
500
|
+
|
|
487
501
|
# Integrity-check .json files for LILA
|
|
488
502
|
json_files = [os.path.expanduser('~/data/ena24.json')]
|
|
489
|
-
|
|
503
|
+
|
|
490
504
|
options = IntegrityCheckOptions()
|
|
491
505
|
options.baseDir = os.path.expanduser('~/data/ENA24')
|
|
492
506
|
options.bCheckImageSizes = False
|
|
493
507
|
options.bFindUnusedImages = True
|
|
494
508
|
options.bRequireLocation = False
|
|
495
|
-
|
|
496
|
-
# options.iMaxNumImages = 10
|
|
497
|
-
|
|
509
|
+
|
|
510
|
+
# options.iMaxNumImages = 10
|
|
511
|
+
|
|
498
512
|
for json_file in json_files:
|
|
499
|
-
|
|
513
|
+
|
|
500
514
|
sorted_categories,data,_ = integrity_check_json_db(json_file, options)
|