megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/efficientnet/model.py +8 -8
- megadetector/classification/efficientnet/utils.py +6 -5
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +26 -26
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -2
- megadetector/data_management/camtrap_dp_to_coco.py +79 -46
- megadetector/data_management/cct_json_utils.py +103 -103
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +210 -193
- megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
- megadetector/data_management/databases/integrity_check_json_db.py +228 -200
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +88 -39
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +133 -125
- megadetector/data_management/labelme_to_yolo.py +159 -73
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
- megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +73 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
- megadetector/data_management/mewc_to_md.py +344 -340
- megadetector/data_management/ocr_tools.py +262 -255
- megadetector/data_management/read_exif.py +249 -227
- megadetector/data_management/remap_coco_categories.py +90 -28
- megadetector/data_management/remove_exif.py +81 -21
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +588 -120
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +248 -122
- megadetector/data_management/yolo_to_coco.py +333 -191
- megadetector/detection/change_detection.py +832 -0
- megadetector/detection/process_video.py +340 -337
- megadetector/detection/pytorch_detector.py +358 -278
- megadetector/detection/run_detector.py +399 -186
- megadetector/detection/run_detector_batch.py +404 -377
- megadetector/detection/run_inference_with_yolov5_val.py +340 -327
- megadetector/detection/run_tiled_inference.py +257 -249
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +332 -295
- megadetector/postprocessing/add_max_conf.py +19 -11
- megadetector/postprocessing/categorize_detections_by_size.py +45 -45
- megadetector/postprocessing/classification_postprocessing.py +468 -433
- megadetector/postprocessing/combine_batch_outputs.py +23 -23
- megadetector/postprocessing/compare_batch_results.py +590 -525
- megadetector/postprocessing/convert_output_format.py +106 -102
- megadetector/postprocessing/create_crop_folder.py +347 -147
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -499
- megadetector/postprocessing/load_api_results.py +48 -27
- megadetector/postprocessing/md_to_coco.py +133 -102
- megadetector/postprocessing/md_to_labelme.py +107 -90
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +92 -114
- megadetector/postprocessing/postprocess_batch_results.py +319 -301
- megadetector/postprocessing/remap_detection_categories.py +91 -38
- megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +156 -74
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/ct_utils.py +1049 -211
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +632 -529
- megadetector/utils/path_utils.py +1520 -431
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/split_locations_into_train_val.py +62 -62
- megadetector/utils/string_utils.py +148 -27
- megadetector/utils/url_utils.py +489 -176
- megadetector/utils/wi_utils.py +2658 -2526
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +34 -30
- megadetector/visualization/render_images_with_thumbnails.py +39 -74
- megadetector/visualization/visualization_utils.py +487 -435
- megadetector/visualization/visualize_db.py +232 -198
- megadetector/visualization/visualize_detector_output.py +82 -76
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
- megadetector-10.0.0.dist-info/RECORD +139 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
- megadetector/api/batch_processing/api_core/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
- megadetector/api/batch_processing/api_core/server.py +0 -294
- megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
- megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
- megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
- megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
- megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
- megadetector/api/batch_processing/api_core/server_utils.py +0 -88
- megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
- megadetector/api/batch_processing/api_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
- megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
- megadetector/api/synchronous/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
- megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
- megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
- megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
- megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector/utils/azure_utils.py +0 -178
- megadetector/utils/sas_blob_utils.py +0 -509
- megadetector-5.0.28.dist-info/RECORD +0 -209
- /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
|
@@ -7,7 +7,7 @@ Does some integrity-checking and computes basic statistics on a COCO Camera Trap
|
|
|
7
7
|
* Verifies that required fields are present and have the right types
|
|
8
8
|
* Verifies that annotations refer to valid images
|
|
9
9
|
* Verifies that annotations refer to valid categories
|
|
10
|
-
* Verifies that image, category, and annotation IDs are unique
|
|
10
|
+
* Verifies that image, category, and annotation IDs are unique
|
|
11
11
|
* Optionally checks file existence
|
|
12
12
|
* Finds un-annotated images
|
|
13
13
|
* Finds unused categories
|
|
@@ -22,7 +22,8 @@ import json
|
|
|
22
22
|
import os
|
|
23
23
|
import sys
|
|
24
24
|
|
|
25
|
-
from
|
|
25
|
+
from functools import partial
|
|
26
|
+
from multiprocessing.pool import Pool, ThreadPool
|
|
26
27
|
from operator import itemgetter
|
|
27
28
|
from tqdm import tqdm
|
|
28
29
|
|
|
@@ -37,38 +38,41 @@ class IntegrityCheckOptions:
|
|
|
37
38
|
"""
|
|
38
39
|
Options for integrity_check_json_db()
|
|
39
40
|
"""
|
|
40
|
-
|
|
41
|
+
|
|
41
42
|
def __init__(self):
|
|
42
|
-
|
|
43
|
+
|
|
43
44
|
#: Image path; the filenames in the .json file should be relative to this folder
|
|
44
45
|
self.baseDir = ''
|
|
45
|
-
|
|
46
|
+
|
|
46
47
|
#: Should we validate the image sizes?
|
|
47
48
|
self.bCheckImageSizes = False
|
|
48
|
-
|
|
49
|
+
|
|
49
50
|
#: Should we check that all the images in the .json file exist on disk?
|
|
50
51
|
self.bCheckImageExistence = False
|
|
51
|
-
|
|
52
|
+
|
|
52
53
|
#: Should we search [baseDir] for images that are not used in the .json file?
|
|
53
54
|
self.bFindUnusedImages = False
|
|
54
|
-
|
|
55
|
+
|
|
55
56
|
#: Should we require that all images in the .json file have a 'location' field?
|
|
56
57
|
self.bRequireLocation = True
|
|
57
|
-
|
|
58
|
+
|
|
58
59
|
#: For debugging, limit the number of images we'll process
|
|
59
60
|
self.iMaxNumImages = -1
|
|
60
|
-
|
|
61
|
+
|
|
61
62
|
#: Number of threads to use for parallelization, set to <= 1 to disable parallelization
|
|
62
63
|
self.nThreads = 10
|
|
63
|
-
|
|
64
|
+
|
|
65
|
+
#: Whether to use threads (rather than processes) for parallelization
|
|
66
|
+
self.parallelizeWithThreads = True
|
|
67
|
+
|
|
64
68
|
#: Enable additional debug output
|
|
65
69
|
self.verbose = True
|
|
66
|
-
|
|
70
|
+
|
|
67
71
|
#: Allow integer-valued image and annotation IDs (COCO uses this, CCT files use strings)
|
|
68
72
|
self.allowIntIDs = False
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
73
|
+
|
|
74
|
+
#: If True, error if the 'info' field is not present
|
|
75
|
+
self.requireInfo = False
|
|
72
76
|
|
|
73
77
|
|
|
74
78
|
#%% Functions
|
|
@@ -76,7 +80,7 @@ default_options = IntegrityCheckOptions()
|
|
|
76
80
|
def _check_image_existence_and_size(image,options=None):
|
|
77
81
|
"""
|
|
78
82
|
Validate the image represented in the CCT image dict [image], which should have fields:
|
|
79
|
-
|
|
83
|
+
|
|
80
84
|
* file_name
|
|
81
85
|
* width
|
|
82
86
|
* height
|
|
@@ -84,266 +88,290 @@ def _check_image_existence_and_size(image,options=None):
|
|
|
84
88
|
Args:
|
|
85
89
|
image (dict): image to validate
|
|
86
90
|
options (IntegrityCheckOptions): parameters impacting validation
|
|
87
|
-
|
|
91
|
+
|
|
88
92
|
Returns:
|
|
89
93
|
str: None if this image passes validation, otherwise an error string
|
|
90
94
|
"""
|
|
91
95
|
|
|
92
|
-
if options is None:
|
|
93
|
-
options =
|
|
94
|
-
|
|
96
|
+
if options is None:
|
|
97
|
+
options = IntegrityCheckOptions()
|
|
98
|
+
|
|
95
99
|
assert options.bCheckImageExistence
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
if not os.path.isfile(
|
|
99
|
-
s = 'Image path {} does not exist'.format(
|
|
100
|
+
|
|
101
|
+
file_path = os.path.join(options.baseDir,image['file_name'])
|
|
102
|
+
if not os.path.isfile(file_path):
|
|
103
|
+
s = 'Image path {} does not exist'.format(file_path)
|
|
100
104
|
return s
|
|
101
|
-
|
|
105
|
+
|
|
102
106
|
if options.bCheckImageSizes:
|
|
103
107
|
if not ('height' in image and 'width' in image):
|
|
104
|
-
s = 'Missing image size in {}'.format(
|
|
108
|
+
s = 'Missing image size in {}'.format(file_path)
|
|
109
|
+
return s
|
|
110
|
+
|
|
111
|
+
# width, height = Image.open(file_path).size
|
|
112
|
+
try:
|
|
113
|
+
pil_im = open_image(file_path)
|
|
114
|
+
except Exception as e:
|
|
115
|
+
s = 'Error opening {}: {}'.format(file_path,str(e))
|
|
105
116
|
return s
|
|
106
117
|
|
|
107
|
-
# width, height = Image.open(filePath).size
|
|
108
|
-
pil_im = open_image(filePath)
|
|
109
118
|
width,height = pil_im.size
|
|
110
119
|
if (not (width == image['width'] and height == image['height'])):
|
|
111
120
|
s = 'Size mismatch for image {}: {} (reported {},{}, actual {},{})'.format(
|
|
112
|
-
image['id'],
|
|
121
|
+
image['id'], file_path, image['width'], image['height'], width, height)
|
|
113
122
|
return s
|
|
114
|
-
|
|
123
|
+
|
|
115
124
|
return None
|
|
116
125
|
|
|
117
|
-
|
|
118
|
-
def integrity_check_json_db(
|
|
126
|
+
|
|
127
|
+
def integrity_check_json_db(json_file, options=None):
|
|
119
128
|
"""
|
|
120
129
|
Does some integrity-checking and computes basic statistics on a COCO Camera Traps .json file; see
|
|
121
130
|
module header comment for a list of the validation steps.
|
|
122
|
-
|
|
131
|
+
|
|
123
132
|
Args:
|
|
124
|
-
|
|
125
|
-
|
|
133
|
+
json_file (str or dict): filename to validate, or an already-loaded dict
|
|
134
|
+
options (IntegrityCheckOptions, optional): see IntegrityCheckOptions
|
|
135
|
+
|
|
126
136
|
Returns:
|
|
127
137
|
tuple: tuple containing:
|
|
128
|
-
- sorted_categories (dict): list of categories used in [
|
|
129
|
-
- data (dict): the data loaded from [
|
|
138
|
+
- sorted_categories (list): list of categories used in [json_file], sorted by frequency
|
|
139
|
+
- data (dict): the data loaded from [json_file]
|
|
130
140
|
- error_info (dict): specific validation errors
|
|
131
141
|
"""
|
|
132
|
-
|
|
133
|
-
if options is None:
|
|
142
|
+
|
|
143
|
+
if options is None:
|
|
134
144
|
options = IntegrityCheckOptions()
|
|
135
|
-
|
|
136
|
-
if options.bCheckImageSizes:
|
|
145
|
+
|
|
146
|
+
if options.bCheckImageSizes:
|
|
137
147
|
options.bCheckImageExistence = True
|
|
138
|
-
|
|
148
|
+
|
|
139
149
|
if options.verbose:
|
|
140
150
|
print(options.__dict__)
|
|
141
|
-
|
|
151
|
+
|
|
142
152
|
if options.baseDir is None:
|
|
143
153
|
options.baseDir = ''
|
|
144
|
-
|
|
154
|
+
|
|
145
155
|
base_dir = options.baseDir
|
|
146
|
-
|
|
147
|
-
|
|
156
|
+
|
|
157
|
+
|
|
148
158
|
##%% Read .json file if necessary, integrity-check fields
|
|
149
|
-
|
|
150
|
-
if isinstance(
|
|
151
|
-
|
|
152
|
-
data =
|
|
153
|
-
|
|
154
|
-
elif isinstance(
|
|
155
|
-
|
|
156
|
-
assert os.path.isfile(
|
|
157
|
-
|
|
159
|
+
|
|
160
|
+
if isinstance(json_file,dict):
|
|
161
|
+
|
|
162
|
+
data = json_file
|
|
163
|
+
|
|
164
|
+
elif isinstance(json_file,str):
|
|
165
|
+
|
|
166
|
+
assert os.path.isfile(json_file), '.json file {} does not exist'.format(json_file)
|
|
167
|
+
|
|
158
168
|
if options.verbose:
|
|
159
169
|
print('Reading .json {} with base dir [{}]...'.format(
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
with open(
|
|
163
|
-
data = json.load(f)
|
|
164
|
-
|
|
170
|
+
json_file,base_dir))
|
|
171
|
+
|
|
172
|
+
with open(json_file,'r') as f:
|
|
173
|
+
data = json.load(f)
|
|
174
|
+
|
|
165
175
|
else:
|
|
166
|
-
|
|
167
|
-
raise ValueError('Illegal value for
|
|
168
|
-
|
|
176
|
+
|
|
177
|
+
raise ValueError('Illegal value for json_file')
|
|
178
|
+
|
|
169
179
|
images = data['images']
|
|
170
180
|
annotations = data['annotations']
|
|
171
181
|
categories = data['categories']
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
182
|
+
|
|
183
|
+
if options.requireInfo:
|
|
184
|
+
assert 'info' in data, 'No info struct in database'
|
|
185
|
+
|
|
186
|
+
if len(base_dir) > 0:
|
|
187
|
+
assert os.path.isdir(base_dir), \
|
|
188
|
+
'Base directory {} does not exist'.format(base_dir)
|
|
189
|
+
|
|
190
|
+
|
|
179
191
|
##%% Build dictionaries, checking ID uniqueness and internal validity as we go
|
|
180
|
-
|
|
192
|
+
|
|
181
193
|
image_id_to_image = {}
|
|
182
194
|
ann_id_to_ann = {}
|
|
183
195
|
category_id_to_category = {}
|
|
184
196
|
category_name_to_category = {}
|
|
185
197
|
image_location_set = set()
|
|
186
|
-
|
|
198
|
+
|
|
187
199
|
if options.verbose:
|
|
188
200
|
print('Checking categories...')
|
|
189
|
-
|
|
201
|
+
|
|
190
202
|
for cat in tqdm(categories):
|
|
191
|
-
|
|
203
|
+
|
|
192
204
|
# Confirm that required fields are present
|
|
193
205
|
assert 'name' in cat
|
|
194
206
|
assert 'id' in cat
|
|
195
|
-
|
|
196
|
-
assert isinstance(cat['id'],int),
|
|
197
|
-
|
|
198
|
-
|
|
207
|
+
|
|
208
|
+
assert isinstance(cat['id'],int), \
|
|
209
|
+
'Illegal category ID type: [{}]'.format(str(cat['id']))
|
|
210
|
+
assert isinstance(cat['name'],str), \
|
|
211
|
+
'Illegal category name type [{}]'.format(str(cat['name']))
|
|
212
|
+
|
|
199
213
|
category_id = cat['id']
|
|
200
214
|
category_name = cat['name']
|
|
201
|
-
|
|
215
|
+
|
|
202
216
|
# Confirm ID uniqueness
|
|
203
|
-
assert category_id not in category_id_to_category,
|
|
217
|
+
assert category_id not in category_id_to_category, \
|
|
218
|
+
'Category ID {} is used more than once'.format(category_id)
|
|
204
219
|
category_id_to_category[category_id] = cat
|
|
205
220
|
cat['_count'] = 0
|
|
206
|
-
|
|
207
|
-
assert category_name not in category_name_to_category,
|
|
208
|
-
|
|
209
|
-
|
|
221
|
+
|
|
222
|
+
assert category_name not in category_name_to_category, \
|
|
223
|
+
'Category name {} is used more than once'.format(category_name)
|
|
224
|
+
category_name_to_category[category_name] = cat
|
|
225
|
+
|
|
210
226
|
# ...for each category
|
|
211
|
-
|
|
227
|
+
|
|
212
228
|
if options.verbose:
|
|
213
|
-
print('\nChecking
|
|
214
|
-
|
|
229
|
+
print('\nChecking image records...')
|
|
230
|
+
|
|
215
231
|
if options.iMaxNumImages > 0 and len(images) > options.iMaxNumImages:
|
|
216
|
-
|
|
232
|
+
|
|
217
233
|
if options.verbose:
|
|
218
234
|
print('Trimming image list to {}'.format(options.iMaxNumImages))
|
|
219
235
|
images = images[0:options.iMaxNumImages]
|
|
220
|
-
|
|
236
|
+
|
|
221
237
|
image_paths_in_json = set()
|
|
222
|
-
|
|
238
|
+
|
|
223
239
|
sequences = set()
|
|
224
|
-
|
|
240
|
+
|
|
225
241
|
# image = images[0]
|
|
226
242
|
for image in tqdm(images):
|
|
227
|
-
|
|
243
|
+
|
|
228
244
|
image['_count'] = 0
|
|
229
|
-
|
|
245
|
+
|
|
230
246
|
# Confirm that required fields are present
|
|
231
247
|
assert 'file_name' in image
|
|
232
248
|
assert 'id' in image
|
|
233
249
|
|
|
234
250
|
image['file_name'] = image['file_name'].replace('\\','/')
|
|
235
|
-
|
|
251
|
+
|
|
236
252
|
image_paths_in_json.add(image['file_name'])
|
|
237
|
-
|
|
253
|
+
|
|
238
254
|
assert isinstance(image['file_name'],str), 'Illegal image filename type'
|
|
239
|
-
|
|
255
|
+
|
|
240
256
|
if options.allowIntIDs:
|
|
241
257
|
assert isinstance(image['id'],str) or isinstance(image['id'],int), \
|
|
242
258
|
'Illegal image ID type'
|
|
243
259
|
else:
|
|
244
260
|
assert isinstance(image['id'],str), 'Illegal image ID type'
|
|
245
|
-
|
|
246
|
-
image_id = image['id']
|
|
247
|
-
|
|
261
|
+
|
|
262
|
+
image_id = image['id']
|
|
263
|
+
|
|
248
264
|
# Confirm ID uniqueness
|
|
249
265
|
assert image_id not in image_id_to_image, 'Duplicate image ID {}'.format(image_id)
|
|
250
|
-
|
|
266
|
+
|
|
251
267
|
image_id_to_image[image_id] = image
|
|
252
|
-
|
|
268
|
+
|
|
253
269
|
if 'height' in image:
|
|
254
270
|
assert 'width' in image, 'Image with height but no width: {}'.format(image['id'])
|
|
255
|
-
|
|
271
|
+
|
|
256
272
|
if 'width' in image:
|
|
257
273
|
assert 'height' in image, 'Image with width but no height: {}'.format(image['id'])
|
|
258
274
|
|
|
259
275
|
if options.bRequireLocation:
|
|
260
276
|
assert 'location' in image, 'No location available for: {}'.format(image['id'])
|
|
261
|
-
|
|
277
|
+
|
|
262
278
|
if 'location' in image:
|
|
263
279
|
# We previously supported ints here; this should be strings now
|
|
264
280
|
# assert isinstance(image['location'], str) or isinstance(image['location'], int), \
|
|
265
281
|
# 'Illegal image location type'
|
|
266
282
|
assert isinstance(image['location'], str)
|
|
267
283
|
image_location_set.add(image['location'])
|
|
268
|
-
|
|
284
|
+
|
|
269
285
|
if 'seq_id' in image:
|
|
270
286
|
sequences.add(image['seq_id'])
|
|
271
|
-
|
|
287
|
+
|
|
272
288
|
assert not ('sequence_id' in image or 'sequence' in image), 'Illegal sequence identifier'
|
|
273
|
-
|
|
289
|
+
|
|
274
290
|
unused_files = []
|
|
275
|
-
|
|
291
|
+
|
|
276
292
|
image_paths_relative = None
|
|
277
|
-
|
|
293
|
+
|
|
278
294
|
# Are we checking for unused images?
|
|
279
|
-
if (len(base_dir) > 0) and options.bFindUnusedImages:
|
|
280
|
-
|
|
295
|
+
if (len(base_dir) > 0) and options.bFindUnusedImages:
|
|
296
|
+
|
|
281
297
|
if options.verbose:
|
|
282
298
|
print('\nEnumerating images...')
|
|
283
|
-
|
|
299
|
+
|
|
284
300
|
image_paths_relative = find_images(base_dir,return_relative_paths=True,recursive=True)
|
|
285
|
-
|
|
301
|
+
|
|
286
302
|
for fn_relative in image_paths_relative:
|
|
287
303
|
if fn_relative not in image_paths_in_json:
|
|
288
304
|
unused_files.append(fn_relative)
|
|
289
|
-
|
|
305
|
+
|
|
290
306
|
# List of (filename,error_string) tuples
|
|
291
307
|
validation_errors = []
|
|
292
|
-
|
|
308
|
+
|
|
293
309
|
# If we're checking image existence but not image size, we don't need to read the images
|
|
294
310
|
if options.bCheckImageExistence and not options.bCheckImageSizes:
|
|
295
|
-
|
|
311
|
+
|
|
296
312
|
if image_paths_relative is None:
|
|
297
313
|
image_paths_relative = find_images(base_dir,return_relative_paths=True,recursive=True)
|
|
298
|
-
|
|
314
|
+
|
|
299
315
|
image_paths_relative_set = set(image_paths_relative)
|
|
300
|
-
|
|
316
|
+
|
|
301
317
|
for im in images:
|
|
302
|
-
if im['file_name'] not in image_paths_relative_set:
|
|
318
|
+
if im['file_name'] not in image_paths_relative_set:
|
|
303
319
|
validation_errors.append((im['file_name'],'not found in relative path list'))
|
|
304
|
-
|
|
320
|
+
|
|
305
321
|
# If we're checking image size, we need to read the images
|
|
306
322
|
if options.bCheckImageSizes:
|
|
307
|
-
|
|
323
|
+
|
|
308
324
|
if len(base_dir) == 0:
|
|
309
325
|
print('Warning: checking image sizes without a base directory, assuming "."')
|
|
310
|
-
|
|
326
|
+
|
|
311
327
|
if options.verbose:
|
|
312
328
|
print('Checking image existence and/or image sizes...')
|
|
313
|
-
|
|
329
|
+
|
|
314
330
|
if options.nThreads is not None and options.nThreads > 1:
|
|
331
|
+
|
|
332
|
+
if options.parallelizeWithThreads:
|
|
333
|
+
worker_string = 'threads'
|
|
334
|
+
else:
|
|
335
|
+
worker_string = 'processes'
|
|
336
|
+
|
|
315
337
|
if options.verbose:
|
|
316
|
-
print('Starting a pool of {}
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
338
|
+
print('Starting a pool of {} {}'.format(options.nThreads,worker_string))
|
|
339
|
+
if options.parallelizeWithThreads:
|
|
340
|
+
pool = ThreadPool(options.nThreads)
|
|
341
|
+
else:
|
|
342
|
+
pool = Pool(options.nThreads)
|
|
343
|
+
try:
|
|
344
|
+
results = list(tqdm(pool.imap(
|
|
345
|
+
partial(_check_image_existence_and_size,options=options), images),
|
|
346
|
+
total=len(images)))
|
|
347
|
+
finally:
|
|
348
|
+
pool.close()
|
|
349
|
+
pool.join()
|
|
350
|
+
print("Pool closed and joined for image size checks")
|
|
323
351
|
else:
|
|
324
352
|
results = []
|
|
325
|
-
for im in tqdm(images):
|
|
353
|
+
for im in tqdm(images):
|
|
326
354
|
results.append(_check_image_existence_and_size(im,options))
|
|
327
|
-
|
|
355
|
+
|
|
328
356
|
for i_image,result in enumerate(results):
|
|
329
357
|
if result is not None:
|
|
330
358
|
validation_errors.append((images[i_image]['file_name'],result))
|
|
331
|
-
|
|
359
|
+
|
|
332
360
|
# ...for each image
|
|
333
|
-
|
|
361
|
+
|
|
334
362
|
if options.verbose:
|
|
335
363
|
print('{} validation errors (of {})'.format(len(validation_errors),len(images)))
|
|
336
364
|
print('Checking annotations...')
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
365
|
+
|
|
366
|
+
n_boxes = 0
|
|
367
|
+
|
|
340
368
|
for ann in tqdm(annotations):
|
|
341
|
-
|
|
369
|
+
|
|
342
370
|
# Confirm that required fields are present
|
|
343
371
|
assert 'image_id' in ann
|
|
344
372
|
assert 'id' in ann
|
|
345
373
|
assert 'category_id' in ann
|
|
346
|
-
|
|
374
|
+
|
|
347
375
|
if options.allowIntIDs:
|
|
348
376
|
assert isinstance(ann['id'],str) or isinstance(ann['id'],int), \
|
|
349
377
|
'Illegal annotation ID type'
|
|
@@ -352,149 +380,149 @@ def integrity_check_json_db(jsonFile, options=None):
|
|
|
352
380
|
else:
|
|
353
381
|
assert isinstance(ann['id'],str), 'Illegal annotation ID type'
|
|
354
382
|
assert isinstance(ann['image_id'],str), 'Illegal annotation image ID type'
|
|
355
|
-
|
|
383
|
+
|
|
356
384
|
assert isinstance(ann['category_id'],int), 'Illegal annotation category ID type'
|
|
357
|
-
|
|
385
|
+
|
|
358
386
|
if 'bbox' in ann:
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
387
|
+
n_boxes += 1
|
|
388
|
+
|
|
389
|
+
ann_id = ann['id']
|
|
390
|
+
|
|
363
391
|
# Confirm ID uniqueness
|
|
364
|
-
assert
|
|
365
|
-
ann_id_to_ann[
|
|
366
|
-
|
|
392
|
+
assert ann_id not in ann_id_to_ann
|
|
393
|
+
ann_id_to_ann[ann_id] = ann
|
|
394
|
+
|
|
367
395
|
# Confirm validity
|
|
368
396
|
assert ann['category_id'] in category_id_to_category, \
|
|
369
397
|
'Category {} not found in category list'.format(ann['category_id'])
|
|
370
398
|
assert ann['image_id'] in image_id_to_image, \
|
|
371
399
|
'Image ID {} referred to by annotation {}, not available'.format(
|
|
372
400
|
ann['image_id'],ann['id'])
|
|
373
|
-
|
|
401
|
+
|
|
374
402
|
image_id_to_image[ann['image_id']]['_count'] += 1
|
|
375
|
-
category_id_to_category[ann['category_id']]['_count'] +=1
|
|
376
|
-
|
|
403
|
+
category_id_to_category[ann['category_id']]['_count'] +=1
|
|
404
|
+
|
|
377
405
|
# ...for each annotation
|
|
378
|
-
|
|
406
|
+
|
|
379
407
|
sorted_categories = sorted(categories, key=itemgetter('_count'), reverse=True)
|
|
380
|
-
|
|
381
|
-
|
|
408
|
+
|
|
409
|
+
|
|
382
410
|
##%% Print statistics
|
|
383
|
-
|
|
411
|
+
|
|
384
412
|
if options.verbose:
|
|
385
|
-
|
|
413
|
+
|
|
386
414
|
# Find un-annotated images and multi-annotation images
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
415
|
+
n_unannotated = 0
|
|
416
|
+
n_multi_annotated = 0
|
|
417
|
+
|
|
390
418
|
for image in images:
|
|
391
419
|
if image['_count'] == 0:
|
|
392
|
-
|
|
420
|
+
n_unannotated += 1
|
|
393
421
|
elif image['_count'] > 1:
|
|
394
|
-
|
|
395
|
-
|
|
422
|
+
n_multi_annotated += 1
|
|
423
|
+
|
|
396
424
|
print('\nFound {} unannotated images, {} images with multiple annotations'.format(
|
|
397
|
-
|
|
398
|
-
|
|
425
|
+
n_unannotated,n_multi_annotated))
|
|
426
|
+
|
|
399
427
|
if (len(base_dir) > 0) and options.bFindUnusedImages:
|
|
400
428
|
print('Found {} unused image files'.format(len(unused_files)))
|
|
401
|
-
|
|
429
|
+
|
|
402
430
|
n_unused_categories = 0
|
|
403
|
-
|
|
431
|
+
|
|
404
432
|
# Find unused categories
|
|
405
433
|
for cat in categories:
|
|
406
434
|
if cat['_count'] == 0:
|
|
407
435
|
print('Unused category: {}'.format(cat['name']))
|
|
408
436
|
n_unused_categories += 1
|
|
409
|
-
|
|
437
|
+
|
|
410
438
|
print('Found {} unused categories'.format(n_unused_categories))
|
|
411
|
-
|
|
412
|
-
|
|
439
|
+
|
|
440
|
+
sequence_string = 'no sequence info'
|
|
413
441
|
if len(sequences) > 0:
|
|
414
|
-
|
|
415
|
-
|
|
442
|
+
sequence_string = '{} sequences'.format(len(sequences))
|
|
443
|
+
|
|
416
444
|
print('\nDB contains {} images, {} annotations, {} bboxes, {} categories, {}\n'.format(
|
|
417
|
-
len(images),len(annotations),
|
|
418
|
-
|
|
445
|
+
len(images),len(annotations),n_boxes,len(categories),sequence_string))
|
|
446
|
+
|
|
419
447
|
if len(image_location_set) > 0:
|
|
420
448
|
print('DB contains images from {} locations\n'.format(len(image_location_set)))
|
|
421
|
-
|
|
449
|
+
|
|
422
450
|
print('Categories and annotation (not image) counts:\n')
|
|
423
|
-
|
|
451
|
+
|
|
424
452
|
for cat in sorted_categories:
|
|
425
453
|
print('{:6} {}'.format(cat['_count'],cat['name']))
|
|
426
|
-
|
|
454
|
+
|
|
427
455
|
print('')
|
|
428
|
-
|
|
456
|
+
|
|
429
457
|
error_info = {}
|
|
430
458
|
error_info['unused_files'] = unused_files
|
|
431
459
|
error_info['validation_errors'] = validation_errors
|
|
432
|
-
|
|
460
|
+
|
|
433
461
|
return sorted_categories, data, error_info
|
|
434
462
|
|
|
435
463
|
# ...def integrity_check_json_db()
|
|
436
|
-
|
|
464
|
+
|
|
437
465
|
|
|
438
466
|
#%% Command-line driver
|
|
439
|
-
|
|
440
|
-
def main():
|
|
441
|
-
|
|
467
|
+
|
|
468
|
+
def main(): # noqa
|
|
469
|
+
|
|
442
470
|
parser = argparse.ArgumentParser()
|
|
443
|
-
parser.add_argument('
|
|
471
|
+
parser.add_argument('json_file',type=str,
|
|
444
472
|
help='COCO-formatted .json file to validate')
|
|
445
|
-
parser.add_argument('--bCheckImageSizes', action='store_true',
|
|
473
|
+
parser.add_argument('--bCheckImageSizes', action='store_true',
|
|
446
474
|
help='Validate image size, requires baseDir to be specified. ' + \
|
|
447
475
|
'Implies existence checking.')
|
|
448
|
-
parser.add_argument('--bCheckImageExistence', action='store_true',
|
|
476
|
+
parser.add_argument('--bCheckImageExistence', action='store_true',
|
|
449
477
|
help='Validate image existence, requires baseDir to be specified')
|
|
450
|
-
parser.add_argument('--bFindUnusedImages', action='store_true',
|
|
478
|
+
parser.add_argument('--bFindUnusedImages', action='store_true',
|
|
451
479
|
help='Check for images in baseDir that aren\'t in the database, ' + \
|
|
452
480
|
'requires baseDir to be specified')
|
|
453
|
-
parser.add_argument('--baseDir', action='store', type=str, default='',
|
|
481
|
+
parser.add_argument('--baseDir', action='store', type=str, default='',
|
|
454
482
|
help='Base directory for images')
|
|
455
483
|
parser.add_argument('--bAllowNoLocation', action='store_true',
|
|
456
484
|
help='Disable errors when no location is specified for an image')
|
|
457
|
-
parser.add_argument('--iMaxNumImages', action='store', type=int, default=-1,
|
|
485
|
+
parser.add_argument('--iMaxNumImages', action='store', type=int, default=-1,
|
|
458
486
|
help='Cap on total number of images to check')
|
|
459
|
-
parser.add_argument('--nThreads', action='store', type=int, default=10,
|
|
487
|
+
parser.add_argument('--nThreads', action='store', type=int, default=10,
|
|
460
488
|
help='Number of threads (only relevant when verifying image ' + \
|
|
461
489
|
'sizes and/or existence)')
|
|
462
|
-
|
|
490
|
+
|
|
463
491
|
if len(sys.argv[1:])==0:
|
|
464
492
|
parser.print_help()
|
|
465
493
|
parser.exit()
|
|
466
|
-
|
|
494
|
+
|
|
467
495
|
args = parser.parse_args()
|
|
468
496
|
args.bRequireLocation = (not args.bAllowNoLocation)
|
|
469
497
|
options = IntegrityCheckOptions()
|
|
470
498
|
ct_utils.args_to_object(args, options)
|
|
471
|
-
integrity_check_json_db(args.
|
|
499
|
+
integrity_check_json_db(args.json_file,options)
|
|
472
500
|
|
|
473
|
-
if __name__ == '__main__':
|
|
501
|
+
if __name__ == '__main__':
|
|
474
502
|
main()
|
|
475
503
|
|
|
476
504
|
|
|
477
505
|
#%% Interactive driver(s)
|
|
478
506
|
|
|
479
507
|
if False:
|
|
480
|
-
|
|
508
|
+
|
|
481
509
|
#%%
|
|
482
510
|
|
|
483
|
-
"""
|
|
511
|
+
"""
|
|
484
512
|
python integrity_check_json_db.py ~/data/ena24.json --baseDir ~/data/ENA24 --bAllowNoLocation
|
|
485
513
|
"""
|
|
486
|
-
|
|
514
|
+
|
|
487
515
|
# Integrity-check .json files for LILA
|
|
488
516
|
json_files = [os.path.expanduser('~/data/ena24.json')]
|
|
489
|
-
|
|
517
|
+
|
|
490
518
|
options = IntegrityCheckOptions()
|
|
491
519
|
options.baseDir = os.path.expanduser('~/data/ENA24')
|
|
492
520
|
options.bCheckImageSizes = False
|
|
493
521
|
options.bFindUnusedImages = True
|
|
494
522
|
options.bRequireLocation = False
|
|
495
|
-
|
|
496
|
-
# options.iMaxNumImages = 10
|
|
497
|
-
|
|
523
|
+
|
|
524
|
+
# options.iMaxNumImages = 10
|
|
525
|
+
|
|
498
526
|
for json_file in json_files:
|
|
499
|
-
|
|
527
|
+
|
|
500
528
|
sorted_categories,data,_ = integrity_check_json_db(json_file, options)
|