megadetector 5.0.27__py3-none-any.whl → 5.0.29__py3-none-any.whl
- megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +23 -23
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -1
- megadetector/data_management/camtrap_dp_to_coco.py +45 -45
- megadetector/data_management/cct_json_utils.py +101 -101
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +189 -189
- megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
- megadetector/data_management/databases/integrity_check_json_db.py +202 -188
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +38 -38
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +130 -124
- megadetector/data_management/labelme_to_yolo.py +78 -72
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
- megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +70 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
- megadetector/data_management/mewc_to_md.py +339 -340
- megadetector/data_management/ocr_tools.py +258 -252
- megadetector/data_management/read_exif.py +232 -223
- megadetector/data_management/remap_coco_categories.py +26 -26
- megadetector/data_management/remove_exif.py +31 -20
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +41 -41
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +117 -120
- megadetector/data_management/yolo_to_coco.py +195 -188
- megadetector/detection/change_detection.py +831 -0
- megadetector/detection/process_video.py +341 -338
- megadetector/detection/pytorch_detector.py +308 -266
- megadetector/detection/run_detector.py +186 -166
- megadetector/detection/run_detector_batch.py +366 -364
- megadetector/detection/run_inference_with_yolov5_val.py +328 -325
- megadetector/detection/run_tiled_inference.py +312 -253
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +291 -283
- megadetector/postprocessing/add_max_conf.py +15 -11
- megadetector/postprocessing/categorize_detections_by_size.py +44 -44
- megadetector/postprocessing/classification_postprocessing.py +808 -311
- megadetector/postprocessing/combine_batch_outputs.py +20 -21
- megadetector/postprocessing/compare_batch_results.py +528 -517
- megadetector/postprocessing/convert_output_format.py +97 -97
- megadetector/postprocessing/create_crop_folder.py +220 -147
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -0
- megadetector/postprocessing/load_api_results.py +25 -22
- megadetector/postprocessing/md_to_coco.py +129 -98
- megadetector/postprocessing/md_to_labelme.py +89 -83
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +87 -114
- megadetector/postprocessing/postprocess_batch_results.py +319 -302
- megadetector/postprocessing/remap_detection_categories.py +36 -36
- megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -69
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +33 -33
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +11 -11
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/azure_utils.py +22 -22
- megadetector/utils/ct_utils.py +1019 -200
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +541 -518
- megadetector/utils/path_utils.py +1511 -406
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/sas_blob_utils.py +53 -49
- megadetector/utils/split_locations_into_train_val.py +73 -60
- megadetector/utils/string_utils.py +147 -26
- megadetector/utils/url_utils.py +463 -173
- megadetector/utils/wi_utils.py +2629 -2868
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +21 -21
- megadetector/visualization/render_images_with_thumbnails.py +37 -73
- megadetector/visualization/visualization_utils.py +424 -404
- megadetector/visualization/visualize_db.py +197 -190
- megadetector/visualization/visualize_detector_output.py +126 -98
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/METADATA +6 -3
- megadetector-5.0.29.dist-info/RECORD +163 -0
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector-5.0.27.dist-info/RECORD +0 -208
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
--- a/megadetector/postprocessing/subset_json_detector_output.py
+++ b/megadetector/postprocessing/subset_json_detector_output.py
@@ -5,16 +5,16 @@ subset_json_detector_output.py
 Creates one or more subsets of a detector results file (.json), doing either
 or both of the following (if both are requested, they happen in this order):

-1) Retrieve all elements where filenames contain a specified query string,
-   optionally replacing that query with a replacement token. If the query is blank,
+1) Retrieve all elements where filenames contain a specified query string,
+   optionally replacing that query with a replacement token. If the query is blank,
    can also be used to prepend content to all filenames.

 Does not support regex's, but supports a special case of ^string to indicate "must start with
 to match".

-2) Create separate .jsons for each unique path, optionally making the filenames
-   in those .json's relative paths. In this case, you specify an output directory,
-   rather than an output path. All images in the folder blah/foo/bar will end up
+2) Create separate .jsons for each unique path, optionally making the filenames
+   in those .json's relative paths. In this case, you specify an output directory,
+   rather than an output path. All images in the folder blah/foo/bar will end up
    in a .json file called blah_foo_bar.json.

 Can also apply a confidence threshold.
@@ -26,16 +26,20 @@ To subset a COCO Camera Traps .json database, see subset_json_db.py

 **Sample invocation (splitting into multiple json's)**

-Read from "1800_idfg_statewide_wolf_detections_w_classifications.json", split up into
+Read from "1800_idfg_statewide_wolf_detections_w_classifications.json", split up into
 individual .jsons in 'd:/temp/idfg/output', making filenames relative to their individual
 folders:

-    python subset_json_detector_output.py "d:/temp/idfg/1800_idfg_statewide_wolf_detections_w_classifications.json" "d:/temp/idfg/output" --split_folders --make_folder_relative
+    python subset_json_detector_output.py ^
+        "d:/temp/idfg/1800_idfg_statewide_wolf_detections_w_classifications.json" "d:/temp/idfg/output" ^
+        --split_folders --make_folder_relative

 Now do the same thing, but instead of writing .json's to d:/temp/idfg/output, write them to *subfolders*
 corresponding to the subfolders for each .json file.

-    python subset_json_detector_output.py "d:/temp/idfg/1800_detections_S2.json" "d:/temp/idfg/output_to_folders" --split_folders --make_folder_relative --copy_jsons_to_folders
+    python subset_json_detector_output.py ^
+        "d:/temp/idfg/1800_detections_S2.json" "d:/temp/idfg/output_to_folders" ^
+        --split_folders --make_folder_relative --copy_jsons_to_folders

 **Sample invocation (creating a single subset matching a query)**

@@ -43,11 +47,13 @@ Read from "1800_detections.json", write to "1800_detections_2017.json"

 Include only images matching "2017", and change "2017" to "blah"

-    python subset_json_detector_output.py "d:/temp/1800_detections.json" "d:/temp/1800_detections_2017_blah.json" --query 2017 --replacement blah
+    python subset_json_detector_output.py "d:/temp/1800_detections.json" "d:/temp/1800_detections_2017_blah.json" ^
+        --query 2017 --replacement blah

 Include all images, prepend with "prefix/"

-    python subset_json_detector_output.py "d:/temp/1800_detections.json" "d:/temp/1800_detections_prefix.json" --replacement "prefix/"
+    python subset_json_detector_output.py "d:/temp/1800_detections.json" "d:/temp/1800_detections_prefix.json" ^
+        --replacement "prefix/"

 """

@@ -61,10 +67,9 @@ import os
 import re

 from tqdm import tqdm
-from collections import defaultdict

+from megadetector.utils import ct_utils
 from megadetector.utils.ct_utils import args_to_object, get_max_conf, invert_dictionary
-from megadetector.utils.path_utils import top_level_folder
 from megadetector.utils.path_utils import recursive_file_list


@@ -76,23 +81,23 @@ class SubsetJsonDetectorOutputOptions:
     """

     def __init__(self):
-
+
         #: Only process files containing the token 'query'
         self.query = None
-
+
         #: Replace 'query' with 'replacement' if 'replacement' is not None. If 'query' is None,
         #: prepend 'replacement'
         self.replacement = None
-
+
         #: Should we split output into individual .json files for each folder?
         self.split_folders = False
-
-        #: Folder level to use for splitting ['bottom','
+
+        #: Folder level to use for splitting ['bottom','n_from_bottom','n_from_top','dict']
         #:
         #: 'dict' requires 'split_folder_param' to be a dictionary mapping each filename
         #: to a token.
-        self.split_folder_mode = 'bottom'
-
+        self.split_folder_mode = 'bottom'
+
         #: When using the 'n_from_bottom' parameter to define folder splitting, this
         #: defines the number of directories from the bottom. 'n_from_bottom' with
         #: a parameter of zero is the same as 'bottom'.
@@ -102,78 +107,77 @@ class SubsetJsonDetectorOutputOptions:
         #: When 'split_folder_mode' is 'dict', this should be a dictionary mapping each filename
         #: to a token.
         self.split_folder_param = 0
-
+
         #: Only meaningful if split_folders is True: should we convert pathnames to be relative
         #: the folder for each .json file?
         self.make_folder_relative = False
-
-        #: Only meaningful if split_folders and make_folder_relative are True: if not None,
-        #: will copy .json files to their corresponding output directories, relative to
+
+        #: Only meaningful if split_folders and make_folder_relative are True: if not None,
+        #: will copy .json files to their corresponding output directories, relative to
         #: output_filename
         self.copy_jsons_to_folders = False
-
+
         #: Should we over-write .json files?
         self.overwrite_json_files = False
-
+
         #: If copy_jsons_to_folders is true, do we require that directories already exist?
         self.copy_jsons_to_folders_directories_must_exist = True
-
+
         #: Optional confidence threshold; if not None, detections below this confidence won't be
         #: included in the output.
         self.confidence_threshold = None
-
+
         #: Should we remove failed images?
         self.remove_failed_images = False
-
-        #: Either a list of category IDs (as string-ints) (not names), or a dictionary mapping category *IDs*
-        #: (as string-ints) (not names) to thresholds. Removes non-matching detections, does not
-        #: remove images. Not technically mutually exclusize with category_names_to_keep, but it's an esoteric
+
+        #: Either a list of category IDs (as string-ints) (not names), or a dictionary mapping category *IDs*
+        #: (as string-ints) (not names) to thresholds. Removes non-matching detections, does not
+        #: remove images. Not technically mutually exclusize with category_names_to_keep, but it's an esoteric
         #: scenario indeed where you would want to specify both.
         self.categories_to_keep = None
-
-        #: Either a list of category names (not IDs), or a dictionary mapping category *names* (not IDs) to thresholds.
-        #: Removes non-matching detections, does not remove images. Not technically mutually exclusize with
+
+        #: Either a list of category names (not IDs), or a dictionary mapping category *names* (not IDs) to thresholds.
+        #: Removes non-matching detections, does not remove images. Not technically mutually exclusize with
         #: category_ids_to_keep, but it's an esoteric scenario indeed where you would want to specify both.
         self.category_names_to_keep = None
-
+
         #: Set to >0 during testing to limit the number of images that get processed.
         self.debug_max_images = -1
-
+
         #: Keep only files in this list, which can be a .json results file or a folder.
         #
         #: Assumes that the input .json file contains relative paths when comparing to a folder.
         self.keep_files_in_list = None
-
-        #: Remove classification with <= N instances. Does not re-map categories
+
+        #: Remove classification with <= N instances. Does not re-map categories
         #: to be contiguous. Set to 1 to remove empty categories only.
         self.remove_classification_categories_below_count = None
-
+
 # ...class SubsetJsonDetectorOutputOptions

-
+
 #%% Main function

 def _write_detection_results(data, output_filename, options):
     """
     Writes the detector-output-formatted dict *data* to *output_filename*.
     """
-
+
     if (not options.overwrite_json_files) and os.path.isfile(output_filename):
         raise ValueError('File {} exists'.format(output_filename))
-
+
     basedir = os.path.dirname(output_filename)
-
+
     if options.copy_jsons_to_folders and options.copy_jsons_to_folders_directories_must_exist:
         if not os.path.isdir(basedir):
             raise ValueError('Directory {} does not exist'.format(basedir))
         else:
             os.makedirs(basedir, exist_ok=True)
-
+
     n_images = len(data['images'])
-
+
     print('Writing detection output (with {} images) to {}'.format(n_images,output_filename))
-    with open(output_filename, 'w') as f:
-        json.dump(data,f,indent=1)
+    ct_utils.write_json(output_filename, data)

 # ...def _write_detection_results(...)

@@ -182,25 +186,25 @@ def remove_classification_categories_below_count(data, options):
     """
     Removes all classification categories below a threshold count. Does not re-map
     classification category IDs.
-
+
     Args:
         data (dict): data loaded from a MD results file
         options (SubsetJsonDetectorOutputOptions): parameters for subsetting
-
+
     Returns:
         dict: Possibly-modified version of [data] (also modifies in place)
     """
-
+
     if options.remove_classification_categories_below_count is None:
         return data
     if 'classification_categories' not in data:
         return data
-
+
     classification_category_id_to_count = {}
-
+
     for classification_category_id in data['classification_categories']:
         classification_category_id_to_count[classification_category_id] = 0
-
+
     # Count the number of occurrences of each classification category
     for im in data['images']:
         if 'detections' not in im or im['detections'] is None:
@@ -211,8 +215,8 @@ def remove_classification_categories_below_count(data, options):
             for classification in det['classifications']:
                 classification_category_id_to_count[classification[0]] = \
                     classification_category_id_to_count[classification[0]] + 1
-
-
+
+
     # Which categories have above-threshold counts?
     classification_category_ids_to_keep = set()

@@ -220,18 +224,18 @@ def remove_classification_categories_below_count(data, options):
         if classification_category_id_to_count[classification_category_id] > \
             options.remove_classification_categories_below_count:
             classification_category_ids_to_keep.add(classification_category_id)
-
+
     n_categories_removed = \
         len(classification_category_id_to_count) - \
         len(classification_category_ids_to_keep)
-
+
     print('Removing {} of {} classification categories'.format(
         n_categories_removed,len(classification_category_id_to_count)))
-
+
     if n_categories_removed == 0:
         return data
-
-
+
+
     # Filter the category list
     output_classification_categories = {}
     for category_id in data['classification_categories']:
@@ -240,8 +244,8 @@ def remove_classification_categories_below_count(data, options):
                 data['classification_categories'][category_id]
     data['classification_categories'] = output_classification_categories
     assert len(data['classification_categories']) == len(classification_category_ids_to_keep)
-
-
+
+
     # If necessary, filter the category descriptions
     if 'classification_category_descriptions' in data:
         output_classification_category_descriptions = {}
@@ -249,8 +253,8 @@ def remove_classification_categories_below_count(data, options):
             if category_id in classification_category_ids_to_keep:
                 output_classification_category_descriptions[category_id] = \
                     data['classification_category_descriptions'][category_id]
-        data['classification_category_descriptions'] = output_classification_category_descriptions
-
+        data['classification_category_descriptions'] = output_classification_category_descriptions
+
     # Filter images
     for im in data['images']:
         if 'detections' not in im or im['detections'] is None:
@@ -263,7 +267,7 @@ def remove_classification_categories_below_count(data, options):
                 if classification[0] in classification_category_ids_to_keep:
                     classifications_to_keep.append(classification)
             det['classifications'] = classifications_to_keep
-
+
     return data

 # ...def remove_classification_categories_below_count(...)
@@ -272,34 +276,34 @@ def remove_classification_categories_below_count(data, options):
 def subset_json_detector_output_by_confidence(data, options):
     """
     Removes all detections below options.confidence_threshold.
-
+
     Args:
         data (dict): data loaded from a MD results file
         options (SubsetJsonDetectorOutputOptions): parameters for subsetting
-
+
     Returns:
         dict: Possibly-modified version of [data] (also modifies in place)
     """
-
+
     if options.confidence_threshold is None:
         return data
-
+
     images_in = data['images']
-    images_out = []
-
+    images_out = []
+
     print('Subsetting by confidence >= {}'.format(options.confidence_threshold))
-
+
     n_max_changes = 0
-
+
     # im = images_in[0]
     for i_image, im in tqdm(enumerate(images_in), total=len(images_in)):
-
+
         # Always keep failed images; if the caller wants to remove these, they
         # will use remove_failed_images
         if ('detections' not in im) or (im['detections'] is None):
             images_out.append(im)
             continue
-
+
         p_orig = get_max_conf(im)

         # Find all detections above threshold for this image
@@ -308,7 +312,7 @@ def subset_json_detector_output_by_confidence(data, options):
         # If there are no detections above threshold, set the max probability
         # to -1, unless it already had a negative probability.
         if len(detections) == 0:
-            if p_orig <= 0:
+            if p_orig <= 0:
                 p = p_orig
             else:
                 p = -1
@@ -316,7 +320,7 @@ def subset_json_detector_output_by_confidence(data, options):
         # Otherwise find the max confidence
         else:
             p = max([d['conf'] for d in detections])
-
+
         im['detections'] = detections

         # Did this thresholding result in a max-confidence change?
@@ -326,18 +330,18 @@ def subset_json_detector_output_by_confidence(data, options):
             assert (p_orig <= 0) or (p < p_orig), \
                 'Confidence changed from {} to {}'.format(p_orig, p)
             n_max_changes += 1
-
+
         if 'max_detection_conf' in im:
             im['max_detection_conf'] = p
-
+
         images_out.append(im)
-
-    # ...for each image
-
-    data['images'] = images_out
+
+    # ...for each image
+
+    data['images'] = images_out
     print('done, found {} matches (of {}), {} max conf changes'.format(
         len(data['images']),len(images_in),n_max_changes))
-
+
     return data

 # ...def subset_json_detector_output_by_confidence(...)
@@ -347,20 +351,20 @@ def subset_json_detector_output_by_list(data, options):
     """
     Keeps only files in options.keep_files_in_list, which can be a .json results file or a folder.
     Assumes that the input .json file contains relative paths when comparing to a folder.
-
+
     Args:
         data (dict): data loaded from a MD results file
         options (SubsetJsonDetectorOutputOptions): parameters for subsetting
-
+
     Returns:
         dict: Possibly-modified version of [data] (also modifies in place)
     """
-
+
     if options.keep_files_in_list is None:
         return
-
+
     files_to_keep = None
-
+
     if os.path.isfile(options.keep_files_in_list):
         with open(options.keep_files_in_list,'r') as f:
             d = json.load(f)
@@ -371,36 +375,36 @@ def subset_json_detector_output_by_list(data, options):
     else:
         raise ValueError('Subsetting .json file by list: {} is neither a .json results file nor a folder'.format(
             options.keep_files_in_list))
-
+
     files_to_keep = [fn.replace('\\','/') for fn in files_to_keep]
     files_to_keep_set = set(files_to_keep)
-
+
     images_to_keep = []
-
+
     for im in data['images']:
         fn = im['file'].replace('\\','/')
         if fn in files_to_keep_set:
             images_to_keep.append(im)
-
+
     data['images'] = images_to_keep
-
+
     return data

 # ...def subset_json_detector_output_by_list(...)

-
+
 def subset_json_detector_output_by_categories(data, options):
     """
     Removes all detections without detections above a threshold for specific categories.
-
+
     Args:
         data (dict): data loaded from a MD results file
         options (SubsetJsonDetectorOutputOptions): parameters for subsetting
-
+
     Returns:
         dict: Possibly-modified version of [data] (also modifies in place)
     """
-
+
     # If categories_to_keep is supplied as a list, convert to a dict
     if options.categories_to_keep is not None:
         if not isinstance(options.categories_to_keep, dict):
@@ -409,7 +413,7 @@ def subset_json_detector_output_by_categories(data, options):
                 # Set unspecified thresholds to a silly negative value
                 dict_categories_to_keep[category_id] = -100000.0
             options.categories_to_keep = dict_categories_to_keep
-
+
     # If category_names_to_keep is supplied as a list, convert to a dict
     if options.category_names_to_keep is not None:
         if not isinstance(options.category_names_to_keep, dict):
@@ -418,9 +422,9 @@ def subset_json_detector_output_by_categories(data, options):
                 # Set unspecified thresholds to a silly negative value
                 dict_category_names_to_keep[category_name] = -100000.0
             options.category_names_to_keep = dict_category_names_to_keep
-
+
     category_name_to_category_id = invert_dictionary(data['detection_categories'])
-
+
     # If some categories are supplied as names, convert all to IDs and add to "categories_to_keep"
     if options.category_names_to_keep is not None:
         if options.categories_to_keep is None:
@@ -433,16 +437,16 @@ def subset_json_detector_output_by_categories(data, options):
                 'Category {} ({}) specified as both a name and an ID'.format(
                     category_name,category_id)
             options.categories_to_keep[category_id] = options.category_names_to_keep[category_name]
-
+
     if options.categories_to_keep is None:
         return data
-
+
     images_in = data['images']
-    images_out = []
-
+    images_out = []
+
     print('Subsetting by categories (keeping {} categories):'.format(
         len(options.categories_to_keep)))
-
+
     for category_id in sorted(list(options.categories_to_keep.keys())):
         if category_id not in data['detection_categories']:
             print('Warning: category ID {} not in category map in this file'.format(category_id))
@@ -451,28 +455,28 @@ def subset_json_detector_output_by_categories(data, options):
                 category_id,
                 data['detection_categories'][category_id],
                 options.categories_to_keep[category_id]))
-
+
     n_detections_in = 0
     n_detections_kept = 0
-
+
     # im = images_in[0]
     for i_image, im in tqdm(enumerate(images_in), total=len(images_in)):
-
+
         # Always keep failed images; if the caller wants to remove these, they
-        # will use remove_failed_images
+        # will use remove_failed_images
         if ('detections' not in im) or (im['detections'] is None):
             images_out.append(im)
             continue
-
+
         n_detections_in += len(im['detections'])
-
+
         # Find all matching detections for this image
         detections = []
         for d in im['detections']:
             if (d['category'] in options.categories_to_keep) and \
                (d['conf'] > options.categories_to_keep[d['category']]):
                 detections.append(d)
-
+
         im['detections'] = detections

         if 'max_detection_conf' in im:
@@ -481,17 +485,17 @@ def subset_json_detector_output_by_categories(data, options):
             else:
                 p = max([d['conf'] for d in detections])
             im['max_detection_conf'] = p
-
+
         n_detections_kept += len(im['detections'])
-
+
         images_out.append(im)
-
-    # ...for each image
-
-    data['images'] = images_out
+
+    # ...for each image
+
+    data['images'] = images_out
     print('done, kept {} detections (of {})'.format(
         n_detections_kept,n_detections_in))
-
+
     return data

 # ...def subset_json_detector_output_by_categories(...)
@@ -500,37 +504,37 @@ def subset_json_detector_output_by_categories(data, options):
 def remove_failed_images(data,options):
     """
     Removed failed images from [data]
-
+
     Args:
         data (dict): data loaded from a MD results file
         options (SubsetJsonDetectorOutputOptions): parameters for subsetting
-
+
     Returns:
         dict: Possibly-modified version of [data] (also modifies in place)
     """
-
+
     images_in = data['images']
-    images_out = []
-
+    images_out = []
+
     if not options.remove_failed_images:
         return data
-
+
     print('Removing failed images...', end='')
-
+
     # i_image = 0; im = images_in[0]
     for i_image, im in tqdm(enumerate(images_in), total=len(images_in)):
-
+
         if 'failure' in im and isinstance(im['failure'],str):
             continue
         else:
             images_out.append(im)
-
-    # ...for each image
-
-    data['images'] = images_out
+
+    # ...for each image
+
+    data['images'] = images_out
     n_removed = len(images_in) - len(data['images'])
     print('Done, removed {} of {}'.format(n_removed, len(images_in)))
-
+
     return data

 # ...def remove_failed_images(...)
@@ -538,35 +542,35 @@ def remove_failed_images(data,options):

 def subset_json_detector_output_by_query(data, options):
     """
-    Subsets to images whose filename matches options.query; replace all instances of
+    Subsets to images whose filename matches options.query; replace all instances of
     options.query with options.replacement. No-op if options.query_string is None or ''.
-
+
     Args:
         data (dict): data loaded from a MD results file
         options (SubsetJsonDetectorOutputOptions): parameters for subsetting
-
+
     Returns:
         dict: Possibly-modified version of [data] (also modifies in place)
     """
-
+
     images_in = data['images']
-    images_out = []
-
+    images_out = []
+
     print('Subsetting by query {}, replacement {}...'.format(options.query, options.replacement), end='')
-
+
     query_string = options.query
     query_starts_with = False
-
+
     # Support a special case regex-like notation for "starts with"
     if query_string is not None and query_string.startswith('^'):
         query_string = query_string[1:]
         query_starts_with = True
-
+
     # i_image = 0; im = images_in[0]
     for i_image, im in tqdm(enumerate(images_in), total=len(images_in)):
-
+
         fn = im['file']
-
+
         # Only take images that match the query
         if query_string is not None:
             if query_starts_with:
@@ -575,34 +579,34 @@ def subset_json_detector_output_by_query(data, options):
             else:
                 if query_string not in fn:
                     continue
-
+
         if options.replacement is not None:
             if query_string is not None:
                 fn = fn.replace(query_string, options.replacement)
             else:
                 fn = options.replacement + fn
-
+
         im['file'] = fn
-
+
         images_out.append(im)
-
-    # ...for each image
-
-    data['images'] = images_out
+
+    # ...for each image
+
+    data['images'] = images_out
     print('done, found {} matches (of {})'.format(len(data['images']), len(images_in)))
-
+
     return data

 # ...def subset_json_detector_output_by_query(...)

-
+
 def subset_json_detector_output(input_filename, output_filename, options, data=None):
     """
-    Main entry point; creates one or more subsets of a detector results file. See the
+    Main entry point; creates one or more subsets of a detector results file. See the
     module header comment for more information about the available subsetting approaches.
-
+
     Makes a copy of [data] before modifying if a data dictionary is supplied.
-
+
     Args:
         input_filename (str): filename to load and subset; can be None if [data] is supplied
         output_filename (str): file or folder name (depending on [options]) to which we should
@@ -611,27 +615,27 @@ def subset_json_detector_output(input_filename, output_filename, options, data=None):
             see SubsetJsonDetectorOutputOptions for details.
         data (dict, optional): data loaded from a .json file; if this is not None, [input_filename]
             will be ignored. If supplied, this will be copied before it's modified.
-
+
     Returns:
         dict: Results that are either loaded from [input_filename] and processed, or copied
-
-
+            from [data] and processed.
     """
-
-    if options is None:
+
+    if options is None:
         options = SubsetJsonDetectorOutputOptions()
     else:
         options = copy.deepcopy(options)
-
-    # Input validation
+
+    # Input validation
     if options.copy_jsons_to_folders:
         assert options.split_folders and options.make_folder_relative, \
             'copy_jsons_to_folders set without make_folder_relative and split_folders'
-
+
     if options.split_folders:
         if os.path.isfile(output_filename):
-            raise ValueError('When splitting by folders, output must be a valid directory name, you specified an existing file')
-
+            raise ValueError('When splitting by folders, output must be a valid directory name, ' + \
+                'you specified an existing file')
+
     if data is None:
         print('Reading file {}'.format(input_filename))
         with open(input_filename) as f:
@@ -644,232 +648,231 @@ def subset_json_detector_output(input_filename, output_filename, options, data=None):
         print('Copying data')
         data = copy.deepcopy(data)
         print('...done')
-
+
     if options.query is not None:
-
+
         data = subset_json_detector_output_by_query(data, options)
-
+
     if options.remove_failed_images:
-
+
         data = remove_failed_images(data, options)
-
+
     if options.confidence_threshold is not None:
-
+
         data = subset_json_detector_output_by_confidence(data, options)
-
+
     if (options.categories_to_keep is not None) or (options.category_names_to_keep is not None):
-
+
         data = subset_json_detector_output_by_categories(data, options)
-
+
     if options.remove_classification_categories_below_count is not None:
-
+
         data = remove_classification_categories_below_count(data, options)
-
+
     if options.keep_files_in_list is not None:
-
+
         data = subset_json_detector_output_by_list(data, options)
-
+
     if not options.split_folders:
-
+
         _write_detection_results(data, output_filename, options)
         return data
-
+
     else:
-
+
         # Map images to unique folders
         print('Finding unique folders')
-
+
         folders_to_images = {}
-
+
         # im = data['images'][0]
         for im in tqdm(data['images']):
-
+
             fn = im['file']
-
+
             if options.split_folder_mode == 'bottom':
-
+
                 dirname = os.path.dirname(fn)
-
+
             elif options.split_folder_mode == 'n_from_bottom':
-
+
                 dirname = os.path.dirname(fn)
                 for n in range(0, options.split_folder_param):
                     dirname = os.path.dirname(dirname)
-
+
             elif options.split_folder_mode == 'n_from_top':
-
+
                 # Split string into folders, keeping delimiters
-
+
                 # Don't use this, it removes delimiters
                 # tokens = _split_path(fn)
                 tokens = re.split(r'([\\/])',fn)
-
-                n_tokens_to_keep = ((options.split_folder_param + 1) * 2) - 1
-
+
+                n_tokens_to_keep = ((options.split_folder_param + 1) * 2) - 1
+
                 if n_tokens_to_keep > len(tokens):
                     raise ValueError('Cannot walk {} folders from the top in path {}'.format(
                         options.split_folder_param, fn))
                 dirname = ''.join(tokens[0:n_tokens_to_keep])
-
-            elif options.split_folder_mode == 'top':
-
-                dirname = top_level_folder(fn)
-
+
             elif options.split_folder_mode == 'dict':
-
+
                 assert isinstance(options.split_folder_param, dict)
                 dirname = options.split_folder_param[fn]
-
+
             else:
-
+
                 raise ValueError('Unrecognized folder split mode {}'.format(options.split_folder_mode))
-
+
             folders_to_images.setdefault(dirname, []).append(im)
-
+
         # ...for each image
-
+
         print('Found {} unique folders'.format(len(folders_to_images)))
-
+
         # Optionally make paths relative
         # dirname = list(folders_to_images.keys())[0]
         if options.make_folder_relative:
-
+
             print('Converting database-relative paths to individual-json-relative paths...')
-
+
             for dirname in tqdm(folders_to_images):
                 # im = folders_to_images[dirname][0]
                 for im in folders_to_images[dirname]:
                     fn = im['file']
                     relfn = os.path.relpath(fn, dirname).replace('\\', '/')
                     im['file'] = relfn
-
+
         # ...if we need to convert paths to be folder-relative
-
+
         print('Finished converting to json-relative paths, writing output')
-
+
         os.makedirs(output_filename, exist_ok=True)
         all_images = data['images']
-
+
         # dirname = list(folders_to_images.keys())[0]
         for dirname in tqdm(folders_to_images):
-
+
             json_fn = dirname.replace('/', '_').replace('\\', '_') + '.json'
-
+
             if options.copy_jsons_to_folders:
                 json_fn = os.path.join(output_filename, dirname, json_fn)
             else:
                 json_fn = os.path.join(output_filename, json_fn)
-
-            # Recycle the 'data' struct, replacing 'images' every time... medium-hacky, but
+
+            # Recycle the 'data' struct, replacing 'images' every time... medium-hacky, but
             # forward-compatible in that I don't take dependencies on the other fields
             dir_data = data
             dir_data['images'] = folders_to_images[dirname]
             _write_detection_results(dir_data, json_fn, options)
             print('Wrote {} images to {}'.format(len(dir_data['images']), json_fn))
-
+
         # ...for each directory
-
+
         data['images'] = all_images
-
+
         return data
-
+
     # ...if we're splitting folders

 # ...def subset_json_detector_output(...)

-
+
 #%% Interactive driver
-
+
 if False:

     #%%
-
+
     #%% Subset a file without splitting
-
+
     input_filename = r"c:\temp\sample.json"
     output_filename = r"c:\temp\output.json"
-
+
     options = SubsetJsonDetectorOutputOptions()
     options.replacement = None
     options.query = 'S2'
-
+
     data = subset_json_detector_output(input_filename,output_filename,options,None)
-
+

     #%% Subset and split, but don't copy to individual folders

-    input_filename = r"C:\temp\xxx-
+    input_filename = r"C:\temp\xxx-export.json"
     output_filename = r"c:\temp\out"
-
+
     options = SubsetJsonDetectorOutputOptions()
-    options.split_folders = True
+    options.split_folders = True
     options.make_folder_relative = True
     options.split_folder_mode = 'n_from_top'
     options.split_folder_param = 1
-
+
     data = subset_json_detector_output(input_filename,output_filename,options,None)
-
-
+
+
     #%% Subset and split, copying to individual folders
-
+
     input_filename = r"c:\temp\sample.json"
     output_filename = r"c:\temp\out"
-
+
     options = SubsetJsonDetectorOutputOptions()
-    options.split_folders = True
+    options.split_folders = True
     options.make_folder_relative = True
     options.copy_jsons_to_folders = True
-
+
     data = subset_json_detector_output(input_filename,output_filename,options,data)
-
+

 #%% Command-line driver

-def main():
-
+def main(): # noqa
+
     parser = argparse.ArgumentParser()
     parser.add_argument('input_file', type=str, help='Input .json filename')
     parser.add_argument('output_file', type=str, help='Output .json filename')
-    parser.add_argument('--query', type=str, default=None,
+    parser.add_argument('--query', type=str, default=None,
                         help='Query string to search for (omitting this matches all)')
-    parser.add_argument('--replacement', type=str, default=None,
+    parser.add_argument('--replacement', type=str, default=None,
                         help='Replace [query] with this')
-    parser.add_argument('--confidence_threshold', type=float, default=None,
+    parser.add_argument('--confidence_threshold', type=float, default=None,
                         help='Remove detections below this confidence level')
-    parser.add_argument('--keep_files_in_list', type=str, default=None,
+    parser.add_argument('--keep_files_in_list', type=str, default=None,
                         help='Keep only files in this list, which can be a .json results file or a folder.' + \
                             ' Assumes that the input .json file contains relative paths when comparing to a folder.')
-    parser.add_argument('--split_folders', action='store_true',
+    parser.add_argument('--split_folders', action='store_true',
                         help='Split .json files by leaf-node folder')
     parser.add_argument('--split_folder_param', type=int,
                         help='Directory level count for n_from_bottom and n_from_top splitting')
     parser.add_argument('--split_folder_mode', type=str,
-                        help='Folder level to use for splitting ("
-    parser.add_argument('--make_folder_relative', action='store_true',
-                        help='Make image paths relative to their containing folder (only meaningful with split_folders)')
-    parser.add_argument('--overwrite_json_files', action='store_true',
+                        help='Folder level to use for splitting ("bottom", "n_from_bottom", or "n_from_top")')
+    parser.add_argument('--make_folder_relative', action='store_true',
+                        help='Make image paths relative to their containing folder ' + \
+                            '(only meaningful with split_folders)')
+    parser.add_argument('--overwrite_json_files', action='store_true',
                         help='Overwrite output files')
-    parser.add_argument('--copy_jsons_to_folders', action='store_true',
-                        help='When using split_folders and make_folder_relative, copy jsons to their corresponding folders (relative to output_file)')
+    parser.add_argument('--copy_jsons_to_folders', action='store_true',
+                        help='When using split_folders and make_folder_relative, copy jsons to their ' + \
+                            'corresponding folders (relative to output_file)')
     parser.add_argument('--create_folders', action='store_true',
-                        help='When using copy_jsons_to_folders, create folders that don''t exist')
+                        help='When using copy_jsons_to_folders, create folders that don''t exist')
     parser.add_argument('--remove_classification_categories_below_count', type=int, default=None,
-                        help='Remove classification categories with less than this many instances (no removal by default)')
-
+                        help='Remove classification categories with less than this many instances ' + \
+                            '(no removal by default)')
+
     if len(sys.argv[1:]) == 0:
         parser.print_help()
         parser.exit()
-
-    args = parser.parse_args()
-
+
+    args = parser.parse_args()
+
     # Convert to an options object
     options = SubsetJsonDetectorOutputOptions()
     if args.create_folders:
         options.copy_jsons_to_folders_directories_must_exist = False
-
+
     args_to_object(args, options)
-
+
     subset_json_detector_output(args.input_file, args.output_file, options)
-
-if __name__ == '__main__':
+
+if __name__ == '__main__':
     main()