megadetector 5.0.8__py3-none-any.whl → 5.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- api/__init__.py +0 -0
- api/batch_processing/__init__.py +0 -0
- api/batch_processing/api_core/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/score.py +0 -1
- api/batch_processing/api_core/server_job_status_table.py +0 -1
- api/batch_processing/api_core_support/__init__.py +0 -0
- api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
- api/batch_processing/api_support/__init__.py +0 -0
- api/batch_processing/api_support/summarize_daily_activity.py +0 -1
- api/batch_processing/data_preparation/__init__.py +0 -0
- api/batch_processing/data_preparation/manage_local_batch.py +65 -65
- api/batch_processing/data_preparation/manage_video_batch.py +8 -8
- api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
- api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
- api/batch_processing/postprocessing/__init__.py +0 -0
- api/batch_processing/postprocessing/add_max_conf.py +12 -12
- api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
- api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
- api/batch_processing/postprocessing/compare_batch_results.py +113 -43
- api/batch_processing/postprocessing/convert_output_format.py +41 -16
- api/batch_processing/postprocessing/load_api_results.py +16 -17
- api/batch_processing/postprocessing/md_to_coco.py +31 -21
- api/batch_processing/postprocessing/md_to_labelme.py +52 -22
- api/batch_processing/postprocessing/merge_detections.py +14 -14
- api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
- api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
- api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
- api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
- api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
- api/synchronous/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
- api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
- api/synchronous/api_core/animal_detection_api/config.py +35 -35
- api/synchronous/api_core/tests/__init__.py +0 -0
- api/synchronous/api_core/tests/load_test.py +109 -109
- classification/__init__.py +0 -0
- classification/aggregate_classifier_probs.py +21 -24
- classification/analyze_failed_images.py +11 -13
- classification/cache_batchapi_outputs.py +51 -51
- classification/create_classification_dataset.py +69 -68
- classification/crop_detections.py +54 -53
- classification/csv_to_json.py +97 -100
- classification/detect_and_crop.py +105 -105
- classification/evaluate_model.py +43 -42
- classification/identify_mislabeled_candidates.py +47 -46
- classification/json_to_azcopy_list.py +10 -10
- classification/json_validator.py +72 -71
- classification/map_classification_categories.py +44 -43
- classification/merge_classification_detection_output.py +68 -68
- classification/prepare_classification_script.py +157 -154
- classification/prepare_classification_script_mc.py +228 -228
- classification/run_classifier.py +27 -26
- classification/save_mislabeled.py +30 -30
- classification/train_classifier.py +20 -20
- classification/train_classifier_tf.py +21 -22
- classification/train_utils.py +10 -10
- data_management/__init__.py +0 -0
- data_management/annotations/__init__.py +0 -0
- data_management/annotations/annotation_constants.py +18 -31
- data_management/camtrap_dp_to_coco.py +238 -0
- data_management/cct_json_utils.py +102 -59
- data_management/cct_to_md.py +176 -158
- data_management/cct_to_wi.py +247 -219
- data_management/coco_to_labelme.py +272 -263
- data_management/coco_to_yolo.py +79 -58
- data_management/databases/__init__.py +0 -0
- data_management/databases/add_width_and_height_to_db.py +20 -16
- data_management/databases/combine_coco_camera_traps_files.py +35 -31
- data_management/databases/integrity_check_json_db.py +62 -24
- data_management/databases/subset_json_db.py +24 -15
- data_management/generate_crops_from_cct.py +27 -45
- data_management/get_image_sizes.py +188 -162
- data_management/importers/add_nacti_sizes.py +8 -8
- data_management/importers/add_timestamps_to_icct.py +78 -78
- data_management/importers/animl_results_to_md_results.py +158 -158
- data_management/importers/auckland_doc_test_to_json.py +9 -9
- data_management/importers/auckland_doc_to_json.py +8 -8
- data_management/importers/awc_to_json.py +7 -7
- data_management/importers/bellevue_to_json.py +15 -15
- data_management/importers/cacophony-thermal-importer.py +13 -13
- data_management/importers/carrizo_shrubfree_2018.py +8 -8
- data_management/importers/carrizo_trail_cam_2017.py +8 -8
- data_management/importers/cct_field_adjustments.py +9 -9
- data_management/importers/channel_islands_to_cct.py +10 -10
- data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
- data_management/importers/ena24_to_json.py +7 -7
- data_management/importers/filenames_to_json.py +8 -8
- data_management/importers/helena_to_cct.py +7 -7
- data_management/importers/idaho-camera-traps.py +7 -7
- data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
- data_management/importers/jb_csv_to_json.py +9 -9
- data_management/importers/mcgill_to_json.py +8 -8
- data_management/importers/missouri_to_json.py +18 -18
- data_management/importers/nacti_fieldname_adjustments.py +10 -10
- data_management/importers/noaa_seals_2019.py +7 -7
- data_management/importers/pc_to_json.py +7 -7
- data_management/importers/plot_wni_giraffes.py +7 -7
- data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
- data_management/importers/prepare_zsl_imerit.py +7 -7
- data_management/importers/rspb_to_json.py +8 -8
- data_management/importers/save_the_elephants_survey_A.py +8 -8
- data_management/importers/save_the_elephants_survey_B.py +9 -9
- data_management/importers/snapshot_safari_importer.py +26 -26
- data_management/importers/snapshot_safari_importer_reprise.py +665 -665
- data_management/importers/snapshot_serengeti_lila.py +14 -14
- data_management/importers/sulross_get_exif.py +8 -9
- data_management/importers/timelapse_csv_set_to_json.py +11 -11
- data_management/importers/ubc_to_json.py +13 -13
- data_management/importers/umn_to_json.py +7 -7
- data_management/importers/wellington_to_json.py +8 -8
- data_management/importers/wi_to_json.py +9 -9
- data_management/importers/zamba_results_to_md_results.py +181 -181
- data_management/labelme_to_coco.py +65 -24
- data_management/labelme_to_yolo.py +8 -8
- data_management/lila/__init__.py +0 -0
- data_management/lila/add_locations_to_island_camera_traps.py +9 -9
- data_management/lila/add_locations_to_nacti.py +147 -147
- data_management/lila/create_lila_blank_set.py +13 -13
- data_management/lila/create_lila_test_set.py +8 -8
- data_management/lila/create_links_to_md_results_files.py +106 -106
- data_management/lila/download_lila_subset.py +44 -110
- data_management/lila/generate_lila_per_image_labels.py +55 -42
- data_management/lila/get_lila_annotation_counts.py +18 -15
- data_management/lila/get_lila_image_counts.py +11 -11
- data_management/lila/lila_common.py +96 -33
- data_management/lila/test_lila_metadata_urls.py +132 -116
- data_management/ocr_tools.py +173 -128
- data_management/read_exif.py +110 -97
- data_management/remap_coco_categories.py +83 -83
- data_management/remove_exif.py +58 -62
- data_management/resize_coco_dataset.py +30 -23
- data_management/wi_download_csv_to_coco.py +246 -239
- data_management/yolo_output_to_md_output.py +86 -73
- data_management/yolo_to_coco.py +300 -60
- detection/__init__.py +0 -0
- detection/detector_training/__init__.py +0 -0
- detection/process_video.py +85 -33
- detection/pytorch_detector.py +43 -25
- detection/run_detector.py +157 -72
- detection/run_detector_batch.py +179 -113
- detection/run_inference_with_yolov5_val.py +108 -48
- detection/run_tiled_inference.py +111 -40
- detection/tf_detector.py +51 -29
- detection/video_utils.py +606 -521
- docs/source/conf.py +43 -0
- md_utils/__init__.py +0 -0
- md_utils/azure_utils.py +9 -9
- md_utils/ct_utils.py +228 -68
- md_utils/directory_listing.py +59 -64
- md_utils/md_tests.py +968 -871
- md_utils/path_utils.py +460 -134
- md_utils/process_utils.py +157 -133
- md_utils/sas_blob_utils.py +20 -20
- md_utils/split_locations_into_train_val.py +45 -32
- md_utils/string_utils.py +33 -10
- md_utils/url_utils.py +176 -60
- md_utils/write_html_image_list.py +40 -33
- md_visualization/__init__.py +0 -0
- md_visualization/plot_utils.py +102 -109
- md_visualization/render_images_with_thumbnails.py +34 -34
- md_visualization/visualization_utils.py +597 -291
- md_visualization/visualize_db.py +76 -48
- md_visualization/visualize_detector_output.py +61 -42
- {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/METADATA +13 -7
- megadetector-5.0.10.dist-info/RECORD +224 -0
- {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/top_level.txt +1 -0
- taxonomy_mapping/__init__.py +0 -0
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
- taxonomy_mapping/map_new_lila_datasets.py +154 -154
- taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
- taxonomy_mapping/preview_lila_taxonomy.py +591 -591
- taxonomy_mapping/retrieve_sample_image.py +12 -12
- taxonomy_mapping/simple_image_download.py +11 -11
- taxonomy_mapping/species_lookup.py +10 -10
- taxonomy_mapping/taxonomy_csv_checker.py +18 -18
- taxonomy_mapping/taxonomy_graph.py +47 -47
- taxonomy_mapping/validate_lila_category_mappings.py +83 -76
- data_management/cct_json_to_filename_json.py +0 -89
- data_management/cct_to_csv.py +0 -140
- data_management/databases/remove_corrupted_images_from_db.py +0 -191
- detection/detector_training/copy_checkpoints.py +0 -43
- megadetector-5.0.8.dist-info/RECORD +0 -205
- {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/LICENSE +0 -0
- {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/WHEEL +0 -0
|
@@ -1,61 +1,55 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
#
|
|
54
|
-
###
|
|
55
|
-
#
|
|
56
|
-
# To subset a COCO Camera Traps .json database, see subset_json_db.py
|
|
57
|
-
#
|
|
58
|
-
########
|
|
1
|
+
r"""
|
|
2
|
+
|
|
3
|
+
subset_json_detector_output.py
|
|
4
|
+
|
|
5
|
+
Creates one or more subsets of a detector results file (.json), doing either
|
|
6
|
+
or both of the following (if both are requested, they happen in this order):
|
|
7
|
+
|
|
8
|
+
1) Retrieve all elements where filenames contain a specified query string,
|
|
9
|
+
optionally replacing that query with a replacement token. If the query is blank,
|
|
10
|
+
can also be used to prepend content to all filenames.
|
|
11
|
+
|
|
12
|
+
Does not support regex's, but supports a special case of ^string to indicate "must start with
|
|
13
|
+
to match".
|
|
14
|
+
|
|
15
|
+
2) Create separate .jsons for each unique path, optionally making the filenames
|
|
16
|
+
in those .json's relative paths. In this case, you specify an output directory,
|
|
17
|
+
rather than an output path. All images in the folder blah/foo/bar will end up
|
|
18
|
+
in a .json file called blah_foo_bar.json.
|
|
19
|
+
|
|
20
|
+
Can also apply a confidence threshold.
|
|
21
|
+
|
|
22
|
+
Can also subset by categories above a threshold (programmatic invocation only, this is
|
|
23
|
+
not supported at the command line yet).
|
|
24
|
+
|
|
25
|
+
To subset a COCO Camera Traps .json database, see subset_json_db.py
|
|
26
|
+
|
|
27
|
+
**Sample invocation (splitting into multiple json's)**
|
|
28
|
+
|
|
29
|
+
Read from "1800_idfg_statewide_wolf_detections_w_classifications.json", split up into
|
|
30
|
+
individual .jsons in 'd:/temp/idfg/output', making filenames relative to their individual
|
|
31
|
+
folders:
|
|
32
|
+
|
|
33
|
+
python subset_json_detector_output.py "d:/temp/idfg/1800_idfg_statewide_wolf_detections_w_classifications.json" "d:/temp/idfg/output" --split_folders --make_folder_relative
|
|
34
|
+
|
|
35
|
+
Now do the same thing, but instead of writing .json's to d:/temp/idfg/output, write them to *subfolders*
|
|
36
|
+
corresponding to the subfolders for each .json file.
|
|
37
|
+
|
|
38
|
+
python subset_json_detector_output.py "d:/temp/idfg/1800_detections_S2.json" "d:/temp/idfg/output_to_folders" --split_folders --make_folder_relative --copy_jsons_to_folders
|
|
39
|
+
|
|
40
|
+
**Sample invocation (creating a single subset matching a query)**
|
|
41
|
+
|
|
42
|
+
Read from "1800_detections.json", write to "1800_detections_2017.json"
|
|
43
|
+
|
|
44
|
+
Include only images matching "2017", and change "2017" to "blah"
|
|
45
|
+
|
|
46
|
+
python subset_json_detector_output.py "d:/temp/1800_detections.json" "d:/temp/1800_detections_2017_blah.json" --query 2017 --replacement blah
|
|
47
|
+
|
|
48
|
+
Include all images, prepend with "prefix/"
|
|
49
|
+
|
|
50
|
+
python subset_json_detector_output.py "d:/temp/1800_detections.json" "d:/temp/1800_detections_prefix.json" --replacement "prefix/"
|
|
51
|
+
|
|
52
|
+
"""
|
|
59
53
|
|
|
60
54
|
#%% Constants and imports
|
|
61
55
|
|
|
@@ -68,81 +62,85 @@ import re
|
|
|
68
62
|
|
|
69
63
|
from tqdm import tqdm
|
|
70
64
|
|
|
71
|
-
from md_utils.ct_utils import args_to_object
|
|
72
|
-
from md_utils.
|
|
73
|
-
from md_utils.ct_utils import invert_dictionary
|
|
65
|
+
from md_utils.ct_utils import args_to_object, get_max_conf, invert_dictionary
|
|
66
|
+
from md_utils.path_utils import top_level_folder
|
|
74
67
|
|
|
75
68
|
|
|
76
69
|
#%% Helper classes
|
|
77
70
|
|
|
78
71
|
class SubsetJsonDetectorOutputOptions:
|
|
72
|
+
"""
|
|
73
|
+
Options used to parameterize subset_json_detector_output()
|
|
74
|
+
"""
|
|
79
75
|
|
|
80
|
-
|
|
76
|
+
#: Only process files containing the token 'query'
|
|
81
77
|
query = None
|
|
82
78
|
|
|
83
|
-
|
|
84
|
-
|
|
79
|
+
#: Replace 'query' with 'replacement' if 'replacement' is not None. If 'query' is None,
|
|
80
|
+
#: prepend 'replacement'
|
|
85
81
|
replacement = None
|
|
86
82
|
|
|
87
|
-
|
|
83
|
+
#: Should we split output into individual .json files for each folder?
|
|
88
84
|
split_folders = False
|
|
89
85
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
86
|
+
#: Folder level to use for splitting ['bottom','top','n_from_bottom','n_from_top','dict']
|
|
87
|
+
#:
|
|
88
|
+
#: 'dict' requires 'split_folder_param' to be a dictionary mapping each filename
|
|
89
|
+
#: to a token.
|
|
94
90
|
split_folder_mode = 'bottom' # 'top'
|
|
95
91
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
92
|
+
#: When using the 'n_from_bottom' parameter to define folder splitting, this
|
|
93
|
+
#: defines the number of directories from the bottom. 'n_from_bottom' with
|
|
94
|
+
#: a parameter of zero is the same as 'bottom'.
|
|
95
|
+
#:
|
|
96
|
+
#: Same story with 'n_from_top'.
|
|
97
|
+
#:
|
|
98
|
+
#: When 'split_folder_mode' is 'dict', this should be a dictionary mapping each filename
|
|
99
|
+
#: to a token.
|
|
104
100
|
split_folder_param = 0
|
|
105
101
|
|
|
106
|
-
|
|
107
|
-
|
|
102
|
+
#: Only meaningful if split_folders is True: should we convert pathnames to be relative
|
|
103
|
+
#: to the folder for each .json file?
|
|
108
104
|
make_folder_relative = False
|
|
109
105
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
106
|
+
#: Only meaningful if split_folders and make_folder_relative are True: if not None,
|
|
107
|
+
#: will copy .json files to their corresponding output directories, relative to
|
|
108
|
+
#: output_filename
|
|
113
109
|
copy_jsons_to_folders = False
|
|
114
110
|
|
|
115
|
-
|
|
111
|
+
#: Should we over-write .json files?
|
|
116
112
|
overwrite_json_files = False
|
|
117
113
|
|
|
118
|
-
|
|
114
|
+
#: If copy_jsons_to_folders is true, do we require that directories already exist?
|
|
119
115
|
copy_jsons_to_folders_directories_must_exist = True
|
|
120
116
|
|
|
121
|
-
|
|
117
|
+
#: Optional confidence threshold; if not None, detections below this confidence won't be
|
|
118
|
+
#: included in the output.
|
|
122
119
|
confidence_threshold = None
|
|
123
120
|
|
|
124
|
-
|
|
121
|
+
#: Should we remove failed images?
|
|
125
122
|
remove_failed_images = False
|
|
126
123
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
124
|
+
#: Either a list of category IDs (as string-ints) (not names), or a dictionary mapping category *IDs*
|
|
125
|
+
#: (as string-ints) (not names) to thresholds. Removes non-matching detections, does not
|
|
126
|
+
#: remove images. Not technically mutually exclusive with category_names_to_keep, but it's an esoteric
|
|
127
|
+
#: scenario indeed where you would want to specify both.
|
|
131
128
|
categories_to_keep = None
|
|
132
129
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
130
|
+
#: Either a list of category names (not IDs), or a dictionary mapping category *names* (not IDs) to thresholds.
|
|
131
|
+
#: Removes non-matching detections, does not remove images. Not technically mutually exclusive with
|
|
132
|
+
#: categories_to_keep, but it's an esoteric scenario indeed where you would want to specify both.
|
|
136
133
|
category_names_to_keep = None
|
|
137
134
|
|
|
135
|
+
#: Set to >0 during testing to limit the number of images that get processed.
|
|
138
136
|
debug_max_images = -1
|
|
139
137
|
|
|
140
138
|
|
|
141
139
|
#%% Main function
|
|
142
140
|
|
|
143
|
-
def
|
|
141
|
+
def _write_detection_results(data, output_filename, options):
|
|
144
142
|
"""
|
|
145
|
-
|
|
143
|
+
Writes the detector-output-formatted dict *data* to *output_filename*.
|
|
146
144
|
"""
|
|
147
145
|
|
|
148
146
|
if (not options.overwrite_json_files) and os.path.isfile(output_filename):
|
|
@@ -160,12 +158,19 @@ def write_detection_results(data, output_filename, options):
|
|
|
160
158
|
with open(output_filename, 'w') as f:
|
|
161
159
|
json.dump(data,f,indent=1)
|
|
162
160
|
|
|
163
|
-
# ...
|
|
161
|
+
# ..._write_detection_results()
|
|
164
162
|
|
|
165
163
|
|
|
166
164
|
def subset_json_detector_output_by_confidence(data, options):
|
|
167
165
|
"""
|
|
168
|
-
|
|
166
|
+
Removes all detections below options.confidence_threshold.
|
|
167
|
+
|
|
168
|
+
Args:
|
|
169
|
+
data (dict): data loaded from a MD results file
|
|
170
|
+
options (SubsetJsonDetectorOutputOptions): parameters for subsetting
|
|
171
|
+
|
|
172
|
+
Returns:
|
|
173
|
+
dict: Possibly-modified version of data (also modifies in place)
|
|
169
174
|
"""
|
|
170
175
|
|
|
171
176
|
if options.confidence_threshold is None:
|
|
@@ -232,7 +237,14 @@ def subset_json_detector_output_by_confidence(data, options):
|
|
|
232
237
|
|
|
233
238
|
def subset_json_detector_output_by_categories(data, options):
|
|
234
239
|
"""
|
|
235
|
-
|
|
240
|
+
Removes all detections that are not above a threshold for specific categories.
|
|
241
|
+
|
|
242
|
+
Args:
|
|
243
|
+
data (dict): data loaded from a MD results file
|
|
244
|
+
options (SubsetJsonDetectorOutputOptions): parameters for subsetting
|
|
245
|
+
|
|
246
|
+
Returns:
|
|
247
|
+
dict: Possibly-modified version of data (also modifies in place)
|
|
236
248
|
"""
|
|
237
249
|
|
|
238
250
|
# If categories_to_keep is supplied as a list, convert to a dict
|
|
@@ -334,6 +346,13 @@ def subset_json_detector_output_by_categories(data, options):
|
|
|
334
346
|
def remove_failed_images(data,options):
|
|
335
347
|
"""
|
|
336
348
|
Removes failed images from [data]
|
|
349
|
+
|
|
350
|
+
Args:
|
|
351
|
+
data (dict): data loaded from a MD results file
|
|
352
|
+
options (SubsetJsonDetectorOutputOptions): parameters for subsetting
|
|
353
|
+
|
|
354
|
+
Returns:
|
|
355
|
+
dict: Possibly-modified version of data (also modifies in place)
|
|
337
356
|
"""
|
|
338
357
|
|
|
339
358
|
images_in = data['images']
|
|
@@ -365,8 +384,15 @@ def remove_failed_images(data,options):
|
|
|
365
384
|
|
|
366
385
|
def subset_json_detector_output_by_query(data, options):
|
|
367
386
|
"""
|
|
368
|
-
|
|
369
|
-
options.query with options.replacement.
|
|
387
|
+
Subsets to images whose filename matches options.query; replace all instances of
|
|
388
|
+
options.query with options.replacement. No-op if options.query is None or ''.
|
|
389
|
+
|
|
390
|
+
Args:
|
|
391
|
+
data (dict): data loaded from a MD results file
|
|
392
|
+
options (SubsetJsonDetectorOutputOptions): parameters for subsetting
|
|
393
|
+
|
|
394
|
+
Returns:
|
|
395
|
+
dict: Possibly-modified version of data (also modifies in place)
|
|
370
396
|
"""
|
|
371
397
|
|
|
372
398
|
images_in = data['images']
|
|
@@ -415,74 +441,27 @@ def subset_json_detector_output_by_query(data, options):
|
|
|
415
441
|
|
|
416
442
|
# ...subset_json_detector_output_by_query()
|
|
417
443
|
|
|
418
|
-
|
|
419
|
-
def split_path(path, maxdepth=100):
|
|
420
|
-
"""
|
|
421
|
-
Splits [path] into all its constituent tokens, e.g.:
|
|
422
|
-
|
|
423
|
-
c:\blah\boo\goo.txt
|
|
424
|
-
|
|
425
|
-
...becomes:
|
|
426
|
-
|
|
427
|
-
['c:\\', 'blah', 'boo', 'goo.txt']
|
|
428
|
-
|
|
429
|
-
http://nicks-liquid-soapbox.blogspot.com/2011/03/splitting-path-to-list-in-python.html
|
|
430
|
-
"""
|
|
431
|
-
|
|
432
|
-
(head, tail) = os.path.split(path)
|
|
433
|
-
return split_path(head, maxdepth - 1) + [tail] \
|
|
434
|
-
if maxdepth and head and head != path \
|
|
435
|
-
else [head or tail]
|
|
436
|
-
|
|
437
|
-
# ...split_path()
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
def top_level_folder(p):
|
|
441
|
-
"""
|
|
442
|
-
Gets the top-level folder from the path *p*; on Windows, will use the top-level folder
|
|
443
|
-
that isn't the drive. E.g., top_level_folder(r"c:\blah\foo") returns "c:\blah". Does not
|
|
444
|
-
include the leaf node, i.e. top_level_folder('/blah/foo') returns '/blah'.
|
|
445
|
-
"""
|
|
446
|
-
|
|
447
|
-
if p == '':
|
|
448
|
-
return ''
|
|
449
|
-
|
|
450
|
-
# Path('/blah').parts is ('/','blah')
|
|
451
|
-
parts = split_path(p)
|
|
452
|
-
|
|
453
|
-
if len(parts) == 1:
|
|
454
|
-
return parts[0]
|
|
455
|
-
|
|
456
|
-
# Handle paths like:
|
|
457
|
-
#
|
|
458
|
-
# /, \, /stuff, c:, c:\stuff
|
|
459
|
-
drive = os.path.splitdrive(p)[0]
|
|
460
|
-
if parts[0] == drive or parts[0] == drive + '/' or parts[0] == drive + '\\' or parts[0] in ['\\', '/']:
|
|
461
|
-
return os.path.join(parts[0], parts[1])
|
|
462
|
-
else:
|
|
463
|
-
return parts[0]
|
|
464
|
-
|
|
465
|
-
# ...top_level_folder()
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
if False:
|
|
469
|
-
|
|
470
|
-
p = 'blah/foo/bar'; s = top_level_folder(p); print(s); assert s == 'blah'
|
|
471
|
-
p = '/blah/foo/bar'; s = top_level_folder(p); print(s); assert s == '/blah'
|
|
472
|
-
p = 'bar'; s = top_level_folder(p); print(s); assert s == 'bar'
|
|
473
|
-
p = ''; s = top_level_folder(p); print(s); assert s == ''
|
|
474
|
-
p = 'c:\\'; s = top_level_folder(p); print(s); assert s == 'c:\\'
|
|
475
|
-
p = r'c:\blah'; s = top_level_folder(p); print(s); assert s == 'c:\\blah'
|
|
476
|
-
p = r'c:\foo'; s = top_level_folder(p); print(s); assert s == 'c:\\foo'
|
|
477
|
-
p = r'c:/foo'; s = top_level_folder(p); print(s); assert s == 'c:/foo'
|
|
478
|
-
p = r'c:\foo/bar'; s = top_level_folder(p); print(s); assert s == 'c:\\foo'
|
|
479
|
-
|
|
480
444
|
|
|
481
445
|
def subset_json_detector_output(input_filename, output_filename, options, data=None):
|
|
482
446
|
"""
|
|
483
|
-
Main
|
|
447
|
+
Main entry point; creates one or more subsets of a detector results file. See the
|
|
448
|
+
module header comment for more information about the available subsetting approaches.
|
|
484
449
|
|
|
485
450
|
Makes a copy of [data] before modifying if a data dictionary is supplied.
|
|
451
|
+
|
|
452
|
+
Args:
|
|
453
|
+
input_filename (str): filename to load and subset; can be None if [data] is supplied
|
|
454
|
+
output_filename (str): file or folder name (depending on [options]) to which we should
|
|
455
|
+
write subset results.
|
|
456
|
+
options (SubsetJsonDetectorOutputOptions): parameters for .json splitting/subsetting;
|
|
457
|
+
see SubsetJsonDetectorOutputOptions for details.
|
|
458
|
+
data (dict, optional): data loaded from a .json file; if this is not None, [input_filename]
|
|
459
|
+
will be ignored. If supplied, this will be copied before it's modified.
|
|
460
|
+
|
|
461
|
+
Returns:
|
|
462
|
+
dict: Results that are either loaded from [input_filename] and processed, or copied
|
|
463
|
+
from [data] and processed.
|
|
464
|
+
|
|
486
465
|
"""
|
|
487
466
|
|
|
488
467
|
if options is None:
|
|
@@ -528,7 +507,7 @@ def subset_json_detector_output(input_filename, output_filename, options, data=N
|
|
|
528
507
|
|
|
529
508
|
if not options.split_folders:
|
|
530
509
|
|
|
531
|
-
|
|
510
|
+
_write_detection_results(data, output_filename, options)
|
|
532
511
|
return data
|
|
533
512
|
|
|
534
513
|
else:
|
|
@@ -558,7 +537,7 @@ def subset_json_detector_output(input_filename, output_filename, options, data=N
|
|
|
558
537
|
# Split string into folders, keeping delimiters
|
|
559
538
|
|
|
560
539
|
# Don't use this, it removes delimiters
|
|
561
|
-
# tokens =
|
|
540
|
+
# tokens = _split_path(fn)
|
|
562
541
|
tokens = re.split(r'([\\/])',fn)
|
|
563
542
|
|
|
564
543
|
n_tokens_to_keep = ((options.split_folder_param + 1) * 2) - 1;
|
|
@@ -621,7 +600,7 @@ def subset_json_detector_output(input_filename, output_filename, options, data=N
|
|
|
621
600
|
# forward-compatible in that I don't take dependencies on the other fields
|
|
622
601
|
dir_data = data
|
|
623
602
|
dir_data['images'] = folders_to_images[dirname]
|
|
624
|
-
|
|
603
|
+
_write_detection_results(dir_data, json_fn, options)
|
|
625
604
|
print('Wrote {} images to {}'.format(len(dir_data['images']), json_fn))
|
|
626
605
|
|
|
627
606
|
# ...for each directory
|
|
@@ -713,7 +692,5 @@ def main():
|
|
|
713
692
|
|
|
714
693
|
subset_json_detector_output(args.input_file, args.output_file, options)
|
|
715
694
|
|
|
716
|
-
|
|
717
|
-
if __name__ == '__main__':
|
|
718
|
-
|
|
695
|
+
if __name__ == '__main__':
|
|
719
696
|
main()
|
|
@@ -1,26 +1,26 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
top_folders_to_bottom.py
|
|
4
|
+
|
|
5
|
+
Given a base folder with files like:
|
|
6
|
+
|
|
7
|
+
* A/1/2/a.jpg
|
|
8
|
+
* B/3/4/b.jpg
|
|
9
|
+
|
|
10
|
+
...moves the top-level folders to the bottom in a new output folder, i.e., creates:
|
|
11
|
+
|
|
12
|
+
* 1/2/A/a.jpg
|
|
13
|
+
* 3/4/B/b.jpg
|
|
14
|
+
|
|
15
|
+
In practice, this is used to make this:
|
|
16
|
+
|
|
17
|
+
animal/camera01/image01.jpg
|
|
18
|
+
|
|
19
|
+
...look like:
|
|
20
|
+
|
|
21
|
+
camera01/animal/image01.jpg
|
|
22
|
+
|
|
23
|
+
"""
|
|
24
24
|
|
|
25
25
|
#%% Constants and imports
|
|
26
26
|
|
|
@@ -35,31 +35,46 @@ from tqdm import tqdm
|
|
|
35
35
|
from functools import partial
|
|
36
36
|
from multiprocessing.pool import ThreadPool
|
|
37
37
|
|
|
38
|
+
from md_utils.path_utils import path_is_abs
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
#%% Classes
|
|
42
|
+
|
|
38
43
|
class TopFoldersToBottomOptions:
|
|
44
|
+
"""
|
|
45
|
+
Options used to parameterize top_folders_to_bottom()
|
|
46
|
+
"""
|
|
39
47
|
|
|
40
48
|
def __init__(self,input_folder,output_folder,copy=True,n_threads=1):
|
|
49
|
+
|
|
50
|
+
#: Whether to copy (True) vs. move (False) files when re-organizing
|
|
41
51
|
self.copy = copy
|
|
52
|
+
|
|
53
|
+
#: Number of worker threads to use, or <1 to disable parallelization
|
|
42
54
|
self.n_threads = n_threads
|
|
55
|
+
|
|
56
|
+
#: Input folder
|
|
43
57
|
self.input_folder = input_folder
|
|
58
|
+
|
|
59
|
+
#: Output folder
|
|
44
60
|
self.output_folder = output_folder
|
|
45
|
-
self.overwrite = False
|
|
46
61
|
|
|
62
|
+
#: If this is False and an output file exists, throw an error
|
|
63
|
+
self.overwrite = False
|
|
47
64
|
|
|
48
|
-
#%% Support functions
|
|
49
|
-
|
|
50
|
-
def path_is_abs(p): return (len(p) > 1) and (p[0] == '/' or p[1] == ':')
|
|
51
|
-
|
|
52
65
|
|
|
53
66
|
#%% Main functions
|
|
54
67
|
|
|
55
|
-
def
|
|
68
|
+
def _process_file(relative_filename,options,execute=True):
|
|
56
69
|
|
|
57
|
-
assert ('/' in relative_filename) and
|
|
70
|
+
assert ('/' in relative_filename) and \
|
|
71
|
+
('\\' not in relative_filename) and \
|
|
72
|
+
(not path_is_abs(relative_filename))
|
|
58
73
|
|
|
59
74
|
# Find top-level folder
|
|
60
75
|
tokens = relative_filename.split('/')
|
|
61
|
-
|
|
62
|
-
tokens.insert(len(tokens)-1,
|
|
76
|
+
topmost_folder = tokens.pop(0)
|
|
77
|
+
tokens.insert(len(tokens)-1,topmost_folder)
|
|
63
78
|
|
|
64
79
|
# Find file/folder names
|
|
65
80
|
output_relative_path = '/'.join(tokens)
|
|
@@ -86,11 +101,35 @@ def process_file(relative_filename,options,execute=True):
|
|
|
86
101
|
|
|
87
102
|
return output_absolute_path
|
|
88
103
|
|
|
89
|
-
# ...def
|
|
104
|
+
# ...def _process_file()
|
|
90
105
|
|
|
91
106
|
|
|
92
107
|
def top_folders_to_bottom(options):
|
|
93
|
-
|
|
108
|
+
"""
|
|
109
|
+
top_folders_to_bottom.py
|
|
110
|
+
|
|
111
|
+
Given a base folder with files like:
|
|
112
|
+
|
|
113
|
+
* A/1/2/a.jpg
|
|
114
|
+
* B/3/4/b.jpg
|
|
115
|
+
|
|
116
|
+
...moves the top-level folders to the bottom in a new output folder, i.e., creates:
|
|
117
|
+
|
|
118
|
+
* 1/2/A/a.jpg
|
|
119
|
+
* 3/4/B/b.jpg
|
|
120
|
+
|
|
121
|
+
In practice, this is used to make this:
|
|
122
|
+
|
|
123
|
+
animal/camera01/image01.jpg
|
|
124
|
+
|
|
125
|
+
...look like:
|
|
126
|
+
|
|
127
|
+
camera01/animal/image01.jpg
|
|
128
|
+
|
|
129
|
+
Args:
|
|
130
|
+
options (TopFoldersToBottomOptions): See TopFoldersToBottomOptions for parameter details.
|
|
131
|
+
|
|
132
|
+
"""
|
|
94
133
|
os.makedirs(options.output_folder,exist_ok=True)
|
|
95
134
|
|
|
96
135
|
# Enumerate input folder
|
|
@@ -112,7 +151,7 @@ def top_folders_to_bottom(options):
|
|
|
112
151
|
relative_files = [s for s in relative_files if '/' in s]
|
|
113
152
|
|
|
114
153
|
# Make sure each input file maps to a unique output file
|
|
115
|
-
absolute_output_files = [
|
|
154
|
+
absolute_output_files = [_process_file(s, options, execute=False) for s in relative_files]
|
|
116
155
|
assert len(absolute_output_files) == len(set(absolute_output_files)),\
|
|
117
156
|
"Error: input filenames don't map to unique output filenames"
|
|
118
157
|
|
|
@@ -122,13 +161,13 @@ def top_folders_to_bottom(options):
|
|
|
122
161
|
if options.n_threads <= 1:
|
|
123
162
|
|
|
124
163
|
for relative_filename in tqdm(relative_files):
|
|
125
|
-
|
|
164
|
+
_process_file(relative_filename,options)
|
|
126
165
|
|
|
127
166
|
else:
|
|
128
167
|
|
|
129
168
|
print('Starting a pool with {} threads'.format(options.n_threads))
|
|
130
169
|
pool = ThreadPool(options.n_threads)
|
|
131
|
-
process_file_with_options = partial(
|
|
170
|
+
process_file_with_options = partial(_process_file, options=options)
|
|
132
171
|
_ = list(tqdm(pool.imap(process_file_with_options, relative_files), total=len(relative_files)))
|
|
133
172
|
|
|
134
173
|
# ...def top_folders_to_bottom()
|
|
@@ -180,10 +219,5 @@ def main():
|
|
|
180
219
|
|
|
181
220
|
top_folders_to_bottom(options)
|
|
182
221
|
|
|
183
|
-
|
|
184
|
-
if __name__ == '__main__':
|
|
185
|
-
|
|
222
|
+
if __name__ == '__main__':
|
|
186
223
|
main()
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
File without changes
|
|
File without changes
|