megadetector-5.0.27-py3-none-any.whl → megadetector-5.0.29-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector has been flagged as potentially problematic.
- megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +23 -23
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -1
- megadetector/data_management/camtrap_dp_to_coco.py +45 -45
- megadetector/data_management/cct_json_utils.py +101 -101
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +189 -189
- megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
- megadetector/data_management/databases/integrity_check_json_db.py +202 -188
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +38 -38
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +130 -124
- megadetector/data_management/labelme_to_yolo.py +78 -72
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
- megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +70 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
- megadetector/data_management/mewc_to_md.py +339 -340
- megadetector/data_management/ocr_tools.py +258 -252
- megadetector/data_management/read_exif.py +232 -223
- megadetector/data_management/remap_coco_categories.py +26 -26
- megadetector/data_management/remove_exif.py +31 -20
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +41 -41
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +117 -120
- megadetector/data_management/yolo_to_coco.py +195 -188
- megadetector/detection/change_detection.py +831 -0
- megadetector/detection/process_video.py +341 -338
- megadetector/detection/pytorch_detector.py +308 -266
- megadetector/detection/run_detector.py +186 -166
- megadetector/detection/run_detector_batch.py +366 -364
- megadetector/detection/run_inference_with_yolov5_val.py +328 -325
- megadetector/detection/run_tiled_inference.py +312 -253
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +291 -283
- megadetector/postprocessing/add_max_conf.py +15 -11
- megadetector/postprocessing/categorize_detections_by_size.py +44 -44
- megadetector/postprocessing/classification_postprocessing.py +808 -311
- megadetector/postprocessing/combine_batch_outputs.py +20 -21
- megadetector/postprocessing/compare_batch_results.py +528 -517
- megadetector/postprocessing/convert_output_format.py +97 -97
- megadetector/postprocessing/create_crop_folder.py +220 -147
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -0
- megadetector/postprocessing/load_api_results.py +25 -22
- megadetector/postprocessing/md_to_coco.py +129 -98
- megadetector/postprocessing/md_to_labelme.py +89 -83
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +87 -114
- megadetector/postprocessing/postprocess_batch_results.py +319 -302
- megadetector/postprocessing/remap_detection_categories.py +36 -36
- megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -69
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +33 -33
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +11 -11
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/azure_utils.py +22 -22
- megadetector/utils/ct_utils.py +1019 -200
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +541 -518
- megadetector/utils/path_utils.py +1511 -406
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/sas_blob_utils.py +53 -49
- megadetector/utils/split_locations_into_train_val.py +73 -60
- megadetector/utils/string_utils.py +147 -26
- megadetector/utils/url_utils.py +463 -173
- megadetector/utils/wi_utils.py +2629 -2868
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +21 -21
- megadetector/visualization/render_images_with_thumbnails.py +37 -73
- megadetector/visualization/visualization_utils.py +424 -404
- megadetector/visualization/visualize_db.py +197 -190
- megadetector/visualization/visualize_detector_output.py +126 -98
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/METADATA +6 -3
- megadetector-5.0.29.dist-info/RECORD +163 -0
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector-5.0.27.dist-info/RECORD +0 -208
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
(Only a subset of the changed files is rendered below; blank "-"/"+" line pairs are whitespace-only changes, e.g. trailing whitespace removed.)

megadetector/classification/identify_mislabeled_candidates.py

@@ -5,7 +5,7 @@ identify_mislabeled_candidates.py
 Identify images that may have been mislabeled.

 A "mislabeled candidate" is defined as an image meeting both criteria:
-
+
 * according to the ground-truth label, the model made an incorrect prediction

 * the model's prediction confidence exceeds its confidence for the ground-truth
@@ -13,12 +13,12 @@ A "mislabeled candidate" is defined as an image meeting both criteria:

 This script outputs for each dataset a text file containing the filenames of
 mislabeled candidates, one per line. The text files are saved to:
-
+
     <logdir>/mislabeled_candidates_{split}_{dataset}.txt

 To this list of files can then be passed to AzCopy to be downloaded:

-""
+""
 azcopy cp "http://<url_of_container>?<sas_token>" "/save/files/here" \
     --list-of-files "/path/to/mislabeled_candidates_{split}_{dataset}.txt"
 ""
@@ -68,7 +68,7 @@ from tqdm import tqdm

 def main(logdir: str, splits: Iterable[str], margin: float,
          include_dataset_in_filename: bool) -> None:
-
+
     # load files
     logdir = os.path.normpath(logdir)  # removes any trailing slash
     base_logdir = os.path.dirname(logdir)
@@ -112,7 +112,7 @@ def get_candidates_df(outputs_csv_path: str, label_names: Sequence[str],
     Returns a DataFrame containing crops only from mislabeled candidate
     images.
     """
-
+
     df = pd.read_csv(outputs_csv_path, float_precision='high')
     all_rows = range(len(df))
     df['pred'] = df[label_names].idxmax(axis=1)
@@ -146,7 +146,7 @@ def _parse_args() -> argparse.Namespace:


 if __name__ == '__main__':
-
+
     args = _parse_args()
     main(logdir=args.logdir, splits=args.splits, margin=args.margin,
          include_dataset_in_filename=args.include_dataset_in_filename)
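The hunks above are whitespace-only, but the docstring they quote pins down the selection rule exactly. As a minimal sketch of that rule, assuming a DataFrame with one probability column per class plus a ground-truth 'label' column (illustrative column names, not necessarily the script's actual schema):

    import numpy as np
    import pandas as pd

    def candidate_mask(df: pd.DataFrame, label_names: list, margin: float) -> pd.Series:
        # Per-class confidences as a matrix, one row per crop
        probs = df[label_names].to_numpy()
        # Index of the ground-truth class for each row (assumes every value
        # in df['label'] appears in label_names)
        true_idx = df['label'].map({n: i for i, n in enumerate(label_names)}).to_numpy()
        pred_idx = probs.argmax(axis=1)                  # model's prediction
        pred_conf = probs.max(axis=1)                    # its confidence
        true_conf = probs[np.arange(len(df)), true_idx]  # confidence in the true label
        # Mislabeled candidate: the prediction is wrong, and the model is more
        # confident in it than in the ground-truth label by at least `margin`
        return pd.Series((pred_idx != true_idx) & (pred_conf > true_conf + margin),
                         index=df.index)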
megadetector/classification/json_validator.py

@@ -10,12 +10,12 @@ See README.md for an example of a classification label specification JSON file.
 The validation step takes the classification label specification JSON file and
 finds the dataset labels that belong to each classification label. It checks
 that the following conditions hold:
-
+
 1) Each classification label specification matches at least 1 dataset label.

 2) If the classification label includes a taxonomical specification, then the
    taxa is actually a part of our master taxonomy.
-
+
 3) If the 'prioritize' key is found for a given label, then the label must
    also have a 'max_count' key.

@@ -44,7 +44,7 @@ exist in Azure Blob Storage. In total, we output the following files:

 - queried_images.json
     main output file, ex:
-
+
     {
         "caltech/cct_images/59f5fe2b-23d2-11e8-a6a3-ec086b02610b.jpg": {
             "dataset": "caltech",
@@ -86,6 +86,7 @@ from typing import Any

 from megadetector.utils import path_utils
 from megadetector.utils import sas_blob_utils
+from megadetector.utils import ct_utils

 from megadetector.data_management.megadb import megadb_utils
 from megadetector.taxonomy_mapping.taxonomy_graph import (
@@ -113,7 +114,7 @@ def main(label_spec_json_path: str,
          json_indent: int | None = None,
          seed: int = 123,
          mislabeled_images_dir: str | None = None) -> None:
-
+
     # input validation
     assert os.path.exists(label_spec_json_path)
     assert os.path.exists(taxonomy_csv_path)
@@ -166,36 +167,32 @@ def main(label_spec_json_path: str,
     date = datetime.now().strftime('%Y%m%d_%H%M%S')  # ex: '20200722_110816'
     log_path = os.path.join(output_dir, f'json_validator_log_{date}.json')
     print(f'Saving log of bad images to {log_path}')
-
-        json.dump(log, f, indent=1)
+    ct_utils.write_json(log_path, log)

     # save label counts, pre-subsampling
     print('Saving pre-sampling label counts')
     save_path = os.path.join(output_dir, 'image_counts_by_label_presample.json')
-
-
-
-
-
-        json.dump(image_counts_by_label, f, indent=1)
+    image_counts_by_label_presample = {
+        label: len(filter_images(output_js, label))
+        for label in sorted(input_js.keys())
+    }
+    ct_utils.write_json(save_path, image_counts_by_label_presample)

     print('Sampling with priority (if needed)')
     output_js = sample_with_priority(input_js, output_js)

     print('Saving queried_images.json')
     output_json_path = os.path.join(output_dir, 'queried_images.json')
-
-        json.dump(output_js, f, indent=json_indent)
+    ct_utils.write_json(output_json_path, output_js, indent=json_indent)

     # save label counts, post-subsampling
     print('Saving post-sampling label counts')
     save_path = os.path.join(output_dir, 'image_counts_by_label_sampled.json')
-
-
-
-
-
-        json.dump(image_counts_by_label, f, indent=1)
+    image_counts_by_label_sampled = {
+        label: len(filter_images(output_js, label))
+        for label in sorted(input_js.keys())
+    }
+    ct_utils.write_json(save_path, image_counts_by_label_sampled)


 #%% Support functions
@@ -215,7 +212,7 @@ def parse_spec(spec_dict: Mapping[str, Any],

     Raises: ValueError, if specification does not match any dataset labels
     """
-
+
     results = set()
     if 'taxa' in spec_dict:
         # spec_dict['taxa']: list of dict
@@ -262,7 +259,7 @@ def validate_json(input_js: dict[str, dict[str, Any]],
     dataset labels, or if allow_multilabel=False but a dataset label is
     included in two or more classification labels
     """
-
+
     # maps output label name to set of (dataset, dataset_label) tuples
     label_to_inclusions: dict[str, set[tuple[str, str]]] = {}
     for label, spec_dict in input_js.items():
@@ -301,7 +298,7 @@ def get_output_json(label_to_inclusions: dict[str, set[tuple[str, str]]],
     - 'label': list of str, assigned output label
     - 'bbox': list of dicts, optional
     """
-
+
     # Because MegaDB is organized by dataset, we do the same...
     #
     # ds_to_labels = {
@@ -360,9 +357,9 @@
     '''

     output_json = {}  # maps full image path to json object
-
+
     for ds in tqdm(sorted(ds_to_labels.keys())):  # sort for determinism
-
+
         mislabeled_images: Mapping[str, Any] = {}
         if mislabeled_images_dir is not None:
             csv_path = os.path.join(mislabeled_images_dir, f'{ds}.csv')
@@ -428,7 +425,7 @@ def get_image_sas_uris(img_paths: Iterable[str]) -> list[str]:
     image_sas_uris: list of str, image blob URIs with SAS tokens, ready to
         pass to the batch detection API
     """
-
+
     # we need the datasets table for getting SAS keys
     datasets_table = megadb_utils.MegadbUtils().get_datasets_table()

@@ -483,7 +480,7 @@ def remove_nonexistent_images(js: MutableMapping[str, dict[str, Any]],
     check_local: optional str, path to local dir
     num_threads: int, number of threads to use for checking blob existence
     """
-
+
     def check_local_then_azure(local_path: str, blob_url: str) -> bool:
         return (os.path.exists(local_path)
                 or sas_blob_utils.check_blob_exists(blob_url))
@@ -538,7 +535,7 @@ def remove_images_insufficient_locs(js: MutableMapping[str, dict[str, Any]],
     min_locs: optional int, minimum # of locations that each label must
         have in order to be included
     """
-
+
     # 1st pass: populate label_to_locs
     # label (tuple of str) => set of (dataset, location)
     label_to_locs = defaultdict(set)
@@ -574,7 +571,7 @@ def filter_images(output_js: Mapping[str, Mapping[str, Any]], label: str,

     Returns: set of str, image files that match the filtering criteria
     """
-
+
     img_files: set[str] = set()
     for img_file, img_dict in output_js.items():
         cond1 = (label in img_dict['label'])
@@ -594,7 +591,7 @@ def sample_with_priority(input_js: Mapping[str, Mapping[str, Any]],

     Returns: dict, keys are image file names, sorted alphabetically
     """
-
+
     filtered_imgs: set[str] = set()
     for label, spec_dict in input_js.items():
         if 'prioritize' in spec_dict and 'max_count' not in spec_dict:
@@ -635,7 +632,7 @@
 #%% Command-line driver

 def _parse_args() -> argparse.Namespace:
-
+
     parser = argparse.ArgumentParser(
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
         description='Validates JSON.')
@@ -685,7 +682,7 @@ def _parse_args() -> argparse.Namespace:


 if __name__ == '__main__':
-
+
     args = _parse_args()
     main(label_spec_json_path=args.label_spec_json,
          taxonomy_csv_path=args.taxonomy_csv,
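The substantive change in this file (and in the next two) is mechanical: each open()/json.dump() pair is replaced by a call to the new ct_utils.write_json helper. The helper's implementation is not shown in this diff; a plausible minimal equivalent, inferred only from the call sites above, is:

    import json

    def write_json(output_path, obj, indent=1):
        # Hypothetical stand-in for megadetector.utils.ct_utils.write_json;
        # the real helper may accept additional options. Call sites in this
        # diff pass (path, object), optionally with indent=...
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(obj, f, indent=indent)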
megadetector/classification/map_classification_categories.py

@@ -12,11 +12,11 @@ Takes as input 2 label specification JSON files:

 1) desired label specification JSON file
    this should not have a target named "other"
-
+
 2) label specification JSON file of trained classifier

 The mapping is accomplished as follows:
-
+
 1. For each category in the classifier label spec, find all taxon nodes that
    belong to that category.

@@ -54,6 +54,7 @@ from tqdm import tqdm

 from megadetector.taxonomy_mapping.taxonomy_graph import (
     build_taxonomy_graph, dag_to_tree, TaxonNode)
+from megadetector.utils import ct_utils


 #%% Example usage
@@ -105,8 +106,7 @@ def main(desired_label_spec_json_path: str,
     target_to_classifier_labels = map_target_to_classifier(
         target_label_to_nodes, classifier_label_to_nodes)
     os.makedirs(os.path.dirname(output_json_path), exist_ok=True)
-
-        json.dump(target_to_classifier_labels, f, indent=1)
+    ct_utils.write_json(output_json_path, target_to_classifier_labels)


 #%% Support functions
@@ -129,7 +129,7 @@ def map_target_to_classifier(

     Returns: dict, maps target label to set of classifier labels
     """
-
+
     remaining_classifier_labels = set(classifier_label_to_nodes.keys())
     target_to_classifier_labels: defaultdict[str, set[str]] = defaultdict(set)
     for target, target_nodes in tqdm(target_label_to_nodes.items()):
@@ -172,7 +172,7 @@ def parse_spec(spec_dict: Mapping[str, Any],

     Raises: ValueError, if specification does not match any dataset labels
     """
-
+
     result = set()
     if 'taxa' in spec_dict:
         for taxon in spec_dict['taxa']:
@@ -217,7 +217,7 @@ def label_spec_to_nodes(label_spec_js: dict[str, dict[str, Any]],
     Raises: ValueError, if a classification label specification matches no
     TaxonNode, or if a node is included in two or more classification labels
     """
-
+
     # maps output label name to set of (dataset, dataset_label) tuples
     seen_nodes: set[TaxonNode] = set()
     label_to_nodes: dict[str, set[TaxonNode]] = {}
@@ -239,7 +239,7 @@
 #%% Command-line driver

 def _parse_args() -> argparse.Namespace:
-
+
     parser = argparse.ArgumentParser(
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
         description='Create mapping from target categories to classifier '
@@ -267,7 +267,7 @@ def _parse_args() -> argparse.Namespace:


 if __name__ == '__main__':
-
+
     args = _parse_args()
     main(desired_label_spec_json_path=args.desired_label_spec_json,
          classifier_label_spec_json_path=args.classifier_label_spec_json,
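The docstring above describes the mapping as an overlap test on taxonomy nodes: a classifier label maps to a target label when the two share at least one TaxonNode. A sketch of that step, assuming both inputs are dicts of label -> set of nodes as the hunks suggest (the real map_target_to_classifier also tracks the remaining_classifier_labels set visible above):

    def map_target_to_classifier_sketch(target_label_to_nodes,
                                        classifier_label_to_nodes):
        # Illustrative only; see map_classification_categories.py for the
        # actual logic.
        target_to_classifier_labels = {}
        for target, target_nodes in target_label_to_nodes.items():
            target_to_classifier_labels[target] = {
                clf_label
                for clf_label, clf_nodes in classifier_label_to_nodes.items()
                if clf_nodes & target_nodes  # share at least one taxon node
            }
        return target_to_classifier_labels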
megadetector/classification/merge_classification_detection_output.py

@@ -71,6 +71,7 @@ import pandas as pd
 from tqdm import tqdm

 from megadetector.utils.ct_utils import round_float
+from megadetector.utils import ct_utils


 #%% Example usage
@@ -109,7 +110,7 @@ def row_to_classification_list(row: Mapping[str, Any],
     (label_id + 1_000_000, 1.) to the list. If label_pos='first', we put this at
     the front of the list. Otherwise, we put it at the end.
     """
-
+
     contains_label = ('label' in row)
     assert contains_label or contains_preds
     if relative_conf:
@@ -176,7 +177,7 @@ def process_queried_images(
     Returns: dict, detections JSON file, except that the 'images' field is a
     dict (img_path => dict) instead of a list
     """
-
+
     # input validation
     assert os.path.exists(queried_images_json_path)
     detection_cache_dir = os.path.join(
@@ -274,7 +275,7 @@ def combine_classification_with_detection(
         label_pos: str | None = None,
         relative_conf: bool = False,
         typical_confidence_threshold: float = None
-        ) -> dict[str, Any]:
+        ) -> dict[str, Any]:
     """
     Adds classification information to a detection JSON. Classification
     information may include the true label and/or the predicted confidences
@@ -302,7 +303,7 @@

     Returns: dict, detections JSON file updated with classification results
     """
-
+
     classification_metadata = {
         'classifier': classifier_name,
         'classification_completion_time': classifier_timestamp
@@ -354,7 +355,7 @@ def main(classification_csv_path: str,
          label_pos: str | None,
          relative_conf: bool,
          typical_confidence_threshold: float) -> None:
-
+
     # input validation
     assert os.path.exists(classification_csv_path)
     assert os.path.exists(label_names_json_path)
@@ -409,8 +410,10 @@
         typical_confidence_threshold=typical_confidence_threshold)

     os.makedirs(os.path.dirname(output_json_path), exist_ok=True)
-
-
+    # The following line was removed as per the previous refactoring:
+    # with open(output_json_path, 'w') as f:
+    #     json.dump(classification_js, f, indent=1)
+    ct_utils.write_json(output_json_path, classification_js)

     print('Wrote merged classification/detection results to {}'.format(output_json_path))

@@ -418,7 +421,7 @@
 #%% Command-line driver

 def _parse_args() -> argparse.Namespace:
-
+
     parser = argparse.ArgumentParser(
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
         description='Merges classification results with Batch Detection API '
@@ -487,7 +490,7 @@ def _parse_args() -> argparse.Namespace:


 if __name__ == '__main__':
-
+
     args = _parse_args()
     main(classification_csv_path=args.classification_csv,
          label_names_json_path=args.label_names_json,
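The row_to_classification_list docstring above fully specifies how the ground-truth label is injected into a crop's classification list. As a worked example (the entry format is assumed to match the (category_id, confidence) pairs used in MD output files; this is not the script's actual helper):

    def add_true_label(classifications, label_id, label_pos):
        # Ground-truth marker per the docstring: category id offset by
        # 1,000,000, confidence pinned to 1.0
        entry = (label_id + 1_000_000, 1.0)
        if label_pos == 'first':
            return [entry] + list(classifications)
        return list(classifications) + [entry]

    # e.g. label_id=3, label_pos='first' -> (1000003, 1.0) goes at the front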
megadetector/classification/prepare_classification_script.py

@@ -13,7 +13,7 @@ Differs from prepare_classification_script_mc.py only in the final class mapping

 import os

-def main():
+def main():  # noqa
     organization_name = 'idfg'
     job_name = 'idfg-2022-01-27-EOE2021S_Group6'
     input_filename = 'idfg-2022-01-27-EOE2021S_Group6_detections.filtered_rde_0.60_0.85_30_0.20.json'
@@ -44,7 +44,7 @@ def main():

     for fn in input_files:
         assert os.path.isfile(fn)
-
+

     #%% Constants

@@ -76,7 +76,7 @@ def main():
     typical_classification_threshold_str = '0.75'

     classifier_name = 'idfg4'
-
+

     #%% Set up environment

@@ -88,18 +88,18 @@ def main():
     #%% Crop images

     if include_cropping:
-
+
         commands.append('\n### Cropping ###\n')
-
+
         # fn = input_files[0]
         for fn in input_files:
-
+
             input_file_path = fn
             crop_cmd = ''
-
+
             crop_comment = '\n# Cropping {}\n'.format(fn)
             crop_cmd += crop_comment
-
+
             crop_cmd += "python crop_detections.py \\\n" + \
                 input_file_path + ' \\\n' + \
                 crop_path + ' \\\n' + \
@@ -122,12 +122,12 @@ def main():

         input_file_path = fn
         classifier_output_path = crop_path + classifier_output_suffix
-
+
         classify_cmd = ''
-
+
         classify_comment = '\n# Classifying {}\n'.format(fn)
         classify_cmd += classify_comment
-
+
         classify_cmd += "python run_classifier.py \\\n" + \
             checkpoint_path + ' \\\n' + \
             crop_path + ' \\\n' + \
@@ -137,14 +137,14 @@ def main():
             '--image-size "' + image_size_str + '"' + ' \\\n' + \
             '--batch-size "' + batch_size_str + '"' + ' \\\n' + \
             '--num-workers "' + num_workers_str + '"' + ' \\\n'
-
+
         if device_id is not None:
             classify_cmd += '--device {}'.format(device_id)
-
-        classify_cmd += '\n\n'
+
+        classify_cmd += '\n\n'
         classify_cmd = '{}'.format(classify_cmd)
         commands.append(classify_cmd)
-
+

     #%% Merge classification and detection outputs

@@ -161,12 +161,12 @@ def main():
             final_output_suffix)
         final_output_path = final_output_path.replace('_detections','')
         final_output_path = final_output_path.replace('_crops','')
-
+
         merge_cmd = ''
-
+
         merge_comment = '\n# Merging {}\n'.format(fn)
         merge_cmd += merge_comment
-
+
         merge_cmd += "python merge_classification_detection_output.py \\\n" + \
             classifier_output_path + ' \\\n' + \
             classifier_categories_path + ' \\\n' + \
@@ -189,6 +189,6 @@ def main():
     import stat
     st = os.stat(output_file)
     os.chmod(output_file, st.st_mode | stat.S_IEXEC)
-
+
 if __name__ == '__main__':
     main()
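Both prepare_* scripts end the same way: the generated shell script is marked executable in place. The idiom, taken from the hunk above (output_file shown here with an illustrative value), is equivalent to chmod u+x:

    import os
    import stat

    output_file = 'run_classification_pipeline.sh'  # illustrative path

    # Add the owner-execute bit to whatever permission bits the file
    # already has
    st = os.stat(output_file)
    os.chmod(output_file, st.st_mode | stat.S_IEXEC)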
megadetector/classification/prepare_classification_script_mc.py

@@ -43,7 +43,7 @@ input_files = [

 for fn in input_files:
     assert os.path.isfile(fn)
-
+

 #%% Constants

@@ -76,7 +76,7 @@ classification_threshold_str = '0.05'
 typical_classification_threshold_str = '0.75'

 classifier_name = 'megaclassifier_v0.1_efficientnet-b3'
-
+

 #%% Set up environment

@@ -94,10 +94,10 @@ for fn in input_files:

     input_file_path = fn
     crop_cmd = ''
-
+
     crop_comment = '\n# Cropping {}\n'.format(fn)
     crop_cmd += crop_comment
-
+
     crop_cmd += "python crop_detections.py \\\n" + \
         input_file_path + ' \\\n' + \
         crop_path + ' \\\n' + \
@@ -120,12 +120,12 @@ for fn in input_files:

     input_file_path = fn
     classifier_output_path = crop_path + classifier_output_suffix
-
+
     classify_cmd = ''
-
+
     classify_comment = '\n# Classifying {}\n'.format(fn)
     classify_cmd += classify_comment
-
+
     classify_cmd += "python run_classifier.py \\\n" + \
         checkpoint_path + ' \\\n' + \
         crop_path + ' \\\n' + \
@@ -135,14 +135,14 @@ for fn in input_files:
         '--image-size "' + image_size_str + '"' + ' \\\n' + \
         '--batch-size "' + batch_size_str + '"' + ' \\\n' + \
         '--num-workers "' + num_workers_str + '"' + ' \\\n'
-
+
     if device_id is not None:
         classify_cmd += '--device {}'.format(device_id)
-
-    classify_cmd += '\n\n'
+
+    classify_cmd += '\n\n'
     classify_cmd = '{}'.format(classify_cmd)
     commands.append(classify_cmd)
-
+

 #%% Remap classifier outputs

@@ -156,25 +156,25 @@ for fn in input_files:
     classifier_output_path_remapped = \
         classifier_output_path.replace(".csv.gz","_remapped.csv.gz")
     assert not (classifier_output_path == classifier_output_path_remapped)
-
+
     output_label_index = classifier_output_path_remapped.replace(
         "_remapped.csv.gz","_label_index_remapped.json")
-
+
     remap_cmd = ''
-
+
     remap_comment = '\n# Remapping {}\n'.format(fn)
     remap_cmd += remap_comment
-
+
     remap_cmd += "python aggregate_classifier_probs.py \\\n" + \
         classifier_output_path + ' \\\n' + \
         '--target-mapping "' + target_mapping_path + '"' + ' \\\n' + \
         '--output-csv "' + classifier_output_path_remapped + '"' + ' \\\n' + \
         '--output-label-index "' + output_label_index + '"' + ' \\\n' + \
         '\n'
-
+
     remap_cmd = '{}'.format(remap_cmd)
     commands.append(remap_cmd)
-
+

 #%% Merge classification and detection outputs

@@ -185,25 +185,25 @@ for fn in input_files:

     input_file_path = fn
     classifier_output_path = crop_path + classifier_output_suffix
-
+
     classifier_output_path_remapped = \
         classifier_output_path.replace(".csv.gz","_remapped.csv.gz")
-
+
     output_label_index = classifier_output_path_remapped.replace(
         "_remapped.csv.gz","_label_index_remapped.json")
-
+
     final_output_path = os.path.join(output_base,
         os.path.basename(classifier_output_path)).\
         replace(classifier_output_suffix,
                 final_output_suffix)
     final_output_path = final_output_path.replace('_detections','')
     final_output_path = final_output_path.replace('_crops','')
-
+
     merge_cmd = ''
-
+
     merge_comment = '\n# Merging {}\n'.format(fn)
     merge_cmd += merge_comment
-
+
     merge_cmd += "python merge_classification_detection_output.py \\\n" + \
         classifier_output_path_remapped + ' \\\n' + \
         output_label_index + ' \\\n' + \
megadetector/classification/run_classifier.py

@@ -103,7 +103,7 @@ def create_loader(cropped_images_dir: str,
     batch_size: int, batch size in dataloader
     num_workers: int, # of workers in dataloader
     """
-
+
     crop_files = []

     if detections_json_path is None:
@@ -160,7 +160,7 @@ def main(model_path: str,
          batch_size: int,
          num_workers: int,
          device_id: int | None = None) -> None:
-
+
     # Evaluating with accimage is much faster than Pillow or Pillow-SIMD, but accimage
     # is Linux-only.
     try:
@@ -207,7 +207,7 @@ def test_epoch(model: torch.nn.Module,
     label_names: optional list of str, label names
     output_csv_path: str
     """
-
+
     # set dropout and BN layers to eval mode
     model.eval()

@@ -274,7 +274,7 @@ def _parse_args() -> argparse.Namespace:


 if __name__ == '__main__':
-
+
     args = _parse_args()
     main(model_path=args.model,
          cropped_images_dir=args.crops_dir,