megadetector 5.0.8__py3-none-any.whl → 5.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- api/__init__.py +0 -0
- api/batch_processing/__init__.py +0 -0
- api/batch_processing/api_core/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/score.py +0 -1
- api/batch_processing/api_core/server_job_status_table.py +0 -1
- api/batch_processing/api_core_support/__init__.py +0 -0
- api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
- api/batch_processing/api_support/__init__.py +0 -0
- api/batch_processing/api_support/summarize_daily_activity.py +0 -1
- api/batch_processing/data_preparation/__init__.py +0 -0
- api/batch_processing/data_preparation/manage_local_batch.py +65 -65
- api/batch_processing/data_preparation/manage_video_batch.py +8 -8
- api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
- api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
- api/batch_processing/postprocessing/__init__.py +0 -0
- api/batch_processing/postprocessing/add_max_conf.py +12 -12
- api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
- api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
- api/batch_processing/postprocessing/compare_batch_results.py +113 -43
- api/batch_processing/postprocessing/convert_output_format.py +41 -16
- api/batch_processing/postprocessing/load_api_results.py +16 -17
- api/batch_processing/postprocessing/md_to_coco.py +31 -21
- api/batch_processing/postprocessing/md_to_labelme.py +52 -22
- api/batch_processing/postprocessing/merge_detections.py +14 -14
- api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
- api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
- api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
- api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
- api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
- api/synchronous/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
- api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
- api/synchronous/api_core/animal_detection_api/config.py +35 -35
- api/synchronous/api_core/tests/__init__.py +0 -0
- api/synchronous/api_core/tests/load_test.py +109 -109
- classification/__init__.py +0 -0
- classification/aggregate_classifier_probs.py +21 -24
- classification/analyze_failed_images.py +11 -13
- classification/cache_batchapi_outputs.py +51 -51
- classification/create_classification_dataset.py +69 -68
- classification/crop_detections.py +54 -53
- classification/csv_to_json.py +97 -100
- classification/detect_and_crop.py +105 -105
- classification/evaluate_model.py +43 -42
- classification/identify_mislabeled_candidates.py +47 -46
- classification/json_to_azcopy_list.py +10 -10
- classification/json_validator.py +72 -71
- classification/map_classification_categories.py +44 -43
- classification/merge_classification_detection_output.py +68 -68
- classification/prepare_classification_script.py +157 -154
- classification/prepare_classification_script_mc.py +228 -228
- classification/run_classifier.py +27 -26
- classification/save_mislabeled.py +30 -30
- classification/train_classifier.py +20 -20
- classification/train_classifier_tf.py +21 -22
- classification/train_utils.py +10 -10
- data_management/__init__.py +0 -0
- data_management/annotations/__init__.py +0 -0
- data_management/annotations/annotation_constants.py +18 -31
- data_management/camtrap_dp_to_coco.py +238 -0
- data_management/cct_json_utils.py +102 -59
- data_management/cct_to_md.py +176 -158
- data_management/cct_to_wi.py +247 -219
- data_management/coco_to_labelme.py +272 -263
- data_management/coco_to_yolo.py +79 -58
- data_management/databases/__init__.py +0 -0
- data_management/databases/add_width_and_height_to_db.py +20 -16
- data_management/databases/combine_coco_camera_traps_files.py +35 -31
- data_management/databases/integrity_check_json_db.py +62 -24
- data_management/databases/subset_json_db.py +24 -15
- data_management/generate_crops_from_cct.py +27 -45
- data_management/get_image_sizes.py +188 -162
- data_management/importers/add_nacti_sizes.py +8 -8
- data_management/importers/add_timestamps_to_icct.py +78 -78
- data_management/importers/animl_results_to_md_results.py +158 -158
- data_management/importers/auckland_doc_test_to_json.py +9 -9
- data_management/importers/auckland_doc_to_json.py +8 -8
- data_management/importers/awc_to_json.py +7 -7
- data_management/importers/bellevue_to_json.py +15 -15
- data_management/importers/cacophony-thermal-importer.py +13 -13
- data_management/importers/carrizo_shrubfree_2018.py +8 -8
- data_management/importers/carrizo_trail_cam_2017.py +8 -8
- data_management/importers/cct_field_adjustments.py +9 -9
- data_management/importers/channel_islands_to_cct.py +10 -10
- data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
- data_management/importers/ena24_to_json.py +7 -7
- data_management/importers/filenames_to_json.py +8 -8
- data_management/importers/helena_to_cct.py +7 -7
- data_management/importers/idaho-camera-traps.py +7 -7
- data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
- data_management/importers/jb_csv_to_json.py +9 -9
- data_management/importers/mcgill_to_json.py +8 -8
- data_management/importers/missouri_to_json.py +18 -18
- data_management/importers/nacti_fieldname_adjustments.py +10 -10
- data_management/importers/noaa_seals_2019.py +7 -7
- data_management/importers/pc_to_json.py +7 -7
- data_management/importers/plot_wni_giraffes.py +7 -7
- data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
- data_management/importers/prepare_zsl_imerit.py +7 -7
- data_management/importers/rspb_to_json.py +8 -8
- data_management/importers/save_the_elephants_survey_A.py +8 -8
- data_management/importers/save_the_elephants_survey_B.py +9 -9
- data_management/importers/snapshot_safari_importer.py +26 -26
- data_management/importers/snapshot_safari_importer_reprise.py +665 -665
- data_management/importers/snapshot_serengeti_lila.py +14 -14
- data_management/importers/sulross_get_exif.py +8 -9
- data_management/importers/timelapse_csv_set_to_json.py +11 -11
- data_management/importers/ubc_to_json.py +13 -13
- data_management/importers/umn_to_json.py +7 -7
- data_management/importers/wellington_to_json.py +8 -8
- data_management/importers/wi_to_json.py +9 -9
- data_management/importers/zamba_results_to_md_results.py +181 -181
- data_management/labelme_to_coco.py +65 -24
- data_management/labelme_to_yolo.py +8 -8
- data_management/lila/__init__.py +0 -0
- data_management/lila/add_locations_to_island_camera_traps.py +9 -9
- data_management/lila/add_locations_to_nacti.py +147 -147
- data_management/lila/create_lila_blank_set.py +13 -13
- data_management/lila/create_lila_test_set.py +8 -8
- data_management/lila/create_links_to_md_results_files.py +106 -106
- data_management/lila/download_lila_subset.py +44 -110
- data_management/lila/generate_lila_per_image_labels.py +55 -42
- data_management/lila/get_lila_annotation_counts.py +18 -15
- data_management/lila/get_lila_image_counts.py +11 -11
- data_management/lila/lila_common.py +96 -33
- data_management/lila/test_lila_metadata_urls.py +132 -116
- data_management/ocr_tools.py +173 -128
- data_management/read_exif.py +110 -97
- data_management/remap_coco_categories.py +83 -83
- data_management/remove_exif.py +58 -62
- data_management/resize_coco_dataset.py +30 -23
- data_management/wi_download_csv_to_coco.py +246 -239
- data_management/yolo_output_to_md_output.py +86 -73
- data_management/yolo_to_coco.py +300 -60
- detection/__init__.py +0 -0
- detection/detector_training/__init__.py +0 -0
- detection/process_video.py +85 -33
- detection/pytorch_detector.py +43 -25
- detection/run_detector.py +157 -72
- detection/run_detector_batch.py +179 -113
- detection/run_inference_with_yolov5_val.py +108 -48
- detection/run_tiled_inference.py +111 -40
- detection/tf_detector.py +51 -29
- detection/video_utils.py +606 -521
- docs/source/conf.py +43 -0
- md_utils/__init__.py +0 -0
- md_utils/azure_utils.py +9 -9
- md_utils/ct_utils.py +228 -68
- md_utils/directory_listing.py +59 -64
- md_utils/md_tests.py +968 -871
- md_utils/path_utils.py +460 -134
- md_utils/process_utils.py +157 -133
- md_utils/sas_blob_utils.py +20 -20
- md_utils/split_locations_into_train_val.py +45 -32
- md_utils/string_utils.py +33 -10
- md_utils/url_utils.py +176 -60
- md_utils/write_html_image_list.py +40 -33
- md_visualization/__init__.py +0 -0
- md_visualization/plot_utils.py +102 -109
- md_visualization/render_images_with_thumbnails.py +34 -34
- md_visualization/visualization_utils.py +597 -291
- md_visualization/visualize_db.py +76 -48
- md_visualization/visualize_detector_output.py +61 -42
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/METADATA +13 -7
- megadetector-5.0.9.dist-info/RECORD +224 -0
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
- taxonomy_mapping/__init__.py +0 -0
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
- taxonomy_mapping/map_new_lila_datasets.py +154 -154
- taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
- taxonomy_mapping/preview_lila_taxonomy.py +591 -591
- taxonomy_mapping/retrieve_sample_image.py +12 -12
- taxonomy_mapping/simple_image_download.py +11 -11
- taxonomy_mapping/species_lookup.py +10 -10
- taxonomy_mapping/taxonomy_csv_checker.py +18 -18
- taxonomy_mapping/taxonomy_graph.py +47 -47
- taxonomy_mapping/validate_lila_category_mappings.py +83 -76
- data_management/cct_json_to_filename_json.py +0 -89
- data_management/cct_to_csv.py +0 -140
- data_management/databases/remove_corrupted_images_from_db.py +0 -191
- detection/detector_training/copy_checkpoints.py +0 -43
- megadetector-5.0.8.dist-info/RECORD +0 -205
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/WHEEL +0 -0
|
@@ -1,52 +1,45 @@
|
|
|
1
|
-
########
|
|
2
|
-
#
|
|
3
|
-
# identify_mislabeled_candidates.py
|
|
4
|
-
#
|
|
5
|
-
# Identify images that may have been mislabeled.
|
|
6
|
-
#
|
|
7
|
-
# A "mislabeled candidate" is defined as an image meeting both criteria:
|
|
8
|
-
#
|
|
9
|
-
# * according to the ground-truth label, the model made an incorrect prediction
|
|
10
|
-
#
|
|
11
|
-
# * the model's prediction confidence exceeds its confidence for the ground-truth
|
|
12
|
-
# label by at least <margin>
|
|
13
|
-
#
|
|
14
|
-
# This script outputs for each dataset a text file containing the filenames of
|
|
15
|
-
# mislabeled candidates, one per line. The text files are saved to:
|
|
16
|
-
#
|
|
17
|
-
# <logdir>/mislabeled_candidates_{split}_{dataset}.txt
|
|
18
|
-
#
|
|
19
|
-
# This list of files can then be passed to AzCopy to be downloaded:
|
|
20
|
-
#
|
|
21
|
-
"""
|
|
22
|
-
azcopy cp "http://<url_of_container>?<sas_token>" "/save/files/here" \
|
|
23
|
-
--list-of-files "/path/to/mislabeled_candidates_{split}_{dataset}.txt"
|
|
24
|
-
"""
|
|
25
|
-
#
|
|
26
|
-
# To save the filename as <dataset_name>/<blob_name> (instead of just <blob_name>
|
|
27
|
-
# by default), pass the --include-dataset-in-filename flag. Then, the images can
|
|
28
|
-
# be downloaded with:
|
|
29
|
-
#
|
|
30
|
-
"""
|
|
31
|
-
python data_management/megadb/download_images.py txt \
|
|
32
|
-
"/path/to/mislabeled_candidates_{split}_{dataset}.txt" \
|
|
33
|
-
/save/files/here \
|
|
34
|
-
--threads 50
|
|
35
1
|
"""
|
|
36
|
-
#
|
|
37
|
-
# Assumes the following directory layout:
|
|
38
|
-
# <base_logdir>/
|
|
39
|
-
# label_index.json
|
|
40
|
-
# <logdir>/
|
|
41
|
-
# outputs_{split}.csv.gz
|
|
42
|
-
#
|
|
43
|
-
########
|
|
44
2
|
|
|
45
|
-
|
|
3
|
+
identify_mislabeled_candidates.py
|
|
4
|
+
|
|
5
|
+
Identify images that may have been mislabeled.
|
|
6
|
+
|
|
7
|
+
A "mislabeled candidate" is defined as an image meeting both criteria:
|
|
8
|
+
|
|
9
|
+
* according to the ground-truth label, the model made an incorrect prediction
|
|
10
|
+
|
|
11
|
+
* the model's prediction confidence exceeds its confidence for the ground-truth
|
|
12
|
+
label by at least <margin>
|
|
13
|
+
|
|
14
|
+
This script outputs for each dataset a text file containing the filenames of
|
|
15
|
+
mislabeled candidates, one per line. The text files are saved to:
|
|
16
|
+
|
|
17
|
+
<logdir>/mislabeled_candidates_{split}_{dataset}.txt
|
|
18
|
+
|
|
19
|
+
This list of files can then be passed to AzCopy to be downloaded:
|
|
20
|
+
|
|
21
|
+
""
|
|
22
|
+
azcopy cp "http://<url_of_container>?<sas_token>" "/save/files/here" \
|
|
23
|
+
--list-of-files "/path/to/mislabeled_candidates_{split}_{dataset}.txt"
|
|
24
|
+
""
|
|
25
|
+
|
|
26
|
+
To save the filename as <dataset_name>/<blob_name> (instead of just <blob_name>
|
|
27
|
+
by default), pass the --include-dataset-in-filename flag. Then, the images can
|
|
28
|
+
be downloaded with:
|
|
29
|
+
|
|
30
|
+
""
|
|
31
|
+
python data_management/megadb/download_images.py txt \
|
|
32
|
+
"/path/to/mislabeled_candidates_{split}_{dataset}.txt" \
|
|
33
|
+
/save/files/here \
|
|
34
|
+
--threads 50
|
|
35
|
+
""
|
|
36
|
+
|
|
37
|
+
Assumes the following directory layout:
|
|
38
|
+
<base_logdir>/
|
|
39
|
+
label_index.json
|
|
40
|
+
<logdir>/
|
|
41
|
+
outputs_{split}.csv.gz
|
|
46
42
|
|
|
47
|
-
"""
|
|
48
|
-
python identify_mislabeled_candidates.py <base_logdir>/<logdir> \
|
|
49
|
-
--margin 0.5 --splits val test
|
|
50
43
|
"""
|
|
51
44
|
|
|
52
45
|
#%% Imports
|
|
@@ -63,6 +56,14 @@ import pandas as pd
|
|
|
63
56
|
from tqdm import tqdm
|
|
64
57
|
|
|
65
58
|
|
|
59
|
+
#%% Example usage
|
|
60
|
+
|
|
61
|
+
"""
|
|
62
|
+
python identify_mislabeled_candidates.py <base_logdir>/<logdir> \
|
|
63
|
+
--margin 0.5 --splits val test
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
|
|
66
67
|
#%% Main function
|
|
67
68
|
|
|
68
69
|
def main(logdir: str, splits: Iterable[str], margin: float,
|
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
json_to_azcopy_list.py
|
|
4
|
+
|
|
5
|
+
Given a queried_images.json file output from json_validator.py, generates
|
|
6
|
+
one text file <dataset>_images.txt for every dataset included.
|
|
7
|
+
|
|
8
|
+
See: https://github.com/Azure/azure-storage-azcopy/wiki/Listing-specific-files-to-transfer
|
|
9
|
+
|
|
10
|
+
"""
|
|
11
11
|
|
|
12
12
|
#%% Imports and constants
|
|
13
13
|
|
classification/json_validator.py
CHANGED
|
@@ -1,76 +1,68 @@
|
|
|
1
|
-
|
|
2
|
-
#
|
|
3
|
-
# json_validator.py
|
|
4
|
-
#
|
|
5
|
-
# Validates a classification label specification JSON file and optionally
|
|
6
|
-
# queries MegaDB to find matching image files.
|
|
7
|
-
#
|
|
8
|
-
# See README.md for an example of a classification label specification JSON file.
|
|
9
|
-
#
|
|
10
|
-
# The validation step takes the classification label specification JSON file and
|
|
11
|
-
# finds the dataset labels that belong to each classification label. It checks
|
|
12
|
-
# that the following conditions hold:
|
|
13
|
-
#
|
|
14
|
-
# 1) Each classification label specification matches at least 1 dataset label.
|
|
15
|
-
#
|
|
16
|
-
# 2) If the classification label includes a taxonomical specification, then the
|
|
17
|
-
# taxa is actually a part of our master taxonomy.
|
|
18
|
-
#
|
|
19
|
-
# 3) If the 'prioritize' key is found for a given label, then the label must
|
|
20
|
-
# also have a 'max_count' key.
|
|
21
|
-
#
|
|
22
|
-
# 4) If --allow-multilabel=False, then no dataset label is included in more than
|
|
23
|
-
# one classification label.
|
|
24
|
-
#
|
|
25
|
-
# If --output-dir <output_dir> is given, then we query MegaDB for images
|
|
26
|
-
# that match the dataset labels identified during the validation step. We filter
|
|
27
|
-
# out images that have unaccepted file extensions and images that don't actually
|
|
28
|
-
# exist in Azure Blob Storage. In total, we output the following files:
|
|
29
|
-
#
|
|
30
|
-
# <output_dir>/
|
|
31
|
-
#
|
|
32
|
-
# - included_dataset_labels.txt
|
|
33
|
-
# lists the original dataset classes included for each classification label
|
|
34
|
-
#
|
|
35
|
-
# - image_counts_by_label_presample.json
|
|
36
|
-
# number of images for each classification label after filtering bad
|
|
37
|
-
# images, but before sampling
|
|
38
|
-
#
|
|
39
|
-
# - image_counts_by_label_sampled.json
|
|
40
|
-
# number of images for each classification label in queried_images.json
|
|
41
|
-
#
|
|
42
|
-
# - json_validator_log_{timestamp}.json
|
|
43
|
-
# log of excluded images / labels
|
|
44
|
-
#
|
|
45
|
-
# - queried_images.json
|
|
46
|
-
# main output file, ex:
|
|
47
|
-
#
|
|
48
|
-
# {
|
|
49
|
-
# "caltech/cct_images/59f5fe2b-23d2-11e8-a6a3-ec086b02610b.jpg": {
|
|
50
|
-
# "dataset": "caltech",
|
|
51
|
-
# "location": 13,
|
|
52
|
-
# "class": "mountain_lion", // class from dataset
|
|
53
|
-
#         "label": ["mountain_lion"]  // labels to use in classifier
|
|
54
|
-
# },
|
|
55
|
-
# "caltech/cct_images/59f79901-23d2-11e8-a6a3-ec086b02610b.jpg": {
|
|
56
|
-
# "dataset": "caltech",
|
|
57
|
-
# "location": 13,
|
|
58
|
-
# "class": "mountain_lion", // class from dataset
|
|
59
|
-
# "bbox": [{"category": "animal",
|
|
60
|
-
# "bbox": [0, 0.347, 0.237, 0.257]}],
|
|
61
|
-
#         "label": ["mountain_lion"]  // labels to use in classifier
|
|
62
|
-
# },
|
|
63
|
-
# ...
|
|
64
|
-
# }
|
|
65
|
-
#
|
|
66
|
-
########
|
|
1
|
+
"""
|
|
67
2
|
|
|
68
|
-
|
|
3
|
+
json_validator.py
|
|
4
|
+
|
|
5
|
+
Validates a classification label specification JSON file and optionally
|
|
6
|
+
queries MegaDB to find matching image files.
|
|
7
|
+
|
|
8
|
+
See README.md for an example of a classification label specification JSON file.
|
|
9
|
+
|
|
10
|
+
The validation step takes the classification label specification JSON file and
|
|
11
|
+
finds the dataset labels that belong to each classification label. It checks
|
|
12
|
+
that the following conditions hold:
|
|
13
|
+
|
|
14
|
+
1) Each classification label specification matches at least 1 dataset label.
|
|
15
|
+
|
|
16
|
+
2) If the classification label includes a taxonomical specification, then the
|
|
17
|
+
taxa is actually a part of our master taxonomy.
|
|
18
|
+
|
|
19
|
+
3) If the 'prioritize' key is found for a given label, then the label must
|
|
20
|
+
also have a 'max_count' key.
|
|
21
|
+
|
|
22
|
+
4) If --allow-multilabel=False, then no dataset label is included in more than
|
|
23
|
+
one classification label.
|
|
24
|
+
|
|
25
|
+
If --output-dir <output_dir> is given, then we query MegaDB for images
|
|
26
|
+
that match the dataset labels identified during the validation step. We filter
|
|
27
|
+
out images that have unaccepted file extensions and images that don't actually
|
|
28
|
+
exist in Azure Blob Storage. In total, we output the following files:
|
|
29
|
+
|
|
30
|
+
<output_dir>/
|
|
31
|
+
|
|
32
|
+
- included_dataset_labels.txt
|
|
33
|
+
lists the original dataset classes included for each classification label
|
|
34
|
+
|
|
35
|
+
- image_counts_by_label_presample.json
|
|
36
|
+
number of images for each classification label after filtering bad
|
|
37
|
+
images, but before sampling
|
|
38
|
+
|
|
39
|
+
- image_counts_by_label_sampled.json
|
|
40
|
+
number of images for each classification label in queried_images.json
|
|
41
|
+
|
|
42
|
+
- json_validator_log_{timestamp}.json
|
|
43
|
+
log of excluded images / labels
|
|
44
|
+
|
|
45
|
+
- queried_images.json
|
|
46
|
+
main output file, ex:
|
|
47
|
+
|
|
48
|
+
{
|
|
49
|
+
"caltech/cct_images/59f5fe2b-23d2-11e8-a6a3-ec086b02610b.jpg": {
|
|
50
|
+
"dataset": "caltech",
|
|
51
|
+
"location": 13,
|
|
52
|
+
"class": "mountain_lion", // class from dataset
|
|
53
|
+
        "label": ["mountain_lion"]  // labels to use in classifier
|
|
54
|
+
},
|
|
55
|
+
"caltech/cct_images/59f79901-23d2-11e8-a6a3-ec086b02610b.jpg": {
|
|
56
|
+
"dataset": "caltech",
|
|
57
|
+
"location": 13,
|
|
58
|
+
"class": "mountain_lion", // class from dataset
|
|
59
|
+
"bbox": [{"category": "animal",
|
|
60
|
+
"bbox": [0, 0.347, 0.237, 0.257]}],
|
|
61
|
+
        "label": ["mountain_lion"]  // labels to use in classifier
|
|
62
|
+
},
|
|
63
|
+
...
|
|
64
|
+
}
|
|
69
65
|
|
|
70
|
-
"""
|
|
71
|
-
python json_validator.py label_spec.json \
|
|
72
|
-
$HOME/camera-traps-private/camera_trap_taxonomy_mapping.csv \
|
|
73
|
-
--output-dir run --json-indent 2
|
|
74
66
|
"""
|
|
75
67
|
|
|
76
68
|
from __future__ import annotations
|
|
@@ -96,6 +88,15 @@ from taxonomy_mapping.taxonomy_graph import (
|
|
|
96
88
|
build_taxonomy_graph, dag_to_tree, TaxonNode)
|
|
97
89
|
|
|
98
90
|
|
|
91
|
+
#%% Example usage
|
|
92
|
+
|
|
93
|
+
"""
|
|
94
|
+
python json_validator.py label_spec.json \
|
|
95
|
+
$HOME/camera-traps-private/camera_trap_taxonomy_mapping.csv \
|
|
96
|
+
--output-dir run --json-indent 2
|
|
97
|
+
"""
|
|
98
|
+
|
|
99
|
+
|
|
99
100
|
#%% Main function
|
|
100
101
|
|
|
101
102
|
def main(label_spec_json_path: str,
|
|
@@ -1,48 +1,39 @@
|
|
|
1
|
-
|
|
2
|
-
#
|
|
3
|
-
# map_classification_categories.py
|
|
4
|
-
#
|
|
5
|
-
# Maps a classifier's output categories to desired target categories.
|
|
6
|
-
#
|
|
7
|
-
# In this file, we use the following terminology:
|
|
8
|
-
# * "category": a category output by the classifier
|
|
9
|
-
# * "target": name of a desired group, comprising >= 1 classifier categories
|
|
10
|
-
#
|
|
11
|
-
# Takes as input 2 label specification JSON files:
|
|
12
|
-
#
|
|
13
|
-
# 1) desired label specification JSON file
|
|
14
|
-
# this should not have a target named "other"
|
|
15
|
-
#
|
|
16
|
-
# 2) label specification JSON file of trained classifier
|
|
17
|
-
#
|
|
18
|
-
# The mapping is accomplished as follows:
|
|
19
|
-
#
|
|
20
|
-
# 1. For each category in the classifier label spec, find all taxon nodes that
|
|
21
|
-
# belong to that category.
|
|
22
|
-
#
|
|
23
|
-
# 2. Given a target in the desired label spec, find all taxon nodes that belong
|
|
24
|
-
# to that target. If there is any classifier category whose nodes are a
|
|
25
|
-
# subset of the target nodes, then map the classifier category to that target.
|
|
26
|
-
# Any partial intersection between a target's nodes and a category's nodes
|
|
27
|
-
# is considered an error.
|
|
28
|
-
#
|
|
29
|
-
# 3. If there are any classifier categories that have not yet been assigned a
|
|
30
|
-
# target, group them into the "other" target.
|
|
31
|
-
#
|
|
32
|
-
# This script outputs a JSON file that maps each target to a list of classifier
|
|
33
|
-
# categories.
|
|
34
|
-
#
|
|
35
|
-
# The taxonomy mapping parts of this script are very similar to json_validator.py.
|
|
36
|
-
#
|
|
37
|
-
########
|
|
1
|
+
"""
|
|
38
2
|
|
|
39
|
-
|
|
3
|
+
map_classification_categories.py
|
|
4
|
+
|
|
5
|
+
Maps a classifier's output categories to desired target categories.
|
|
6
|
+
|
|
7
|
+
In this file, we use the following terminology:
|
|
8
|
+
* "category": a category output by the classifier
|
|
9
|
+
* "target": name of a desired group, comprising >= 1 classifier categories
|
|
10
|
+
|
|
11
|
+
Takes as input 2 label specification JSON files:
|
|
12
|
+
|
|
13
|
+
1) desired label specification JSON file
|
|
14
|
+
this should not have a target named "other"
|
|
15
|
+
|
|
16
|
+
2) label specification JSON file of trained classifier
|
|
17
|
+
|
|
18
|
+
The mapping is accomplished as follows:
|
|
19
|
+
|
|
20
|
+
1. For each category in the classifier label spec, find all taxon nodes that
|
|
21
|
+
belong to that category.
|
|
22
|
+
|
|
23
|
+
2. Given a target in the desired label spec, find all taxon nodes that belong
|
|
24
|
+
to that target. If there is any classifier category whose nodes are a
|
|
25
|
+
subset of the target nodes, then map the classifier category to that target.
|
|
26
|
+
Any partial intersection between a target's nodes and a category's nodes
|
|
27
|
+
is considered an error.
|
|
28
|
+
|
|
29
|
+
3. If there are any classifier categories that have not yet been assigned a
|
|
30
|
+
target, group them into the "other" target.
|
|
31
|
+
|
|
32
|
+
This script outputs a JSON file that maps each target to a list of classifier
|
|
33
|
+
categories.
|
|
34
|
+
|
|
35
|
+
The taxonomy mapping parts of this script are very similar to json_validator.py.
|
|
40
36
|
|
|
41
|
-
"""
|
|
42
|
-
python map_classification_categories.py \
|
|
43
|
-
desired_label_spec.json \
|
|
44
|
-
/path/to/classifier/label_spec.json \
|
|
45
|
-
$HOME/camera-traps-private/camera_trap_taxonomy_mapping.csv
|
|
46
37
|
"""
|
|
47
38
|
|
|
48
39
|
#%% Imports
|
|
@@ -65,6 +56,16 @@ from taxonomy_mapping.taxonomy_graph import (
|
|
|
65
56
|
build_taxonomy_graph, dag_to_tree, TaxonNode)
|
|
66
57
|
|
|
67
58
|
|
|
59
|
+
#%% Example usage
|
|
60
|
+
|
|
61
|
+
"""
|
|
62
|
+
python map_classification_categories.py \
|
|
63
|
+
desired_label_spec.json \
|
|
64
|
+
/path/to/classifier/label_spec.json \
|
|
65
|
+
$HOME/camera-traps-private/camera_trap_taxonomy_mapping.csv
|
|
66
|
+
"""
|
|
67
|
+
|
|
68
|
+
|
|
68
69
|
#%% Main function
|
|
69
70
|
|
|
70
71
|
def main(desired_label_spec_json_path: str,
|
|
@@ -1,74 +1,59 @@
|
|
|
1
|
-
|
|
2
|
-
#
|
|
3
|
-
# merge_classification_detection_output.py
|
|
4
|
-
#
|
|
5
|
-
# Merges classification results with Batch Detection API outputs.
|
|
6
|
-
#
|
|
7
|
-
# This script takes 2 main files as input:
|
|
8
|
-
#
|
|
9
|
-
# 1) Either a "dataset CSV" (output of create_classification_dataset.py) or a
|
|
10
|
-
# "classification results CSV" (output of evaluate_model.py). The CSV is
|
|
11
|
-
# expected to have columns listed below. The 'label' and [label names] columns
|
|
12
|
-
# are optional, but at least one of them must be provided.
|
|
13
|
-
# * 'path': str, path to cropped image
|
|
14
|
-
# * if passing in a detections JSON, must match
|
|
15
|
-
# <img_file>___cropXX_mdvY.Y.jpg
|
|
16
|
-
# * if passing in a queried images JSON, must match
|
|
17
|
-
# <dataset>/<img_file>___cropXX_mdvY.Y.jpg or
|
|
18
|
-
# <dataset>/<img_file>___cropXX.jpg
|
|
19
|
-
# * 'label': str, label assigned to this crop
|
|
20
|
-
# * [label names]: float, confidence in each label
|
|
21
|
-
#
|
|
22
|
-
# 2) Either a "detections JSON" (output of MegaDetector) or a "queried images
|
|
23
|
-
#    JSON" (output of json_validator.py).
|
|
24
|
-
#
|
|
25
|
-
# If the CSV contains [label names] columns (e.g., output of evaluate_model.py),
|
|
26
|
-
# then each crop's "classifications" output will have one value per category.
|
|
27
|
-
# Categories are sorted decreasing by confidence.
|
|
28
|
-
# "classifications": [
|
|
29
|
-
# ["3", 0.901],
|
|
30
|
-
# ["1", 0.071],
|
|
31
|
-
# ["4", 0.025],
|
|
32
|
-
# ["2", 0.003],
|
|
33
|
-
# ]
|
|
34
|
-
#
|
|
35
|
-
# If the CSV only contains the 'label' column (e.g., output of
|
|
36
|
-
# create_classification_dataset.py), then each crop's "classifications" output
|
|
37
|
-
# will have only one value, with a confidence of 1.0. The label's classification
|
|
38
|
-
# category ID is always greater than 1,000,000, to distinguish it from a predicted
|
|
39
|
-
# category ID.
|
|
40
|
-
# "classifications": [
|
|
41
|
-
# ["1000004", 1.0]
|
|
42
|
-
# ]
|
|
43
|
-
#
|
|
44
|
-
# If the CSV contains both [label names] and 'label' columns, then both the
|
|
45
|
-
# predicted categories and label category will be included. By default, the
|
|
46
|
-
# label-category is included last; if the --label-first flag is given, then the
|
|
47
|
-
# label category is placed first in the results.
|
|
48
|
-
# "classifications": [
|
|
49
|
-
# ["1000004", 1.0], # label put first if --label-first flag is given
|
|
50
|
-
# ["3", 0.901], # all other results are sorted by confidence
|
|
51
|
-
# ["1", 0.071],
|
|
52
|
-
# ["4", 0.025],
|
|
53
|
-
# ["2", 0.003]
|
|
54
|
-
# ]
|
|
55
|
-
#
|
|
56
|
-
########
|
|
1
|
+
"""
|
|
57
2
|
|
|
58
|
-
|
|
3
|
+
merge_classification_detection_output.py
|
|
4
|
+
|
|
5
|
+
Merges classification results with Batch Detection API outputs.
|
|
6
|
+
|
|
7
|
+
This script takes 2 main files as input:
|
|
8
|
+
|
|
9
|
+
1) Either a "dataset CSV" (output of create_classification_dataset.py) or a
|
|
10
|
+
"classification results CSV" (output of evaluate_model.py). The CSV is
|
|
11
|
+
expected to have columns listed below. The 'label' and [label names] columns
|
|
12
|
+
are optional, but at least one of them must be provided.
|
|
13
|
+
* 'path': str, path to cropped image
|
|
14
|
+
* if passing in a detections JSON, must match
|
|
15
|
+
<img_file>___cropXX_mdvY.Y.jpg
|
|
16
|
+
* if passing in a queried images JSON, must match
|
|
17
|
+
<dataset>/<img_file>___cropXX_mdvY.Y.jpg or
|
|
18
|
+
<dataset>/<img_file>___cropXX.jpg
|
|
19
|
+
* 'label': str, label assigned to this crop
|
|
20
|
+
* [label names]: float, confidence in each label
|
|
21
|
+
|
|
22
|
+
2) Either a "detections JSON" (output of MegaDetector) or a "queried images
|
|
23
|
+
   JSON" (output of json_validator.py).
|
|
24
|
+
|
|
25
|
+
If the CSV contains [label names] columns (e.g., output of evaluate_model.py),
|
|
26
|
+
then each crop's "classifications" output will have one value per category.
|
|
27
|
+
Categories are sorted decreasing by confidence.
|
|
28
|
+
"classifications": [
|
|
29
|
+
["3", 0.901],
|
|
30
|
+
["1", 0.071],
|
|
31
|
+
["4", 0.025],
|
|
32
|
+
["2", 0.003],
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
If the CSV only contains the 'label' column (e.g., output of
|
|
36
|
+
create_classification_dataset.py), then each crop's "classifications" output
|
|
37
|
+
will have only one value, with a confidence of 1.0. The label's classification
|
|
38
|
+
category ID is always greater than 1,000,000, to distinguish it from a predicted
|
|
39
|
+
category ID.
|
|
40
|
+
"classifications": [
|
|
41
|
+
["1000004", 1.0]
|
|
42
|
+
]
|
|
43
|
+
|
|
44
|
+
If the CSV contains both [label names] and 'label' columns, then both the
|
|
45
|
+
predicted categories and label category will be included. By default, the
|
|
46
|
+
label-category is included last; if the --label-first flag is given, then the
|
|
47
|
+
label category is placed first in the results.
|
|
48
|
+
"classifications": [
|
|
49
|
+
["1000004", 1.0], # label put first if --label-first flag is given
|
|
50
|
+
["3", 0.901], # all other results are sorted by confidence
|
|
51
|
+
["1", 0.071],
|
|
52
|
+
["4", 0.025],
|
|
53
|
+
["2", 0.003]
|
|
54
|
+
]
|
|
59
55
|
|
|
60
56
|
"""
|
|
61
|
-
python merge_classification_detection_output.py \
|
|
62
|
-
BASE_LOGDIR/LOGDIR/outputs_test.csv.gz \
|
|
63
|
-
BASE_LOGDIR/label_index.json \
|
|
64
|
-
BASE_LOGDIR/queried_images.json \
|
|
65
|
-
--classifier-name "efficientnet-b3-idfg-moredata" \
|
|
66
|
-
--detector-output-cache-dir $HOME/classifier-training/mdcache \
|
|
67
|
-
--detector-version "4.1" \
|
|
68
|
-
--output-json BASE_LOGDIR/LOGDIR/classifier_results.json \
|
|
69
|
-
--datasets idfg idfg_swwlf_2019
|
|
70
|
-
"""
|
|
71
|
-
|
|
72
57
|
|
|
73
58
|
#%% Imports
|
|
74
59
|
|
|
@@ -88,6 +73,21 @@ from tqdm import tqdm
|
|
|
88
73
|
from md_utils.ct_utils import truncate_float
|
|
89
74
|
|
|
90
75
|
|
|
76
|
+
#%% Example usage
|
|
77
|
+
|
|
78
|
+
"""
|
|
79
|
+
python merge_classification_detection_output.py \
|
|
80
|
+
BASE_LOGDIR/LOGDIR/outputs_test.csv.gz \
|
|
81
|
+
BASE_LOGDIR/label_index.json \
|
|
82
|
+
BASE_LOGDIR/queried_images.json \
|
|
83
|
+
--classifier-name "efficientnet-b3-idfg-moredata" \
|
|
84
|
+
--detector-output-cache-dir $HOME/classifier-training/mdcache \
|
|
85
|
+
--detector-version "4.1" \
|
|
86
|
+
--output-json BASE_LOGDIR/LOGDIR/classifier_results.json \
|
|
87
|
+
--datasets idfg idfg_swwlf_2019
|
|
88
|
+
"""
|
|
89
|
+
|
|
90
|
+
|
|
91
91
|
#%% Support functions
|
|
92
92
|
|
|
93
93
|
def row_to_classification_list(row: Mapping[str, Any],
|