megadetector 5.0.8__py3-none-any.whl → 5.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- api/__init__.py +0 -0
- api/batch_processing/__init__.py +0 -0
- api/batch_processing/api_core/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/score.py +0 -1
- api/batch_processing/api_core/server_job_status_table.py +0 -1
- api/batch_processing/api_core_support/__init__.py +0 -0
- api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
- api/batch_processing/api_support/__init__.py +0 -0
- api/batch_processing/api_support/summarize_daily_activity.py +0 -1
- api/batch_processing/data_preparation/__init__.py +0 -0
- api/batch_processing/data_preparation/manage_local_batch.py +65 -65
- api/batch_processing/data_preparation/manage_video_batch.py +8 -8
- api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
- api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
- api/batch_processing/postprocessing/__init__.py +0 -0
- api/batch_processing/postprocessing/add_max_conf.py +12 -12
- api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
- api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
- api/batch_processing/postprocessing/compare_batch_results.py +113 -43
- api/batch_processing/postprocessing/convert_output_format.py +41 -16
- api/batch_processing/postprocessing/load_api_results.py +16 -17
- api/batch_processing/postprocessing/md_to_coco.py +31 -21
- api/batch_processing/postprocessing/md_to_labelme.py +52 -22
- api/batch_processing/postprocessing/merge_detections.py +14 -14
- api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
- api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
- api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
- api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
- api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
- api/synchronous/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
- api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
- api/synchronous/api_core/animal_detection_api/config.py +35 -35
- api/synchronous/api_core/tests/__init__.py +0 -0
- api/synchronous/api_core/tests/load_test.py +109 -109
- classification/__init__.py +0 -0
- classification/aggregate_classifier_probs.py +21 -24
- classification/analyze_failed_images.py +11 -13
- classification/cache_batchapi_outputs.py +51 -51
- classification/create_classification_dataset.py +69 -68
- classification/crop_detections.py +54 -53
- classification/csv_to_json.py +97 -100
- classification/detect_and_crop.py +105 -105
- classification/evaluate_model.py +43 -42
- classification/identify_mislabeled_candidates.py +47 -46
- classification/json_to_azcopy_list.py +10 -10
- classification/json_validator.py +72 -71
- classification/map_classification_categories.py +44 -43
- classification/merge_classification_detection_output.py +68 -68
- classification/prepare_classification_script.py +157 -154
- classification/prepare_classification_script_mc.py +228 -228
- classification/run_classifier.py +27 -26
- classification/save_mislabeled.py +30 -30
- classification/train_classifier.py +20 -20
- classification/train_classifier_tf.py +21 -22
- classification/train_utils.py +10 -10
- data_management/__init__.py +0 -0
- data_management/annotations/__init__.py +0 -0
- data_management/annotations/annotation_constants.py +18 -31
- data_management/camtrap_dp_to_coco.py +238 -0
- data_management/cct_json_utils.py +102 -59
- data_management/cct_to_md.py +176 -158
- data_management/cct_to_wi.py +247 -219
- data_management/coco_to_labelme.py +272 -263
- data_management/coco_to_yolo.py +79 -58
- data_management/databases/__init__.py +0 -0
- data_management/databases/add_width_and_height_to_db.py +20 -16
- data_management/databases/combine_coco_camera_traps_files.py +35 -31
- data_management/databases/integrity_check_json_db.py +62 -24
- data_management/databases/subset_json_db.py +24 -15
- data_management/generate_crops_from_cct.py +27 -45
- data_management/get_image_sizes.py +188 -162
- data_management/importers/add_nacti_sizes.py +8 -8
- data_management/importers/add_timestamps_to_icct.py +78 -78
- data_management/importers/animl_results_to_md_results.py +158 -158
- data_management/importers/auckland_doc_test_to_json.py +9 -9
- data_management/importers/auckland_doc_to_json.py +8 -8
- data_management/importers/awc_to_json.py +7 -7
- data_management/importers/bellevue_to_json.py +15 -15
- data_management/importers/cacophony-thermal-importer.py +13 -13
- data_management/importers/carrizo_shrubfree_2018.py +8 -8
- data_management/importers/carrizo_trail_cam_2017.py +8 -8
- data_management/importers/cct_field_adjustments.py +9 -9
- data_management/importers/channel_islands_to_cct.py +10 -10
- data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
- data_management/importers/ena24_to_json.py +7 -7
- data_management/importers/filenames_to_json.py +8 -8
- data_management/importers/helena_to_cct.py +7 -7
- data_management/importers/idaho-camera-traps.py +7 -7
- data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
- data_management/importers/jb_csv_to_json.py +9 -9
- data_management/importers/mcgill_to_json.py +8 -8
- data_management/importers/missouri_to_json.py +18 -18
- data_management/importers/nacti_fieldname_adjustments.py +10 -10
- data_management/importers/noaa_seals_2019.py +7 -7
- data_management/importers/pc_to_json.py +7 -7
- data_management/importers/plot_wni_giraffes.py +7 -7
- data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
- data_management/importers/prepare_zsl_imerit.py +7 -7
- data_management/importers/rspb_to_json.py +8 -8
- data_management/importers/save_the_elephants_survey_A.py +8 -8
- data_management/importers/save_the_elephants_survey_B.py +9 -9
- data_management/importers/snapshot_safari_importer.py +26 -26
- data_management/importers/snapshot_safari_importer_reprise.py +665 -665
- data_management/importers/snapshot_serengeti_lila.py +14 -14
- data_management/importers/sulross_get_exif.py +8 -9
- data_management/importers/timelapse_csv_set_to_json.py +11 -11
- data_management/importers/ubc_to_json.py +13 -13
- data_management/importers/umn_to_json.py +7 -7
- data_management/importers/wellington_to_json.py +8 -8
- data_management/importers/wi_to_json.py +9 -9
- data_management/importers/zamba_results_to_md_results.py +181 -181
- data_management/labelme_to_coco.py +65 -24
- data_management/labelme_to_yolo.py +8 -8
- data_management/lila/__init__.py +0 -0
- data_management/lila/add_locations_to_island_camera_traps.py +9 -9
- data_management/lila/add_locations_to_nacti.py +147 -147
- data_management/lila/create_lila_blank_set.py +13 -13
- data_management/lila/create_lila_test_set.py +8 -8
- data_management/lila/create_links_to_md_results_files.py +106 -106
- data_management/lila/download_lila_subset.py +44 -110
- data_management/lila/generate_lila_per_image_labels.py +55 -42
- data_management/lila/get_lila_annotation_counts.py +18 -15
- data_management/lila/get_lila_image_counts.py +11 -11
- data_management/lila/lila_common.py +96 -33
- data_management/lila/test_lila_metadata_urls.py +132 -116
- data_management/ocr_tools.py +173 -128
- data_management/read_exif.py +110 -97
- data_management/remap_coco_categories.py +83 -83
- data_management/remove_exif.py +58 -62
- data_management/resize_coco_dataset.py +30 -23
- data_management/wi_download_csv_to_coco.py +246 -239
- data_management/yolo_output_to_md_output.py +86 -73
- data_management/yolo_to_coco.py +300 -60
- detection/__init__.py +0 -0
- detection/detector_training/__init__.py +0 -0
- detection/process_video.py +85 -33
- detection/pytorch_detector.py +43 -25
- detection/run_detector.py +157 -72
- detection/run_detector_batch.py +179 -113
- detection/run_inference_with_yolov5_val.py +108 -48
- detection/run_tiled_inference.py +111 -40
- detection/tf_detector.py +51 -29
- detection/video_utils.py +606 -521
- docs/source/conf.py +43 -0
- md_utils/__init__.py +0 -0
- md_utils/azure_utils.py +9 -9
- md_utils/ct_utils.py +228 -68
- md_utils/directory_listing.py +59 -64
- md_utils/md_tests.py +968 -871
- md_utils/path_utils.py +460 -134
- md_utils/process_utils.py +157 -133
- md_utils/sas_blob_utils.py +20 -20
- md_utils/split_locations_into_train_val.py +45 -32
- md_utils/string_utils.py +33 -10
- md_utils/url_utils.py +176 -60
- md_utils/write_html_image_list.py +40 -33
- md_visualization/__init__.py +0 -0
- md_visualization/plot_utils.py +102 -109
- md_visualization/render_images_with_thumbnails.py +34 -34
- md_visualization/visualization_utils.py +597 -291
- md_visualization/visualize_db.py +76 -48
- md_visualization/visualize_detector_output.py +61 -42
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/METADATA +13 -7
- megadetector-5.0.9.dist-info/RECORD +224 -0
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
- taxonomy_mapping/__init__.py +0 -0
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
- taxonomy_mapping/map_new_lila_datasets.py +154 -154
- taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
- taxonomy_mapping/preview_lila_taxonomy.py +591 -591
- taxonomy_mapping/retrieve_sample_image.py +12 -12
- taxonomy_mapping/simple_image_download.py +11 -11
- taxonomy_mapping/species_lookup.py +10 -10
- taxonomy_mapping/taxonomy_csv_checker.py +18 -18
- taxonomy_mapping/taxonomy_graph.py +47 -47
- taxonomy_mapping/validate_lila_category_mappings.py +83 -76
- data_management/cct_json_to_filename_json.py +0 -89
- data_management/cct_to_csv.py +0 -140
- data_management/databases/remove_corrupted_images_from_db.py +0 -191
- detection/detector_training/copy_checkpoints.py +0 -43
- megadetector-5.0.8.dist-info/RECORD +0 -205
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
- {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/WHEEL +0 -0
api/__init__.py
ADDED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -1,64 +1,64 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
manage_local_batch.py
|
|
4
|
+
|
|
5
|
+
Semi-automated process for managing a local MegaDetector job, including
|
|
6
|
+
standard postprocessing steps.
|
|
7
|
+
|
|
8
|
+
This script is not intended to be run from top to bottom like a typical Python script,
|
|
9
|
+
it's a notebook disguised with a .py extension. It's the Bestest Most Awesome way to
|
|
10
|
+
run MegaDetector, but it's also pretty subtle; if you want to play with this, you might
|
|
11
|
+
want to check in with cameratraps@lila.science for some tips. Otherwise... YMMV.
|
|
12
|
+
|
|
13
|
+
Some general notes on using this script, which I do in Spyder, though everything will be
|
|
14
|
+
the same if you are reading this in Jupyter Notebook (using the .ipynb version of the
|
|
15
|
+
script):
|
|
16
|
+
|
|
17
|
+
* Typically when I have a MegaDetector job to run, I make a copy of this script. Let's
|
|
18
|
+
say I'm running a job for an organization called "bibblebop"; I have a big folder of
|
|
19
|
+
job-specific copies of this script, and I might save a new one called "bibblebop-2023-07-26.py"
|
|
20
|
+
(the filename doesn't matter, it just helps me keep these organized).
|
|
21
|
+
|
|
22
|
+
* There are three variables you need to set in this script before you start running code:
|
|
23
|
+
"input_path", "organization_name_short", and "job_date". You will get a sensible error if you forget
|
|
24
|
+
to set any of these. In this case I might set those to "/data/bibblebopcamerastuff",
|
|
25
|
+
"bibblebop", and "2023-07-26", respectively.
|
|
26
|
+
|
|
27
|
+
* The defaults assume you want to split the job into two tasks (this is the default because I have
|
|
28
|
+
two GPUs). Nothing bad will happen if you do this on a zero-GPU or single-GPU machine, but if you
|
|
29
|
+
want everything to run in one logical task, change "n_gpus" and "n_jobs" to 1 (instead of 2).
|
|
30
|
+
|
|
31
|
+
* After setting the required variables, I run the first few cells - up to and including the one
|
|
32
|
+
called "Generate commands" - which collectively take basically zero seconds. After you run the
|
|
33
|
+
"Generate commands" cell, you will have a folder that looks something like:
|
|
34
|
+
|
|
35
|
+
~/postprocessing/bibblebop/bibblebop-2023-07-26-mdv5a/
|
|
36
|
+
|
|
37
|
+
On Windows, this means:
|
|
38
|
+
|
|
39
|
+
~/postprocessing/bibblebop/bibblebop-2023-07-26-mdv5a/
|
|
40
|
+
|
|
41
|
+
Everything related to this job - scripts, outputs, intermediate stuff - will be in this folder.
|
|
42
|
+
Specifically, after the "Generate commands" cell, you'll have scripts in that folder called something
|
|
43
|
+
like:
|
|
44
|
+
|
|
45
|
+
run_chunk_000_gpu_00.sh (or .bat on Windows)
|
|
46
|
+
|
|
47
|
+
Personally, I like to run that script directly in a command prompt (I just leave Spyder open, though
|
|
48
|
+
it's OK if Spyder gets shut down while MD is running).
|
|
49
|
+
|
|
50
|
+
At this point, once you get the hang of it, you've invested about zero seconds of human time,
|
|
51
|
+
but possibly several days of unattended compute time, depending on the size of your job.
|
|
52
|
+
|
|
53
|
+
* Then when the jobs are done, back to the interactive environment! I run the next few cells,
|
|
54
|
+
which make sure the job finished OK, and the cell called "Post-processing (pre-RDE)", which
|
|
55
|
+
generates an HTML preview of the results. You are very plausibly done at this point, and can ignore
|
|
56
|
+
all the remaining cells. If you want to do things like repeat detection elimination, or running
|
|
57
|
+
a classifier, or splitting your results file up in specialized ways, there are cells for all of those
|
|
58
|
+
things, but now you're in power-user territory, so I'm going to leave this guide here. Email
|
|
59
|
+
cameratraps@lila.science with questions about the fancy stuff.
|
|
60
|
+
|
|
61
|
+
"""
|
|
62
62
|
|
|
63
63
|
#%% Imports and constants
|
|
64
64
|
|
|
@@ -803,7 +803,7 @@ if render_animals_only:
|
|
|
803
803
|
os.makedirs(output_base, exist_ok=True)
|
|
804
804
|
print('Processing to {}'.format(output_base))
|
|
805
805
|
|
|
806
|
-
options.
|
|
806
|
+
options.md_results_file = combined_api_output_file
|
|
807
807
|
options.output_dir = output_base
|
|
808
808
|
ppresults = process_batch_results(options)
|
|
809
809
|
html_output_file = ppresults.output_html_file
|
|
@@ -939,7 +939,7 @@ os.makedirs(output_base, exist_ok=True)
|
|
|
939
939
|
|
|
940
940
|
print('Processing post-RDE to {}'.format(output_base))
|
|
941
941
|
|
|
942
|
-
options.
|
|
942
|
+
options.md_results_file = filtered_output_filename
|
|
943
943
|
options.output_dir = output_base
|
|
944
944
|
ppresults = process_batch_results(options)
|
|
945
945
|
html_output_file = ppresults.output_html_file
|
|
@@ -2014,7 +2014,7 @@ output_base = os.path.join(postprocessing_output_folder, folder_token + \
|
|
|
2014
2014
|
os.makedirs(output_base, exist_ok=True)
|
|
2015
2015
|
print('Processing {} to {}'.format(base_task_name, output_base))
|
|
2016
2016
|
|
|
2017
|
-
options.
|
|
2017
|
+
options.md_results_file = sequence_smoothed_classification_file
|
|
2018
2018
|
options.output_dir = output_base
|
|
2019
2019
|
ppresults = process_batch_results(options)
|
|
2020
2020
|
path_utils.open_file(ppresults.output_html_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
|
|
@@ -2134,7 +2134,7 @@ output_base_large_boxes = os.path.join(postprocessing_output_folder,
|
|
|
2134
2134
|
os.makedirs(output_base_large_boxes, exist_ok=True)
|
|
2135
2135
|
print('Processing post-RDE, post-size-separation to {}'.format(output_base_large_boxes))
|
|
2136
2136
|
|
|
2137
|
-
options.
|
|
2137
|
+
options.md_results_file = size_separated_file
|
|
2138
2138
|
options.output_dir = output_base_large_boxes
|
|
2139
2139
|
|
|
2140
2140
|
ppresults = process_batch_results(options)
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
manage_video_batch.py
|
|
4
|
+
|
|
5
|
+
Notebook-esque script to manage the process of running a local batch of videos
|
|
6
|
+
through MD. Defers most of the heavy lifting to manage_local_batch.py.
|
|
7
|
+
|
|
8
|
+
"""
|
|
9
9
|
|
|
10
10
|
#%% Imports and constants
|
|
11
11
|
|
|
File without changes
|
|
@@ -1,15 +1,15 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
add_max_conf.py
|
|
4
|
+
|
|
5
|
+
The MD output format included a "max_detection_conf" field with each image
|
|
6
|
+
up to and including version 1.2; it was removed as of version 1.3 (it's
|
|
7
|
+
redundant with the individual detection confidence values).
|
|
8
|
+
|
|
9
|
+
Just in case someone took a dependency on that field, this script allows you
|
|
10
|
+
to add it back to an existing .json file.
|
|
11
|
+
|
|
12
|
+
"""
|
|
13
13
|
|
|
14
14
|
#%% Imports and constants
|
|
15
15
|
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
categorize_detections_by_size.py
|
|
4
|
+
|
|
5
|
+
Given a MegaDetector .json file, creates a separate category for bounding boxes
|
|
6
|
+
above one or more size thresholds.
|
|
7
|
+
|
|
8
|
+
"""
|
|
9
9
|
|
|
10
10
|
#%% Constants and imports
|
|
11
11
|
|
|
@@ -18,24 +18,42 @@ from tqdm import tqdm
|
|
|
18
18
|
#%% Support classes
|
|
19
19
|
|
|
20
20
|
class SizeCategorizationOptions:
|
|
21
|
-
|
|
22
|
-
|
|
21
|
+
"""
|
|
22
|
+
Options used to parameterize categorize_detections_by_size().
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
#: Thresholds to use for separation, as a fraction of the image size.
|
|
26
|
+
#:
|
|
27
|
+
#: Should be sorted from smallest to largest.
|
|
23
28
|
size_thresholds = [0.95]
|
|
24
29
|
|
|
25
|
-
|
|
30
|
+
#: List of category numbers to use in separation; uses all categories if None
|
|
26
31
|
categories_to_separate = None
|
|
27
32
|
|
|
28
|
-
|
|
33
|
+
#: Dimension to use for thresholding; can be "size", "width", or "height"
|
|
29
34
|
measurement = 'size'
|
|
30
35
|
|
|
31
|
-
|
|
36
|
+
#: Categories to assign to thresholded ranges; should have the same length as
|
|
37
|
+
#: "size_thresholds".
|
|
32
38
|
size_category_names = ['large_detection']
|
|
33
39
|
|
|
34
40
|
|
|
35
41
|
#%% Main functions
|
|
36
42
|
|
|
37
43
|
def categorize_detections_by_size(input_file,output_file=None,options=None):
|
|
38
|
-
|
|
44
|
+
"""
|
|
45
|
+
Given a MegaDetector .json file, creates a separate category for bounding boxes
|
|
46
|
+
above one or more size thresholds, optionally writing results to [output_file].
|
|
47
|
+
|
|
48
|
+
Args:
|
|
49
|
+
input_file (str): file to process
|
|
50
|
+
output_file (str, optional): optional output file
|
|
51
|
+
options (SizeCategorizationOptions): categorization parameters
|
|
52
|
+
|
|
53
|
+
Returns:
|
|
54
|
+
dict: data loaded from [input_file], with the new size-based categories.
|
|
55
|
+
Identical to what's written to [output_file], if [output_file] is not None.
|
|
56
|
+
"""
|
|
39
57
|
if options is None:
|
|
40
58
|
options = SizeCategorizationOptions()
|
|
41
59
|
|
|
@@ -1,54 +1,56 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
combine_api_outputs.py
|
|
4
|
+
|
|
5
|
+
Merges two or more .json files in batch API output format, optionally
|
|
6
|
+
writing the results to another .json file.
|
|
7
|
+
|
|
8
|
+
* Concatenates image lists, erroring if images are not unique.
|
|
9
|
+
* Errors if class lists are conflicting; errors on unrecognized fields.
|
|
10
|
+
* Checks compatibility in info structs, within reason.
|
|
11
|
+
|
|
12
|
+
File format:
|
|
13
|
+
|
|
14
|
+
https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
|
|
15
|
+
|
|
16
|
+
Command-line use:
|
|
17
|
+
|
|
18
|
+
combine_api_outputs input1.json input2.json ... inputN.json output.json
|
|
19
|
+
|
|
20
|
+
Also see combine_api_shard_files() (not exposed via the command line yet) to
|
|
21
|
+
combine the intermediate files created by the API.
|
|
22
|
+
|
|
23
|
+
This does no checking for redundancy; if you are looking to ensemble
|
|
24
|
+
the results of multiple model versions, see merge_detections.py.
|
|
25
|
+
|
|
26
|
+
"""
|
|
27
27
|
|
|
28
28
|
#%% Constants and imports
|
|
29
29
|
|
|
30
30
|
import argparse
|
|
31
31
|
import sys
|
|
32
32
|
import json
|
|
33
|
-
from typing import Any, Dict, Iterable, Mapping, List, Optional
|
|
34
33
|
|
|
35
34
|
|
|
36
35
|
#%% Merge functions
|
|
37
36
|
|
|
38
|
-
def combine_api_output_files(input_files
|
|
39
|
-
output_file
|
|
40
|
-
require_uniqueness
|
|
41
|
-
verbose
|
|
42
|
-
) -> Dict[str, Any]:
|
|
37
|
+
def combine_api_output_files(input_files,
|
|
38
|
+
output_file=None,
|
|
39
|
+
require_uniqueness=True,
|
|
40
|
+
verbose=True):
|
|
43
41
|
"""
|
|
44
|
-
Merges list of
|
|
45
|
-
dictionary, optionally writing the result to
|
|
42
|
+
Merges the list of MD results files [input_files] into a single
|
|
43
|
+
dictionary, optionally writing the result to [output_file].
|
|
46
44
|
|
|
47
45
|
Args:
|
|
48
|
-
input_files
|
|
49
|
-
output_file
|
|
50
|
-
require_uniqueness
|
|
46
|
+
input_files (list of str): paths to JSON detection files
|
|
47
|
+
output_file (str, optional): path to write merged JSON
|
|
48
|
+
require_uniqueness (bool): whether to require that the images in
|
|
51
49
|
each list of images be unique
|
|
50
|
+
|
|
51
|
+
Returns:
|
|
52
|
+
dict: merged dictionaries loaded from [input_files], identical to what's
|
|
53
|
+
written to [output_file] if [output_file] is not None
|
|
52
54
|
"""
|
|
53
55
|
|
|
54
56
|
def print_if_verbose(s):
|
|
@@ -73,27 +75,27 @@ def combine_api_output_files(input_files: List[str],
|
|
|
73
75
|
return merged_dict
|
|
74
76
|
|
|
75
77
|
|
|
76
|
-
def combine_api_output_dictionaries(input_dicts
|
|
77
|
-
require_uniqueness: bool = True
|
|
78
|
-
) -> Dict[str, Any]:
|
|
78
|
+
def combine_api_output_dictionaries(input_dicts, require_uniqueness=True):
|
|
79
79
|
"""
|
|
80
|
-
Merges the list of
|
|
81
|
-
comment for details on merge rules.
|
|
80
|
+
Merges the list of MD results dictionaries [input_dicts] into a single dict.
|
|
81
|
+
See module header comment for details on merge rules.
|
|
82
82
|
|
|
83
83
|
Args:
|
|
84
|
-
input_dicts: list of dicts
|
|
85
|
-
|
|
86
|
-
require_uniqueness
|
|
87
|
-
each input dict be unique
|
|
88
|
-
|
|
89
|
-
|
|
84
|
+
input_dicts (list of dicts): list of dicts in which each dict represents the
|
|
85
|
+
contents of a MD output file
|
|
86
|
+
require_uniqueness (bool): whether to require that the images in
|
|
87
|
+
each input dict be unique; if this is True and image filenames are
|
|
88
|
+
not unique, an error is raised.
|
|
89
|
+
|
|
90
|
+
Returns:
|
|
91
|
+
dict: merged MD results
|
|
90
92
|
"""
|
|
91
93
|
|
|
92
94
|
# Map image filenames to detections, we'll convert to a list later
|
|
93
95
|
images = {}
|
|
94
|
-
info
|
|
95
|
-
detection_categories
|
|
96
|
-
classification_categories
|
|
96
|
+
info = {}
|
|
97
|
+
detection_categories = {}
|
|
98
|
+
classification_categories = {}
|
|
97
99
|
n_redundant_images = 0
|
|
98
100
|
n_images = 0
|
|
99
101
|
|
|
@@ -182,8 +184,20 @@ def combine_api_output_dictionaries(input_dicts: Iterable[Mapping[str, Any]],
|
|
|
182
184
|
|
|
183
185
|
def combine_api_shard_files(input_files, output_file=None):
|
|
184
186
|
"""
|
|
185
|
-
Merges the list of .json-formatted API shard files
|
|
186
|
-
list of dictionaries, optionally writing the result to
|
|
187
|
+
Merges the list of .json-formatted API shard files [input_files] into a single
|
|
188
|
+
list of dictionaries, optionally writing the result to [output_file].
|
|
189
|
+
|
|
190
|
+
This operates on mostly-deprecated API shard files, not MegaDetector results files.
|
|
191
|
+
If you don't know what an API shard file is, you don't want this function.
|
|
192
|
+
|
|
193
|
+
Args:
|
|
194
|
+
input_files (list of str): files to merge
|
|
195
|
+
output_file (str, optional): file to which we should write merged results
|
|
196
|
+
|
|
197
|
+
Returns:
|
|
198
|
+
dict: merged results
|
|
199
|
+
|
|
200
|
+
:meta private:
|
|
187
201
|
"""
|
|
188
202
|
|
|
189
203
|
input_lists = []
|
|
@@ -215,6 +229,7 @@ def combine_api_shard_files(input_files, output_file=None):
|
|
|
215
229
|
#%% Command-line driver
|
|
216
230
|
|
|
217
231
|
def main():
|
|
232
|
+
|
|
218
233
|
parser = argparse.ArgumentParser()
|
|
219
234
|
parser.add_argument(
|
|
220
235
|
'input_paths', nargs='+',
|
|
@@ -230,6 +245,5 @@ def main():
|
|
|
230
245
|
args = parser.parse_args()
|
|
231
246
|
combine_api_output_files(args.input_paths, args.output_path)
|
|
232
247
|
|
|
233
|
-
|
|
234
248
|
if __name__ == '__main__':
|
|
235
249
|
main()
|