megadetector 5.0.8-py3-none-any.whl → 5.0.9-py3-none-any.whl

This diff shows the content of two publicly available package versions as released to a public registry. It is provided for informational purposes only.

Files changed (190)
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +65 -65
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
  20. api/batch_processing/postprocessing/compare_batch_results.py +113 -43
  21. api/batch_processing/postprocessing/convert_output_format.py +41 -16
  22. api/batch_processing/postprocessing/load_api_results.py +16 -17
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +52 -22
  25. api/batch_processing/postprocessing/merge_detections.py +14 -14
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
  27. api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +102 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -263
  71. data_management/coco_to_yolo.py +79 -58
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +62 -24
  76. data_management/databases/subset_json_db.py +24 -15
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -162
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -158
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +7 -7
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +65 -24
  120. data_management/labelme_to_yolo.py +8 -8
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +13 -13
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +44 -110
  128. data_management/lila/generate_lila_per_image_labels.py +55 -42
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +96 -33
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +110 -97
  135. data_management/remap_coco_categories.py +83 -83
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +30 -23
  138. data_management/wi_download_csv_to_coco.py +246 -239
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +300 -60
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +179 -113
  147. detection/run_inference_with_yolov5_val.py +108 -48
  148. detection/run_tiled_inference.py +111 -40
  149. detection/tf_detector.py +51 -29
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +228 -68
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -871
  157. md_utils/path_utils.py +460 -134
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +176 -60
  163. md_utils/write_html_image_list.py +40 -33
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +597 -291
  168. md_visualization/visualize_db.py +76 -48
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/METADATA +13 -7
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  173. taxonomy_mapping/__init__.py +0 -0
  174. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  175. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  176. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  177. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  178. taxonomy_mapping/retrieve_sample_image.py +12 -12
  179. taxonomy_mapping/simple_image_download.py +11 -11
  180. taxonomy_mapping/species_lookup.py +10 -10
  181. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  182. taxonomy_mapping/taxonomy_graph.py +47 -47
  183. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  184. data_management/cct_json_to_filename_json.py +0 -89
  185. data_management/cct_to_csv.py +0 -140
  186. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  187. detection/detector_training/copy_checkpoints.py +0 -43
  188. megadetector-5.0.8.dist-info/RECORD +0 -205
  189. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
  190. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/WHEEL +0 -0
api/__init__.py ADDED
api/batch_processing/api_core/batch_service/score.py CHANGED
@@ -435,6 +435,5 @@ def main():
     with open(task_output_path, 'w', encoding='utf-8') as f:
         json.dump(detections, f, ensure_ascii=False)
 
-
 if __name__ == '__main__':
     main()
api/batch_processing/api_core/server_job_status_table.py CHANGED
@@ -148,6 +148,5 @@ class TestJobStatusTable(unittest.TestCase):
         item_read = table.read_job_status(job_id)
         self.assertIsNone(item_read)
 
-
 if __name__ == '__main__':
     unittest.main()
api/batch_processing/api_core_support/aggregate_results_manually.py CHANGED
@@ -42,6 +42,5 @@ def main():
     output_file_urls_str = json.dumps(output_file_urls)
     print(output_file_urls_str)
 
-
 if __name__ == '__main__':
     main()
api/batch_processing/api_support/summarize_daily_activity.py CHANGED
@@ -148,6 +148,5 @@ def main():
     send_message()
     time.sleep(24 * 60 * 60)
 
-
 if __name__ == '__main__':
     main()
api/batch_processing/data_preparation/manage_local_batch.py CHANGED
@@ -1,64 +1,64 @@
-########
-#
-# manage_local_batch.py
-#
-# Semi-automated process for managing a local MegaDetector job, including
-# standard postprocessing steps.
-#
-# This script is not intended to be run from top to bottom like a typical Python script,
-# it's a notebook disguised with a .py extension. It's the Bestest Most Awesome way to
-# run MegaDetector, but it's also pretty subtle; if you want to play with this, you might
-# want to check in with cameratraps@lila.science for some tips. Otherwise... YMMV.
-#
-# Some general notes on using this script, which I do in Spyder, though everything will be
-# the same if you are reading this in Jupyter Notebook (using the .ipynb version of the
-# script):
-#
-# * Typically when I have a MegaDetector job to run, I make a copy of this script. Let's
-# say I'm running a job for an organization called "bibblebop"; I have a big folder of
-# job-specific copies of this script, and I might save a new one called "bibblebop-2023-07-26.py"
-# (the filename doesn't matter, it just helps me keep these organized).
-#
-# * There are three variables you need to set in this script before you start running code:
-# "input_path", "organization_name_short", and "job_date". You will get a sensible error if you forget
-# to set any of these. In this case I might set those to "/data/bibblebobcamerastuff",
-# "bibblebop", and "2023-07-26", respectively.
-#
-# * The defaults assume you want to split the job into two tasks (this is the default because I have
-# two GPUs). Nothing bad will happen if you do this on a zero-GPU or single-GPU machine, but if you
-# want everything to run in one logical task, change "n_gpus" and "n_jobs" to 1 (instead of 2).
-#
-# * After setting the required variables, I run the first few cells - up to and including the one
-# called "Generate commands" - which collectively take basically zero seconds. After you run the
-# "Generate commands" cell, you will have a folder that looks something like:
-#
-# ~/postprocessing/bibblebop/bibblebop-2023-07-06-mdv5a/
-#
-# On Windows, this means:
-#
-# ~/postprocessing/bibblebop/bibblebop-2023-07-06-mdv5a/
-#
-# Everything related to this job - scripts, outputs, intermediate stuff - will be in this folder.
-# Specifically, after the "Generate commands" cell, you'll have scripts in that folder called something
-# like:
-#
-# run_chunk_000_gpu_00.sh (or .bat on Windows)
-#
-# Personally, I like to run that script directly in a command prompt (I just leave Spyder open, though
-# it's OK if Spyder gets shut down while MD is running).
-#
-# At this point, once you get the hang of it, you've invested about zero seconds of human time,
-# but possibly several days of unattended compute time, depending on the size of your job.
-#
-# * Then when the jobs are done, back to the interactive environment! I run the next few cells,
-# which make sure the job finished OK, and the cell called "Post-processing (pre-RDE)", which
-# generates an HTML preview of the results. You are very plausibly done at this point, and can ignore
-# all the remaining cells. If you want to do things like repeat detection elimination, or running
-# a classifier, or splitting your results file up in specialized ways, there are cells for all of those
-# things, but now you're in power-user territory, so I'm going to leave this guide here. Email
-# cameratraps@lila.science with questions about the fancy stuff.
-#
-########
+"""
+
+manage_local_batch.py
+
+Semi-automated process for managing a local MegaDetector job, including
+standard postprocessing steps.
+
+This script is not intended to be run from top to bottom like a typical Python script,
+it's a notebook disguised with a .py extension. It's the Bestest Most Awesome way to
+run MegaDetector, but it's also pretty subtle; if you want to play with this, you might
+want to check in with cameratraps@lila.science for some tips. Otherwise... YMMV.
+
+Some general notes on using this script, which I do in Spyder, though everything will be
+the same if you are reading this in Jupyter Notebook (using the .ipynb version of the
+script):
+
+* Typically when I have a MegaDetector job to run, I make a copy of this script. Let's
+say I'm running a job for an organization called "bibblebop"; I have a big folder of
+job-specific copies of this script, and I might save a new one called "bibblebop-2023-07-26.py"
+(the filename doesn't matter, it just helps me keep these organized).
+
+* There are three variables you need to set in this script before you start running code:
+"input_path", "organization_name_short", and "job_date". You will get a sensible error if you forget
+to set any of these. In this case I might set those to "/data/bibblebobcamerastuff",
+"bibblebop", and "2023-07-26", respectively.
+
+* The defaults assume you want to split the job into two tasks (this is the default because I have
+two GPUs). Nothing bad will happen if you do this on a zero-GPU or single-GPU machine, but if you
+want everything to run in one logical task, change "n_gpus" and "n_jobs" to 1 (instead of 2).
+
+* After setting the required variables, I run the first few cells - up to and including the one
+called "Generate commands" - which collectively take basically zero seconds. After you run the
+"Generate commands" cell, you will have a folder that looks something like:
+
+~/postprocessing/bibblebop/bibblebop-2023-07-06-mdv5a/
+
+On Windows, this means:
+
+~/postprocessing/bibblebop/bibblebop-2023-07-06-mdv5a/
+
+Everything related to this job - scripts, outputs, intermediate stuff - will be in this folder.
+Specifically, after the "Generate commands" cell, you'll have scripts in that folder called something
+like:
+
+run_chunk_000_gpu_00.sh (or .bat on Windows)
+
+Personally, I like to run that script directly in a command prompt (I just leave Spyder open, though
+it's OK if Spyder gets shut down while MD is running).
+
+At this point, once you get the hang of it, you've invested about zero seconds of human time,
+but possibly several days of unattended compute time, depending on the size of your job.
+
+* Then when the jobs are done, back to the interactive environment! I run the next few cells,
+which make sure the job finished OK, and the cell called "Post-processing (pre-RDE)", which
+generates an HTML preview of the results. You are very plausibly done at this point, and can ignore
+all the remaining cells. If you want to do things like repeat detection elimination, or running
+a classifier, or splitting your results file up in specialized ways, there are cells for all of those
+things, but now you're in power-user territory, so I'm going to leave this guide here. Email
+cameratraps@lila.science with questions about the fancy stuff.
+
+"""
 
 #%% Imports and constants
 
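The docstring above names the three variables a user has to set before running any cells. A minimal sketch of that first step, using the docstring's own example values:

input_path = '/data/bibblebobcamerastuff'
organization_name_short = 'bibblebop'
job_date = '2023-07-26'
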
@@ -803,7 +803,7 @@ if render_animals_only:
 os.makedirs(output_base, exist_ok=True)
 print('Processing to {}'.format(output_base))
 
-options.api_output_file = combined_api_output_file
+options.md_results_file = combined_api_output_file
 options.output_dir = output_base
 ppresults = process_batch_results(options)
 html_output_file = ppresults.output_html_file
@@ -939,7 +939,7 @@ os.makedirs(output_base, exist_ok=True)
 
 print('Processing post-RDE to {}'.format(output_base))
 
-options.api_output_file = filtered_output_filename
+options.md_results_file = filtered_output_filename
 options.output_dir = output_base
 ppresults = process_batch_results(options)
 html_output_file = ppresults.output_html_file
@@ -2014,7 +2014,7 @@ output_base = os.path.join(postprocessing_output_folder, folder_token + \
 os.makedirs(output_base, exist_ok=True)
 print('Processing {} to {}'.format(base_task_name, output_base))
 
-options.api_output_file = sequence_smoothed_classification_file
+options.md_results_file = sequence_smoothed_classification_file
 options.output_dir = output_base
 ppresults = process_batch_results(options)
 path_utils.open_file(ppresults.output_html_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
@@ -2134,7 +2134,7 @@ output_base_large_boxes = os.path.join(postprocessing_output_folder,
 os.makedirs(output_base_large_boxes, exist_ok=True)
 print('Processing post-RDE, post-size-separation to {}'.format(output_base_large_boxes))
 
-options.api_output_file = size_separated_file
+options.md_results_file = size_separated_file
 options.output_dir = output_base_large_boxes
 
 ppresults = process_batch_results(options)
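The recurring change in the four hunks above is a field rename on the post-processing options object: api_output_file becomes md_results_file. A minimal sketch of calling code after the rename, assuming the PostProcessingOptions class name and import path (neither is shown in this diff):

from api.batch_processing.postprocessing.postprocess_batch_results import \
    PostProcessingOptions, process_batch_results

options = PostProcessingOptions()
# This attribute was called api_output_file in 5.0.8
options.md_results_file = 'combined-md-results.json'
options.output_dir = '/path/to/preview-folder'
ppresults = process_batch_results(options)
print(ppresults.output_html_file)
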
api/batch_processing/data_preparation/manage_video_batch.py CHANGED
@@ -1,11 +1,11 @@
-########
-#
-# manage_video_batch.py
-#
-# Notebook-esque script to manage the process of running a local batch of videos
-# through MD. Defers most of the heavy lifting to manage_local_batch.py .
-#
-########
+"""
+
+manage_video_batch.py
+
+Notebook-esque script to manage the process of running a local batch of videos
+through MD. Defers most of the heavy lifting to manage_local_batch.py .
+
+"""
 
 #%% Imports and constants
 
api/batch_processing/integration/digiKam/xmp_integration.py CHANGED
@@ -460,7 +460,6 @@ def main():
     else:
         process_input_data(options)
 
-
 if __name__ == '__main__':
 
     main()
api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py CHANGED
@@ -121,7 +121,6 @@ def main():
     update_data(sql)
     mysql_connection.commit()
 
-
 if __name__ == '__main__':
     main()
 
api/batch_processing/postprocessing/add_max_conf.py CHANGED
@@ -1,15 +1,15 @@
-########
-#
-# add_max_conf.py
-#
-# The MD output format included a "max_detection_conf" field with each image
-# up to and including version 1.2; it was removed as of version 1.3 (it's
-# redundant with the individual detection confidence values).
-#
-# Just in case someone took a dependency on that field, this script allows you
-# to add it back to an existing .json file.
-#
-########
+"""
+
+add_max_conf.py
+
+The MD output format included a "max_detection_conf" field with each image
+up to and including version 1.2; it was removed as of version 1.3 (it's
+redundant with the individual detection confidence values).
+
+Just in case someone took a dependency on that field, this script allows you
+to add it back to an existing .json file.
+
+"""
 
 #%% Imports and constants
 
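The docstring explains why the field was removed: max_detection_conf is derivable from the per-detection confidences. A minimal sketch of that derivation, assuming the standard MD results layout (an 'images' list with a 'conf' value per detection); this is an illustration, not the script's actual implementation, which doesn't appear in this diff:

import json

with open('md-results.json', 'r', encoding='utf-8') as f:  # hypothetical path
    results = json.load(f)

for im in results['images']:
    # Images that failed to process may carry no detections list
    detections = im.get('detections', None) or []
    im['max_detection_conf'] = max((d['conf'] for d in detections), default=0.0)
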
api/batch_processing/postprocessing/categorize_detections_by_size.py CHANGED
@@ -1,11 +1,11 @@
-########
-#
-# categorize_detections_by_size.py
-#
-# Given an API output .json file, creates a separate category for bounding boxes
-# above one or more size thresholds.
-#
-########
+"""
+
+categorize_detections_by_size.py
+
+Given a MegaDetector .json file, creates a separate category for bounding boxes
+above one or more size thresholds.
+
+"""
 
 #%% Constants and imports
 
@@ -18,24 +18,42 @@ from tqdm import tqdm
 #%% Support classes
 
 class SizeCategorizationOptions:
-
-    # Should be sorted from smallest to largest
+    """
+    Options used to parameterize categorize_detections_by_size().
+    """
+
+    #: Thresholds to use for separation, as a fraction of the image size.
+    #:
+    #: Should be sorted from smallest to largest.
     size_thresholds = [0.95]
 
-    # List of category numbers to use in separation; uses all categories if None
+    #: List of category numbers to use in separation; uses all categories if None
     categories_to_separate = None
 
-    # Can be "size", "width", or "height"
+    #: Dimension to use for thresholding; can be "size", "width", or "height"
     measurement = 'size'
 
-    # Should have the same length as "size_thresholds"
+    #: Categories to assign to thresholded ranges; should have the same length as
+    #: "size_thresholds".
     size_category_names = ['large_detection']
 
 
 #%% Main functions
 
 def categorize_detections_by_size(input_file,output_file=None,options=None):
-
+    """
+    Given a MegaDetector .json file, creates a separate category for bounding boxes
+    above one or more size thresholds, optionally writing results to [output_file].
+
+    Args:
+        input_file (str): file to process
+        output_file (str, optional): optional output file
+        options (SizeCategorizationOptions): categorization parameters
+
+    Returns:
+        dict: data loaded from [input_file], with the new size-based categories.
+        Identical to what's written to [output_file], if [output_file] is not None.
+    """
+
     if options is None:
         options = SizeCategorizationOptions()
 
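The new docstring fully specifies the call signature, so usage follows directly from it; a short sketch (file names hypothetical, import path assumed from the wheel's layout):

from api.batch_processing.postprocessing.categorize_detections_by_size import \
    SizeCategorizationOptions, categorize_detections_by_size

options = SizeCategorizationOptions()
options.size_thresholds = [0.9, 0.95]  # sorted from smallest to largest
options.size_category_names = ['large_detection', 'very_large_detection']
options.measurement = 'size'  # or 'width' / 'height'

# Writes the re-categorized results and also returns them as a dict
results = categorize_detections_by_size('md-results.json',
                                        output_file='md-results-by-size.json',
                                        options=options)
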
api/batch_processing/postprocessing/combine_api_outputs.py CHANGED
@@ -1,54 +1,56 @@
-########
-#
-# combine_api_outputs.py
-#
-# Merges two or more .json files in batch API output format, optionally
-# writing the results to another .json file.
-#
-# * Concatenates image lists, erroring if images are not unique.
-# * Errors if class lists are conflicting; errors on unrecognized fields.
-# * Checks compatibility in info structs, within reason.
-#
-# File format:
-#
-# https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
-#
-# Command-line use:
-#
-# combine_api_outputs input1.json input2.json ... inputN.json output.json
-#
-# Also see combine_api_shard_files() (not exposed via the command line yet) to
-# combine the intermediate files created by the API.
-#
-# This does no checking for redundancy; if you are looking to ensemble
-# the results of multiple model versions, see merge_detections.py.
-#
-########
+"""
+
+combine_api_outputs.py
+
+Merges two or more .json files in batch API output format, optionally
+writing the results to another .json file.
+
+* Concatenates image lists, erroring if images are not unique.
+* Errors if class lists are conflicting; errors on unrecognized fields.
+* Checks compatibility in info structs, within reason.
+
+File format:
+
+https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
+
+Command-line use:
+
+combine_api_outputs input1.json input2.json ... inputN.json output.json
+
+Also see combine_api_shard_files() (not exposed via the command line yet) to
+combine the intermediate files created by the API.
+
+This does no checking for redundancy; if you are looking to ensemble
+the results of multiple model versions, see merge_detections.py.
+
+"""
 
 #%% Constants and imports
 
 import argparse
 import sys
 import json
-from typing import Any, Dict, Iterable, Mapping, List, Optional
 
 
 #%% Merge functions
 
-def combine_api_output_files(input_files: List[str],
-                             output_file: Optional[str] = None,
-                             require_uniqueness: bool = True,
-                             verbose: bool = True
-                             ) -> Dict[str, Any]:
+def combine_api_output_files(input_files,
+                             output_file=None,
+                             require_uniqueness=True,
+                             verbose=True):
     """
-    Merges list of JSON API detection files *input_files* into a single
-    dictionary, optionally writing the result to *output_file*.
+    Merges the list of MD results files [input_files] into a single
+    dictionary, optionally writing the result to [output_file].
 
     Args:
-        input_files: list of str, paths to JSON detection files
-        output_file: optional str, path to write merged JSON
-        require_uniqueness: bool, whether to require that the images in
+        input_files (list of str): paths to JSON detection files
+        output_file (str, optional): path to write merged JSON
+        require_uniqueness (bool): whether to require that the images in
             each list of images be unique
+
+    Returns:
+        dict: merged dictionaries loaded from [input_files], identical to what's
+        written to [output_file] if [output_file] is not None
     """
 
     def print_if_verbose(s):
@@ -73,27 +75,27 @@ def combine_api_output_files(input_files,
     return merged_dict
 
 
-def combine_api_output_dictionaries(input_dicts: Iterable[Mapping[str, Any]],
-                                    require_uniqueness: bool = True
-                                    ) -> Dict[str, Any]:
+def combine_api_output_dictionaries(input_dicts, require_uniqueness=True):
     """
-    Merges the list of API detection dictionaries *input_dicts*. See header
-    comment for details on merge rules.
+    Merges the list of MD results dictionaries [input_dicts] into a single dict.
+    See module header comment for details on merge rules.
 
     Args:
-        input_dicts: list of dicts, each dict is the JSON of the detections
-            output file from the Batch Processing API
-        require_uniqueness: bool, whether to require that the images in
-            each input dict be unique
-
-    Returns: dict, represents the merged JSON
+        input_dicts (list of dicts): list of dicts in which each dict represents the
+            contents of a MD output file
+        require_uniqueness (bool): whether to require that the images in
+            each input dict be unique; if this is True and image filenames are
+            not unique, an error is raised.
+
+    Returns
+        dict: merged MD results
     """
 
     # Map image filenames to detections, we'll convert to a list later
     images = {}
-    info: Dict[str, str] = {}
-    detection_categories: Dict[str, str] = {}
-    classification_categories: Dict[str, str] = {}
+    info = {}
+    detection_categories = {}
+    classification_categories = {}
     n_redundant_images = 0
     n_images = 0
 
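For callers that already have results loaded in memory, the dictionary-level function above can be used directly; a brief sketch (input file names hypothetical):

import json

with open('chunk_000.json') as f:
    dict_a = json.load(f)
with open('chunk_001.json') as f:
    dict_b = json.load(f)

# Raises an error if the same image appears in more than one input
merged = combine_api_output_dictionaries([dict_a, dict_b], require_uniqueness=True)
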
@@ -182,8 +184,20 @@ def combine_api_output_dictionaries(input_dicts, require_uniqueness=True):
 
 def combine_api_shard_files(input_files, output_file=None):
     """
-    Merges the list of .json-formatted API shard files *input_files* into a single
-    list of dictionaries, optionally writing the result to *output_file*.
+    Merges the list of .json-formatted API shard files [input_files] into a single
+    list of dictionaries, optionally writing the result to [output_file].
+
+    This operates on mostly-deprecated API shard files, not MegaDetector results files.
+    If you don't know what an API shard file is, you don't want this function.
+
+    Args:
+        input_files (list of str): files to merge
+        output_file (str, optiona): file to which we should write merged results
+
+    Returns:
+        dict: merged results
+
+    :meta private:
     """
 
     input_lists = []
@@ -215,6 +229,7 @@ def combine_api_shard_files(input_files, output_file=None):
 #%% Command-line driver
 
 def main():
+
     parser = argparse.ArgumentParser()
     parser.add_argument(
         'input_paths', nargs='+',
220
235
  'input_paths', nargs='+',
@@ -230,6 +245,5 @@ def main():
230
245
  args = parser.parse_args()
231
246
  combine_api_output_files(args.input_paths, args.output_path)
232
247
 
233
-
234
248
  if __name__ == '__main__':
235
249
  main()
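
Given the command-line form documented in the module docstring, the equivalent Python call is a single function invocation; a sketch (file names hypothetical, 'images' being the standard key for the image list in MD results):

from api.batch_processing.postprocessing.combine_api_outputs import combine_api_output_files

# Merges the inputs and writes the result; also returns the merged dict
merged = combine_api_output_files(['input1.json', 'input2.json'],
                                  output_file='output.json')
print('Merged results contain {} images'.format(len(merged['images'])))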