PyPI - megadetector - Versions diffs - 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl - Mend

megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of megadetector might be problematic. Click here for more details.

Files changed (191) hide show

api/__init__.py +0 -0
api/batch_processing/__init__.py +0 -0
api/batch_processing/api_core/__init__.py +0 -0
api/batch_processing/api_core/batch_service/__init__.py +0 -0
api/batch_processing/api_core/batch_service/score.py +0 -1
api/batch_processing/api_core/server_job_status_table.py +0 -1
api/batch_processing/api_core_support/__init__.py +0 -0
api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
api/batch_processing/api_support/__init__.py +0 -0
api/batch_processing/api_support/summarize_daily_activity.py +0 -1
api/batch_processing/data_preparation/__init__.py +0 -0
api/batch_processing/data_preparation/manage_local_batch.py +93 -79
api/batch_processing/data_preparation/manage_video_batch.py +8 -8
api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
api/batch_processing/postprocessing/__init__.py +0 -0
api/batch_processing/postprocessing/add_max_conf.py +12 -12
api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
api/batch_processing/postprocessing/compare_batch_results.py +114 -44
api/batch_processing/postprocessing/convert_output_format.py +62 -19
api/batch_processing/postprocessing/load_api_results.py +17 -20
api/batch_processing/postprocessing/md_to_coco.py +31 -21
api/batch_processing/postprocessing/md_to_labelme.py +165 -68
api/batch_processing/postprocessing/merge_detections.py +40 -15
api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
api/synchronous/__init__.py +0 -0
api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
api/synchronous/api_core/animal_detection_api/config.py +35 -35
api/synchronous/api_core/tests/__init__.py +0 -0
api/synchronous/api_core/tests/load_test.py +109 -109
classification/__init__.py +0 -0
classification/aggregate_classifier_probs.py +21 -24
classification/analyze_failed_images.py +11 -13
classification/cache_batchapi_outputs.py +51 -51
classification/create_classification_dataset.py +69 -68
classification/crop_detections.py +54 -53
classification/csv_to_json.py +97 -100
classification/detect_and_crop.py +105 -105
classification/evaluate_model.py +43 -42
classification/identify_mislabeled_candidates.py +47 -46
classification/json_to_azcopy_list.py +10 -10
classification/json_validator.py +72 -71
classification/map_classification_categories.py +44 -43
classification/merge_classification_detection_output.py +68 -68
classification/prepare_classification_script.py +157 -154
classification/prepare_classification_script_mc.py +228 -228
classification/run_classifier.py +27 -26
classification/save_mislabeled.py +30 -30
classification/train_classifier.py +20 -20
classification/train_classifier_tf.py +21 -22
classification/train_utils.py +10 -10
data_management/__init__.py +0 -0
data_management/annotations/__init__.py +0 -0
data_management/annotations/annotation_constants.py +18 -31
data_management/camtrap_dp_to_coco.py +238 -0
data_management/cct_json_utils.py +107 -59
data_management/cct_to_md.py +176 -158
data_management/cct_to_wi.py +247 -219
data_management/coco_to_labelme.py +272 -0
data_management/coco_to_yolo.py +86 -62
data_management/databases/__init__.py +0 -0
data_management/databases/add_width_and_height_to_db.py +20 -16
data_management/databases/combine_coco_camera_traps_files.py +35 -31
data_management/databases/integrity_check_json_db.py +130 -83
data_management/databases/subset_json_db.py +25 -16
data_management/generate_crops_from_cct.py +27 -45
data_management/get_image_sizes.py +188 -144
data_management/importers/add_nacti_sizes.py +8 -8
data_management/importers/add_timestamps_to_icct.py +78 -78
data_management/importers/animl_results_to_md_results.py +158 -160
data_management/importers/auckland_doc_test_to_json.py +9 -9
data_management/importers/auckland_doc_to_json.py +8 -8
data_management/importers/awc_to_json.py +7 -7
data_management/importers/bellevue_to_json.py +15 -15
data_management/importers/cacophony-thermal-importer.py +13 -13
data_management/importers/carrizo_shrubfree_2018.py +8 -8
data_management/importers/carrizo_trail_cam_2017.py +8 -8
data_management/importers/cct_field_adjustments.py +9 -9
data_management/importers/channel_islands_to_cct.py +10 -10
data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
data_management/importers/ena24_to_json.py +7 -7
data_management/importers/filenames_to_json.py +8 -8
data_management/importers/helena_to_cct.py +7 -7
data_management/importers/idaho-camera-traps.py +7 -7
data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
data_management/importers/jb_csv_to_json.py +9 -9
data_management/importers/mcgill_to_json.py +8 -8
data_management/importers/missouri_to_json.py +18 -18
data_management/importers/nacti_fieldname_adjustments.py +10 -10
data_management/importers/noaa_seals_2019.py +8 -8
data_management/importers/pc_to_json.py +7 -7
data_management/importers/plot_wni_giraffes.py +7 -7
data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
data_management/importers/prepare_zsl_imerit.py +7 -7
data_management/importers/rspb_to_json.py +8 -8
data_management/importers/save_the_elephants_survey_A.py +8 -8
data_management/importers/save_the_elephants_survey_B.py +9 -9
data_management/importers/snapshot_safari_importer.py +26 -26
data_management/importers/snapshot_safari_importer_reprise.py +665 -665
data_management/importers/snapshot_serengeti_lila.py +14 -14
data_management/importers/sulross_get_exif.py +8 -9
data_management/importers/timelapse_csv_set_to_json.py +11 -11
data_management/importers/ubc_to_json.py +13 -13
data_management/importers/umn_to_json.py +7 -7
data_management/importers/wellington_to_json.py +8 -8
data_management/importers/wi_to_json.py +9 -9
data_management/importers/zamba_results_to_md_results.py +181 -181
data_management/labelme_to_coco.py +309 -159
data_management/labelme_to_yolo.py +103 -60
data_management/lila/__init__.py +0 -0
data_management/lila/add_locations_to_island_camera_traps.py +9 -9
data_management/lila/add_locations_to_nacti.py +147 -147
data_management/lila/create_lila_blank_set.py +114 -31
data_management/lila/create_lila_test_set.py +8 -8
data_management/lila/create_links_to_md_results_files.py +106 -106
data_management/lila/download_lila_subset.py +92 -90
data_management/lila/generate_lila_per_image_labels.py +56 -43
data_management/lila/get_lila_annotation_counts.py +18 -15
data_management/lila/get_lila_image_counts.py +11 -11
data_management/lila/lila_common.py +103 -70
data_management/lila/test_lila_metadata_urls.py +132 -116
data_management/ocr_tools.py +173 -128
data_management/read_exif.py +161 -99
data_management/remap_coco_categories.py +84 -0
data_management/remove_exif.py +58 -62
data_management/resize_coco_dataset.py +32 -44
data_management/wi_download_csv_to_coco.py +246 -0
data_management/yolo_output_to_md_output.py +86 -73
data_management/yolo_to_coco.py +535 -95
detection/__init__.py +0 -0
detection/detector_training/__init__.py +0 -0
detection/process_video.py +85 -33
detection/pytorch_detector.py +43 -25
detection/run_detector.py +157 -72
detection/run_detector_batch.py +189 -114
detection/run_inference_with_yolov5_val.py +118 -51
detection/run_tiled_inference.py +113 -42
detection/tf_detector.py +51 -28
detection/video_utils.py +606 -521
docs/source/conf.py +43 -0
md_utils/__init__.py +0 -0
md_utils/azure_utils.py +9 -9
md_utils/ct_utils.py +249 -70
md_utils/directory_listing.py +59 -64
md_utils/md_tests.py +968 -862
md_utils/path_utils.py +655 -155
md_utils/process_utils.py +157 -133
md_utils/sas_blob_utils.py +20 -20
md_utils/split_locations_into_train_val.py +45 -32
md_utils/string_utils.py +33 -10
md_utils/url_utils.py +208 -27
md_utils/write_html_image_list.py +51 -35
md_visualization/__init__.py +0 -0
md_visualization/plot_utils.py +102 -109
md_visualization/render_images_with_thumbnails.py +34 -34
md_visualization/visualization_utils.py +908 -311
md_visualization/visualize_db.py +109 -58
md_visualization/visualize_detector_output.py +61 -42
{megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
megadetector-5.0.9.dist-info/RECORD +224 -0
{megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
{megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
taxonomy_mapping/__init__.py +0 -0
taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
taxonomy_mapping/map_new_lila_datasets.py +154 -154
taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
taxonomy_mapping/preview_lila_taxonomy.py +591 -591
taxonomy_mapping/retrieve_sample_image.py +12 -12
taxonomy_mapping/simple_image_download.py +11 -11
taxonomy_mapping/species_lookup.py +10 -10
taxonomy_mapping/taxonomy_csv_checker.py +18 -18
taxonomy_mapping/taxonomy_graph.py +47 -47
taxonomy_mapping/validate_lila_category_mappings.py +83 -76
data_management/cct_json_to_filename_json.py +0 -89
data_management/cct_to_csv.py +0 -140
data_management/databases/remove_corrupted_images_from_db.py +0 -191
detection/detector_training/copy_checkpoints.py +0 -43
md_visualization/visualize_megadb.py +0 -183
megadetector-5.0.7.dist-info/RECORD +0 -202
{megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0

api/batch_processing/postprocessing/compare_batch_results.py CHANGED Viewed

@@ -1,19 +1,19 @@
-########
-#
-# compare_batch_results.py
-#
-# Compare sets of batch results; typically used to compare:
-#
-# * Results from different MegaDetector versions
-# * Results before/after RDE
-# * Results with/without augmentation
-#
-# Makes pairwise comparisons, but can take lists of results files (will perform
-# all pairwise comparisons).  Results are written to an HTML page that shows the number
-# and nature of disagreements (in the sense of each image being a detection or non-detection),
-# with sample images for each category.
-#
-########
+"""
+compare_batch_results.py
+Compare sets of batch results; typically used to compare:
+* Results from different MegaDetector versions
+* Results before/after RDE
+* Results with/without augmentation
+Makes pairwise comparisons, but can take lists of results files (will perform
+all pairwise comparisons).  Results are written to an HTML page that shows the number
+and nature of disagreements (in the sense of each image being a detection or non-detection),
+with sample images for each category.
+"""
 #%% Imports
@@ -43,16 +43,28 @@ class PairwiseBatchComparisonOptions:
     pairwise options sets is stored in the BatchComparisonsOptions class.
     """
+    #: First filename to compare
     results_filename_a = None
+    #: Second filename to compare
     results_filename_b = None
+    #: Description to use in the output HTML for filename A
     results_description_a = None
+    #: Description to use in the output HTML for filename B
     results_description_b = None
+    #: Per-class detection thresholds to use for filename A (including a 'default' threshold)
     detection_thresholds_a = {'animal':0.15,'person':0.15,'vehicle':0.15,'default':0.15}
+    #: Per-class detection thresholds to use for filename B (including a 'default' threshold)
     detection_thresholds_b = {'animal':0.15,'person':0.15,'vehicle':0.15,'default':0.15}
+    #: Rendering threshold to use for all categories for filename A
     rendering_confidence_threshold_a = 0.1
+    #: Rendering threshold to use for all categories for filename B
     rendering_confidence_threshold_b = 0.1
 # ...class PairwiseBatchComparisonOptions
@@ -63,33 +75,56 @@ class BatchComparisonOptions:
     Defines the options for a set of (possibly many) pairwise comparisons.
     """
+    #: Folder to which we should write HTML output
     output_folder = None
+    #: Base folder for images (which are specified as relative files)
     image_folder = None
+    #: Job name to use in the HTML output file
     job_name = ''
+    #: Maximum number of images to render for each category, where a "category" here is
+    #: "detections_a_only", "detections_b_only", etc., or None to render all images.
     max_images_per_category = 1000
+    #: Maximum number of images per HTML page (paginates if a category page goes beyond this),
+    #: or None to disable pagination.
     max_images_per_page = None
+    #: Colormap to use for detections in file A (maps detection categories to colors)
     colormap_a = ['Red']
+    #: Colormap to use for detections in file B (maps detection categories to colors)
     colormap_b = ['RoyalBlue']
-    # Process-based parallelization isn't supported yet; this must be "True"
+    #: Process-based parallelization isn't supported yet; this must be "True"
     parallelize_rendering_with_threads = True
-    # List of filenames to include in the comparison, or None to use all files
+    #: List of filenames to include in the comparison, or None to use all files
     filenames_to_include = None
-    # Compare only detections/non-detections, ignore categories (still renders categories)
+    #: Compare only detections/non-detections, ignore categories (still renders categories)
     class_agnostic_comparison = False
+    #: Width of images to render in the output HTML
     target_width = 800
+    #: Number of workers to use for rendering, or <=1 to disable parallelization
     n_rendering_workers = 20
+    #: Random seed for image sampling (not used if max_images_per_category is None)
     random_seed = 0
-    # Default to sorting by filename
+    #: Whether to sort results by confidence; if this is False, sorts by filename
     sort_by_confidence = False
+    #: The expectation is that all results sets being compared will refer to the same images; if this
+    #: is True (default), we'll error if that's not the case, otherwise non-matching lists will just be
+    #: a warning.
     error_on_non_matching_lists = True
+    #: List of PairwiseBatchComparisonOptions that defines the comparisons we'll render.
     pairwise_options = []
 # ...class BatchComparisonOptions
@@ -100,18 +135,21 @@ class PairwiseBatchComparisonResults:
     The results from a single pairwise comparison.
     """
+    #: String of HTML content suitable for rendering to an HTML file
     html_content = None
+    #: Possibly-modified version of the PairwiseBatchComparisonOptions supplied as input.
     pairwise_options = None
-    # A dictionary with keys including:
-    #
-    # common_detections
-    # common_non_detections
-    # detections_a_only
-    # detections_b_only
-    # class_transitions
+    #: A dictionary with keys including:
+    #:
+    #: common_detections
+    #: common_non_detections
+    #: detections_a_only
+    #: detections_b_only
+    #: class_transitions
     #
-    # Each of these maps a filename to a two-element list (the image in set A, the image in set B).
+    #: Each of these maps a filename to a two-element list (the image in set A, the image in set B).
     categories_to_image_pairs = None
 # ...class PairwiseBatchComparisonResults
@@ -122,9 +160,10 @@ class BatchComparisonResults:
     The results from a set of pairwise comparisons
     """
+    #: Filename containing HTML output
     html_output_file = None
-    # An list of PairwiseBatchComparisonResults
+    #: A list of PairwiseBatchComparisonResults
     pairwise_results = None
 # ...class BatchComparisonResults
@@ -144,9 +183,20 @@ main_page_footer = '<br/><br/><br/></body></html>\n'
 #%% Comparison functions
-def render_image_pair(fn,image_pairs,category_folder,options,pairwise_options):
+def _render_image_pair(fn,image_pairs,category_folder,options,pairwise_options):
     """
     Render two sets of results (i.e., a comparison) for a single image.
+    Args:
+        fn (str): image filename
+        image_pairs (dict): dict mapping filenames to pairs of image dicts
+        category_folder (str): folder to which to render this image, typically
+            "detections_a_only", "detections_b_only", etc.
+        options (BatchComparisonOptions): job options
+        pairwise_options (PairwiseBatchComparisonOptions): pairwise comparison options
+    Returns:
+        str: rendered image filename
     """
     input_image_path = os.path.join(options.image_folder,fn)
@@ -194,20 +244,22 @@ def render_image_pair(fn,image_pairs,category_folder,options,pairwise_options):
     im.save(output_image_path)
     return output_image_path
-# ...def render_image_pair()
+# ...def _render_image_pair()
-def pairwise_compare_batch_results(options,output_index,pairwise_options):
+def _pairwise_compare_batch_results(options,output_index,pairwise_options):
     """
     The main entry point for this module is compare_batch_results(), which calls
     this function for each pair of comparisons the caller has requested.  Generates an
     HTML page for this comparison.  Returns a BatchComparisonResults object.
-    options: an instance of BatchComparisonOptions
-    output_index: a numeric index used for generating HTML titles
-    pairwise_options: an instance of PairwiseBatchComparisonOptions
+    Args:
+        options (BatchComparisonOptions): overall job options for this comparison group
+        output_index (int): a numeric index used for generating HTML titles
+        pairwise_options (PairwiseBatchComparisonOptions): job options for this comparison
+    Returns:
+        PairwiseBatchComparisonResults: the results of this pairwise comparison
     """
     # pairwise_options is passed as a parameter here, and should not be specified
@@ -291,7 +343,7 @@ def pairwise_compare_batch_results(options,output_index,pairwise_options):
     filenames_b_set = set([im['file'] for im in images_b])
     if len(images_a) != len(images_b):
-        s = 'set A has {} iamges, set B has {}'.format(len(images_a),len(images_b))
+        s = 'set A has {} images, set B has {}'.format(len(images_a),len(images_b))
         if options.error_on_non_matching_lists:
             raise ValueError(s)
         else:
@@ -463,11 +515,11 @@ def pairwise_compare_batch_results(options,output_index,pairwise_options):
         if options.n_rendering_workers <= 1:
             output_image_paths = []
             for fn in tqdm(image_filenames):
-                output_image_paths.append(render_image_pair(fn,image_pairs,category_folder,
+                output_image_paths.append(_render_image_pair(fn,image_pairs,category_folder,
                                                             options,pairwise_options))
         else:
             output_image_paths = list(tqdm(pool.imap(
-                partial(render_image_pair, image_pairs=image_pairs,
+                partial(_render_image_pair, image_pairs=image_pairs,
                         category_folder=category_folder,options=options,
                         pairwise_options=pairwise_options),
                 image_filenames),
@@ -644,14 +696,20 @@ def pairwise_compare_batch_results(options,output_index,pairwise_options):
     return pairwise_results
-# ...def pairwise_compare_batch_results()
+# ...def _pairwise_compare_batch_results()
 def compare_batch_results(options):
     """
     The main entry point for this module.  Runs one or more batch results comparisons,
-    writing results to an html page.  Most of the work is deferred to
-    pairwise_compare_batch_results().
+    writing results to an html page.  Most of the work is deferred to _pairwise_compare_batch_results().
+    Args:
+        options (BatchComparisonOptions): job options to use for this comparison task, including the
+            list of specific pairwise comparisons to make (in the pairwise_options field)
+    Returns:
+        BatchComparisonResults: the results of this comparison task
     """
     assert options.output_folder is not None
@@ -675,7 +733,7 @@ def compare_batch_results(options):
     for i_comparison,pairwise_options in enumerate(pairwise_options_list):
         print('Running comparison {} of {}'.format(i_comparison,n_comparisons))
         pairwise_results = \
-            pairwise_compare_batch_results(options,i_comparison,pairwise_options)
+            _pairwise_compare_batch_results(options,i_comparison,pairwise_options)
         html_content += pairwise_results.html_content
         all_pairwise_results.append(pairwise_results)
@@ -702,6 +760,18 @@ def n_way_comparison(filenames,options,detection_thresholds=None,rendering_thres
     """
     Performs N pairwise comparisons for the list of results files in [filenames], by generating
     sets of pairwise options and calling compare_batch_results.
+    Args:
+        filenames (list): list of MD results filenames to compare
+        options (BatchComparisonOptions): task options set in which pairwise_options is still
+            empty; that will get populated from [filenames]
+        detection_thresholds (list, optional): list of detection thresholds with the same length
+            as [filenames], or None to use sensible defaults
+        rendering_thresholds (list, optional): list of rendering thresholds with the same length
+            as [filenames], or None to use sensible defaults
+    Returns:
+        BatchComparisonResults: the results of this comparison task
     """
     if detection_thresholds is None:

api/batch_processing/postprocessing/convert_output_format.py CHANGED Viewed

@@ -1,13 +1,15 @@
-########
-#
-# convert_output_format.py
-#
-# Converts between file formats output by our batch processing API.  Currently
-# supports json <--> csv conversion, but this should be the landing place for any
-# conversion - including between future .json versions - that we support in the
-# future.
-#
-########
+"""
+convert_output_format.py
+Converts between file formats output by our batch processing API.  Currently
+supports json <--> csv conversion, but this should be the landing place for any
+conversion - including between hypothetical alternative .json versions - that we support
+in the future.
+The .csv format is largely obsolete, don't use it unless you're super-duper sure you need it.
+"""
 #%% Constants and imports
@@ -30,18 +32,39 @@ CONF_DIGITS = 3
 #%% Conversion functions
 def convert_json_to_csv(input_path,output_path=None,min_confidence=None,
-                        omit_bounding_boxes=False,output_encoding=None):
+                        omit_bounding_boxes=False,output_encoding=None,
+                        overwrite=True):
     """
-    Convert .json to .csv
+    Converts a MD results .json file to a totally non-standard .csv format.
+    If [output_path] is None, will convert x.json to x.csv.
     TODO: this function should obviously be using Pandas or some other sensible structured
     representation of tabular data.  Even a list of dicts.  This implementation is quite
     brittle and depends on adding fields to every row in exactly the right order.
+    Args:
+        input_path (str): the input .json file to convert
+        output_path (str, optional): the output .csv file to generate; if this is None, uses
+            [input_path].csv
+        min_confidence (float, optional): the minimum-confidence detection we should include
+            in the "detections" column; has no impact on the other columns
+        omit_bounding_boxes (bool): whether to leave out the json-formatted bounding boxes
+            that make up the "detections" column, which are not generally useful for someone who
+            wants to consume this data as a .csv file
+        output_encoding (str, optional): encoding to use for the .csv file
+        overwrite (bool): whether to overwrite an existing .csv file; if this is False and the
+            output file exists, no-ops and returns
     """
     if output_path is None:
         output_path = os.path.splitext(input_path)[0]+'.csv'
+    if os.path.isfile(output_path) and (not overwrite):
+        print('File {} exists, skipping json --> csv conversion'.format(output_path))
+        return
     print('Loading json results from {}...'.format(input_path))
     json_output = json.load(open(input_path))
@@ -51,11 +74,12 @@ def convert_json_to_csv(input_path,output_path=None,min_confidence=None,
     # We add an output column for each class other than 'empty',
     # containing the maximum probability of  that class for each image
-    n_non_empty_detection_categories = len(annotation_constants.annotation_bbox_categories) - 1
+    # n_non_empty_detection_categories = len(annotation_constants.annotation_bbox_categories) - 1
+    n_non_empty_detection_categories = annotation_constants.NUM_DETECTOR_CATEGORIES
     detection_category_column_names = []
-    assert annotation_constants.annotation_bbox_category_id_to_name[0] == 'empty'
+    assert annotation_constants.detector_bbox_categories[0] == 'empty'
     for cat_id in range(1,n_non_empty_detection_categories+1):
-        cat_name = annotation_constants.annotation_bbox_category_id_to_name[cat_id]
+        cat_name = annotation_constants.detector_bbox_categories[cat_id]
         detection_category_column_names.append('max_conf_' + cat_name)
     n_classification_categories = 0
@@ -73,7 +97,7 @@ def convert_json_to_csv(input_path,output_path=None,min_confidence=None,
         n_classification_categories = len(classification_category_ids)
-    # There are several fields for which we add columns, other random bespoke fields
+    # There are several .json fields for which we add .csv columns; other random bespoke fields
     # will be ignored.
     optional_fields = ['width','height','datetime','exif_metadata']
     optional_fields_present = set()
@@ -104,7 +128,7 @@ def convert_json_to_csv(input_path,output_path=None,min_confidence=None,
         if 'failure' in im and im['failure'] is not None:
             row = [image_id, 'failure', im['failure']]
             rows.append(row)
-            print('Skipping failed image {} ({})'.format(im['file'],im['failure']))
+            # print('Skipping failed image {} ({})'.format(im['file'],im['failure']))
             continue
         max_conf = ct_utils.get_max_conf(im)
@@ -193,12 +217,29 @@ def convert_json_to_csv(input_path,output_path=None,min_confidence=None,
         writer.writerow(header)
         writer.writerows(rows)
+# ...def convert_json_to_csv(...)
-def convert_csv_to_json(input_path,output_path=None):
+def convert_csv_to_json(input_path,output_path=None,overwrite=True):
+    """
+    Convert .csv to .json.  If output_path is None, will convert x.csv to x.json.
+    Args:
+        input_path (str): .csv filename to convert to .json
+        output_path (str, optional): the output .json file to generate; if this is None, uses
+            [input_path].json
+        overwrite (bool): whether to overwrite an existing .json file; if this is False and the
+            output file exists, no-ops and returns
+    """
     if output_path is None:
         output_path = os.path.splitext(input_path)[0]+'.json'
+    if os.path.isfile(output_path) and (not overwrite):
+        print('File {} exists, skipping csv --> json conversion'.format(output_path))
+        return
     # Format spec:
     #
     # https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing
@@ -215,7 +256,7 @@ def convert_csv_to_json(input_path,output_path=None):
     }
     classification_categories = {}
-    detection_categories = annotation_constants.annotation_bbox_category_id_to_name
+    detection_categories = annotation_constants.detector_bbox_categories
     images = []
@@ -259,6 +300,8 @@ def convert_csv_to_json(input_path,output_path=None):
     json_out['images'] = images
     json.dump(json_out,open(output_path,'w'),indent=1)
+# ...def convert_csv_to_json(...)
 #%% Interactive driver

api/batch_processing/postprocessing/load_api_results.py CHANGED Viewed

@@ -1,17 +1,17 @@
-########
-#
-# load_api_results.py
-#
-# DEPRECATED
-#
-# As of 2023.12, this module is used in postprocessing and RDE.  Not recommended
-# for new code.
-#
-# Loads the output of the batch processing API (json) into a Pandas dataframe.
-#
-# Includes functions to read/write the (very very old) .csv results format.
-#
-########
+"""
+load_api_results.py
+DEPRECATED
+As of 2023.12, this module is used in postprocessing and RDE.  Not recommended
+for new code.
+Loads the output of the batch processing API (json) into a Pandas dataframe.
+Includes functions to read/write the (very very old) .csv results format.
+"""
 #%% Imports
@@ -31,7 +31,7 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
                      filename_replacements: Optional[Mapping[str, str]] = None,
                      force_forward_slashes: bool = True
                      ) -> Tuple[pd.DataFrame, Dict]:
-    """
+    r"""
     Loads json-formatted MegaDetector results to a Pandas DataFrame.
     Args:
@@ -44,8 +44,7 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
             in filenames
     Returns:
-        detection_results: pd.DataFrame, contains at least the columns:
-                ['file', 'detections','failure']
+        detection_results: pd.DataFrame, contains at least the columns ['file', 'detections','failure']
         other_fields: a dict containing fields in the results other than 'images'
     """
@@ -64,11 +63,9 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
         if k != 'images':
             other_fields[k] = v
-    # Normalize paths to simplify comparisons later
     if normalize_paths:
         for image in detection_results['images']:
-            image['file'] = os.path.normpath(image['file'])
-            # image['file'] = image['file'].replace('\\','/')
+            image['file'] = os.path.normpath(image['file'])
     if force_forward_slashes:
         for image in detection_results['images']:

api/batch_processing/postprocessing/md_to_coco.py CHANGED Viewed

@@ -1,13 +1,13 @@
-########
-#
-# md_to_coco.py
-#
-# "Converts" MegaDetector output files to COCO format.  "Converts" is in quotes because
-# this is an opinionated transformation that requires a confidence threshold.
-#
-# Does not currently handle classification information.
-#
-########
+"""
+md_to_coco.py
+"Converts" MegaDetector output files to COCO format.  "Converts" is in quotes because
+this is an opinionated transformation that requires a confidence threshold.
+Does not currently handle classification information.
+"""
 #%% Constants and imports
@@ -38,18 +38,28 @@ def md_to_coco(md_results_file,
     A folder of images is required if width and height information are not available
     in the MD results file.
+    Args:
+        md_results_file (str): MD results .json file to convert to COCO format
+        coco_output_file (str, optional): COCO .json file to write; if this is None, we'll return
+            a COCO-formatted dict, but won't write it to disk
+        image_folder (str, optional): folder of images, required if 'width' and 'height' are not
+            present in the MD results file (they are not required by the format)
+        confidence_threshold (float, optional): boxes below this confidence threshold will not be
+            included in the output data
+        validate_image_sizes (bool, optional): if this is True, we'll check the image sizes
+            regardless of whether "width" and "height" are present in the MD results file.
+        info (dict, optional): arbitrary metadata to include in an "info" field in the COCO-formatted
+            output
+        preserve_nonstandard_metadata (bool, optional): if this is True, confidence will be preserved in a
+            non-standard "conf" field in each annotation, and any random fields present in each image's data
+            (e.g. EXIF metadata) will be propagated to COCO output
+        include_failed_images (bool, optional): if this is True, failed images will be propagated to COCO output
+            with a non-empty "failure" field and no other fields, otherwise failed images will be skipped.
-    If validate_image_sizes is True, we'll check the image sizes regardless of whether width
-    and height are present in the MD results file.
-    If preserve_nonstandard_metadata is True, confidence will be preserved in a non-standard
-    "conf" field in each annotation, and any random fields present in each image's data (e.g.
-    EXIF metadata) will be propagated to COCO output.
-    If include_failed_images is True, failed images will be propagated to COCO output with
-    a non-empty "failure" field and no other fields, otherwise failed images will be skipped.
-    Returns the COCO json dict.
+    Returns:
+        dict: the COCO data dict, identical to what's written to [coco_output_file] if [coco_output_file]
+        is not None.
     """
     with open(md_results_file,'r') as f:

megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl

Potentially problematic release.

megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl