megadetector 5.0.28-py3-none-any.whl → 10.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector has been flagged as potentially problematic.

Files changed (197)
  1. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  2. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  3. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  4. megadetector/classification/aggregate_classifier_probs.py +3 -3
  5. megadetector/classification/analyze_failed_images.py +5 -5
  6. megadetector/classification/cache_batchapi_outputs.py +5 -5
  7. megadetector/classification/create_classification_dataset.py +11 -12
  8. megadetector/classification/crop_detections.py +10 -10
  9. megadetector/classification/csv_to_json.py +8 -8
  10. megadetector/classification/detect_and_crop.py +13 -15
  11. megadetector/classification/efficientnet/model.py +8 -8
  12. megadetector/classification/efficientnet/utils.py +6 -5
  13. megadetector/classification/evaluate_model.py +7 -7
  14. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  15. megadetector/classification/json_to_azcopy_list.py +1 -1
  16. megadetector/classification/json_validator.py +29 -32
  17. megadetector/classification/map_classification_categories.py +9 -9
  18. megadetector/classification/merge_classification_detection_output.py +12 -9
  19. megadetector/classification/prepare_classification_script.py +19 -19
  20. megadetector/classification/prepare_classification_script_mc.py +26 -26
  21. megadetector/classification/run_classifier.py +4 -4
  22. megadetector/classification/save_mislabeled.py +6 -6
  23. megadetector/classification/train_classifier.py +1 -1
  24. megadetector/classification/train_classifier_tf.py +9 -9
  25. megadetector/classification/train_utils.py +10 -10
  26. megadetector/data_management/annotations/annotation_constants.py +1 -2
  27. megadetector/data_management/camtrap_dp_to_coco.py +79 -46
  28. megadetector/data_management/cct_json_utils.py +103 -103
  29. megadetector/data_management/cct_to_md.py +49 -49
  30. megadetector/data_management/cct_to_wi.py +33 -33
  31. megadetector/data_management/coco_to_labelme.py +75 -75
  32. megadetector/data_management/coco_to_yolo.py +210 -193
  33. megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
  34. megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
  35. megadetector/data_management/databases/integrity_check_json_db.py +228 -200
  36. megadetector/data_management/databases/subset_json_db.py +33 -33
  37. megadetector/data_management/generate_crops_from_cct.py +88 -39
  38. megadetector/data_management/get_image_sizes.py +54 -49
  39. megadetector/data_management/labelme_to_coco.py +133 -125
  40. megadetector/data_management/labelme_to_yolo.py +159 -73
  41. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  42. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  43. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  44. megadetector/data_management/lila/download_lila_subset.py +21 -24
  45. megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
  46. megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
  47. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  48. megadetector/data_management/lila/lila_common.py +73 -70
  49. megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
  50. megadetector/data_management/mewc_to_md.py +344 -340
  51. megadetector/data_management/ocr_tools.py +262 -255
  52. megadetector/data_management/read_exif.py +249 -227
  53. megadetector/data_management/remap_coco_categories.py +90 -28
  54. megadetector/data_management/remove_exif.py +81 -21
  55. megadetector/data_management/rename_images.py +187 -187
  56. megadetector/data_management/resize_coco_dataset.py +588 -120
  57. megadetector/data_management/speciesnet_to_md.py +41 -41
  58. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  59. megadetector/data_management/yolo_output_to_md_output.py +248 -122
  60. megadetector/data_management/yolo_to_coco.py +333 -191
  61. megadetector/detection/change_detection.py +832 -0
  62. megadetector/detection/process_video.py +340 -337
  63. megadetector/detection/pytorch_detector.py +358 -278
  64. megadetector/detection/run_detector.py +399 -186
  65. megadetector/detection/run_detector_batch.py +404 -377
  66. megadetector/detection/run_inference_with_yolov5_val.py +340 -327
  67. megadetector/detection/run_tiled_inference.py +257 -249
  68. megadetector/detection/tf_detector.py +24 -24
  69. megadetector/detection/video_utils.py +332 -295
  70. megadetector/postprocessing/add_max_conf.py +19 -11
  71. megadetector/postprocessing/categorize_detections_by_size.py +45 -45
  72. megadetector/postprocessing/classification_postprocessing.py +468 -433
  73. megadetector/postprocessing/combine_batch_outputs.py +23 -23
  74. megadetector/postprocessing/compare_batch_results.py +590 -525
  75. megadetector/postprocessing/convert_output_format.py +106 -102
  76. megadetector/postprocessing/create_crop_folder.py +347 -147
  77. megadetector/postprocessing/detector_calibration.py +173 -168
  78. megadetector/postprocessing/generate_csv_report.py +508 -499
  79. megadetector/postprocessing/load_api_results.py +48 -27
  80. megadetector/postprocessing/md_to_coco.py +133 -102
  81. megadetector/postprocessing/md_to_labelme.py +107 -90
  82. megadetector/postprocessing/md_to_wi.py +40 -40
  83. megadetector/postprocessing/merge_detections.py +92 -114
  84. megadetector/postprocessing/postprocess_batch_results.py +319 -301
  85. megadetector/postprocessing/remap_detection_categories.py +91 -38
  86. megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
  87. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  88. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  89. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
  90. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  91. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  92. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  93. megadetector/postprocessing/validate_batch_results.py +70 -70
  94. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  95. megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
  96. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
  97. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
  98. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  99. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  100. megadetector/taxonomy_mapping/species_lookup.py +156 -74
  101. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  102. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  103. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  104. megadetector/utils/ct_utils.py +1049 -211
  105. megadetector/utils/directory_listing.py +21 -77
  106. megadetector/utils/gpu_test.py +22 -22
  107. megadetector/utils/md_tests.py +632 -529
  108. megadetector/utils/path_utils.py +1520 -431
  109. megadetector/utils/process_utils.py +41 -41
  110. megadetector/utils/split_locations_into_train_val.py +62 -62
  111. megadetector/utils/string_utils.py +148 -27
  112. megadetector/utils/url_utils.py +489 -176
  113. megadetector/utils/wi_utils.py +2658 -2526
  114. megadetector/utils/write_html_image_list.py +137 -137
  115. megadetector/visualization/plot_utils.py +34 -30
  116. megadetector/visualization/render_images_with_thumbnails.py +39 -74
  117. megadetector/visualization/visualization_utils.py +487 -435
  118. megadetector/visualization/visualize_db.py +232 -198
  119. megadetector/visualization/visualize_detector_output.py +82 -76
  120. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
  121. megadetector-10.0.0.dist-info/RECORD +139 -0
  122. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
  123. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  124. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  125. megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
  126. megadetector/api/batch_processing/api_core/server.py +0 -294
  127. megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
  128. megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
  129. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  130. megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
  131. megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
  132. megadetector/api/batch_processing/api_core/server_utils.py +0 -88
  133. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  134. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  135. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  136. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  137. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  138. megadetector/api/synchronous/__init__.py +0 -0
  139. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  140. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
  141. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
  142. megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
  143. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  144. megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
  145. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  146. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  147. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  148. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  149. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  150. megadetector/data_management/importers/awc_to_json.py +0 -191
  151. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  152. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  153. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  154. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  155. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  156. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  157. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  158. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  159. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  160. megadetector/data_management/importers/ena24_to_json.py +0 -276
  161. megadetector/data_management/importers/filenames_to_json.py +0 -386
  162. megadetector/data_management/importers/helena_to_cct.py +0 -283
  163. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  164. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  165. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  166. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  167. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  168. megadetector/data_management/importers/missouri_to_json.py +0 -490
  169. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  170. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  171. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  172. megadetector/data_management/importers/pc_to_json.py +0 -365
  173. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  174. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  175. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  176. megadetector/data_management/importers/rspb_to_json.py +0 -356
  177. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  178. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  179. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  180. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  181. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  182. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  183. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  184. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  185. megadetector/data_management/importers/ubc_to_json.py +0 -399
  186. megadetector/data_management/importers/umn_to_json.py +0 -507
  187. megadetector/data_management/importers/wellington_to_json.py +0 -263
  188. megadetector/data_management/importers/wi_to_json.py +0 -442
  189. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  190. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  191. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  192. megadetector/utils/azure_utils.py +0 -178
  193. megadetector/utils/sas_blob_utils.py +0 -509
  194. megadetector-5.0.28.dist-info/RECORD +0 -209
  195. /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
  196. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
  197. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0

megadetector/postprocessing/load_api_results.py (+48 -27)

@@ -18,7 +18,8 @@ Includes functions to read/write the (very very old) .csv results format.
 import json
 import os

-from typing import Dict, Mapping, Optional, Tuple
+from typing import Optional
+from collections.abc import Mapping

 import pandas as pd

@@ -31,28 +32,28 @@ from megadetector.utils.wi_utils import load_md_or_speciesnet_file
 def load_api_results(api_output_path: str, normalize_paths: bool = True,
                      filename_replacements: Optional[Mapping[str, str]] = None,
                      force_forward_slashes: bool = True
-                     ) -> Tuple[pd.DataFrame, Dict]:
+                     ) -> tuple[pd.DataFrame, dict]:
     r"""
     Loads json-formatted MegaDetector results to a Pandas DataFrame.

     Args:
-        api_output_path: path to the output json file
-        normalize_paths: whether to apply os.path.normpath to the 'file' field
-            in each image entry in the output file
-        filename_replacements: replace some path tokens to match local paths to
-            the original blob structure
-        force_forward_slashes: whether to convert backslashes to forward slashes
-            in filenames
+        api_output_path (str): path to the output json file
+        normalize_paths (bool, optional): whether to apply os.path.normpath to the 'file'
+            field in each image entry in the output file
+        filename_replacements (dict, optional): replace some path tokens to match local paths
+            to the original file structure
+        force_forward_slashes (bool, optional): whether to convert backslashes to forward
+            slashes in filenames

     Returns:
         detection_results: pd.DataFrame, contains at least the columns ['file', 'detections','failure']
         other_fields: a dict containing fields in the results other than 'images'
     """
-
+
     print('Loading results from {}'.format(api_output_path))

     detection_results = load_md_or_speciesnet_file(api_output_path)
-
+
     # Validate that this is really a detector output file
     for s in ['info', 'detection_categories', 'images']:
         assert s in detection_results, 'Missing field {} in detection results'.format(s)
@@ -65,12 +66,12 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,

     if normalize_paths:
         for image in detection_results['images']:
-            image['file'] = os.path.normpath(image['file'])
+            image['file'] = os.path.normpath(image['file'])

     if force_forward_slashes:
         for image in detection_results['images']:
             image['file'] = image['file'].replace('\\','/')
-
+
     # Replace some path tokens to match local paths to original blob structure
     if filename_replacements is not None:
         for string_to_replace in filename_replacements.keys():
@@ -79,16 +80,16 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
                 im['file'] = im['file'].replace(string_to_replace,replacement_string)

     print('Converting results to dataframe')
-
+
     # If this is a newer file that doesn't include maximum detection confidence values,
     # add them, because our unofficial internal dataframe format includes this.
     for im in detection_results['images']:
         if 'max_detection_conf' not in im:
             im['max_detection_conf'] = ct_utils.get_max_conf(im)
-
+
     # Pack the json output into a Pandas DataFrame
     detection_results = pd.DataFrame(detection_results['images'])
-
+
     print('Finished loading MegaDetector results for {} images from {}'.format(
         len(detection_results),api_output_path))

@@ -98,6 +99,11 @@
 def write_api_results(detection_results_table, other_fields, out_path):
     """
     Writes a Pandas DataFrame to the MegaDetector .json format.
+
+    Args:
+        detection_results_table (DataFrame): data to write
+        other_fields (dict): additional fields to include in the output .json
+        out_path (str): output .json filename
     """

     print('Writing detection results to {}'.format(out_path))
@@ -111,7 +117,7 @@ def write_api_results(detection_results_table, other_fields, out_path):
         if 'failure' in im and im['failure'] is None:
             del im['failure']
     fields['images'] = images
-
+
     # Convert the 'version' field back to a string as per format convention
     try:
         version = other_fields['info']['format_version']
@@ -120,7 +126,7 @@ def write_api_results(detection_results_table, other_fields, out_path):
     except Exception:
         print('Warning: error determining format version')
         pass
-
+
     # Remove 'max_detection_conf' as per newer file convention (format >= v1.3)
     try:
         version = other_fields['info']['format_version']
@@ -132,20 +138,31 @@ def write_api_results(detection_results_table, other_fields, out_path):
     except Exception:
         print('Warning: error removing max_detection_conf from output')
         pass
-
+
     with open(out_path, 'w') as f:
         json.dump(fields, f, indent=1)

     print('Finished writing detection results to {}'.format(out_path))


-def load_api_results_csv(filename, normalize_paths=True, filename_replacements={}, nrows=None):
+def load_api_results_csv(filename, normalize_paths=True, filename_replacements=None, nrows=None):
     """
     [DEPRECATED]
-
+
     Loads .csv-formatted MegaDetector results to a pandas table
+
+    Args:
+        filename (str): path to the csv file to read
+        normalize_paths (bool, optional): whether to apply os.path.normpath to the 'file'
+            field in each image entry in the output file
+        filename_replacements (dict, optional): replace some path tokens to match local paths
+            to the original file structure
+        nrows (int, optional): read only the first N rows of [filename]
     """

+    if filename_replacements is None:
+        filename_replacements = {}
+
     print('Loading MegaDetector results from {}'.format(filename))

     detection_results = pd.read_csv(filename,nrows=nrows)
@@ -169,12 +186,12 @@ def load_api_results_csv(filename, normalize_paths=True, filename_replacements={

         replacement_string = filename_replacements[string_to_replace]

-        # iRow = 0
-        for iRow in range(0,len(detection_results)):
-            row = detection_results.iloc[iRow]
+        # i_row = 0
+        for i_row in range(0,len(detection_results)):
+            row = detection_results.iloc[i_row]
             fn = row['image_path']
             fn = fn.replace(string_to_replace,replacement_string)
-            detection_results.at[iRow,'image_path'] = fn
+            detection_results.at[i_row,'image_path'] = fn

     print('Finished loading and de-serializing MD results for {} images from {}'.format(
         len(detection_results),filename))
@@ -183,12 +200,16 @@ def load_api_results_csv(filename, normalize_paths=True, filename_replacements={


 def write_api_results_csv(detection_results, filename):
-    """
+    """
     [DEPRECATED]
-
+
     Writes a Pandas table to csv in a way that's compatible with the .csv output
     format. Currently just a wrapper around to_csv that forces output writing
     to go through a common code path.
+
+    Args:
+        detection_results (DataFrame): dataframe to write to [filename]
+        filename (str): .csv filename to write
     """

     print('Writing detection results to {}'.format(filename))
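
For orientation, here is a minimal usage sketch of the updated load_api_results/write_api_results API shown above; the results filename and the replacement mapping are hypothetical example values, not values from this release:

from megadetector.postprocessing.load_api_results import load_api_results, write_api_results

# Hypothetical MD-formatted results file
results_file = 'md-results.json'

# Returns a DataFrame with at least the columns ['file', 'detections', 'failure'],
# plus a dict holding the non-image fields ('info', 'detection_categories', ...)
detection_results, other_fields = load_api_results(
    results_file,
    normalize_paths=True,
    filename_replacements={'blob/prefix': 'local/prefix'},  # hypothetical token mapping
    force_forward_slashes=True)

# ...edit the DataFrame as needed, then write it back out in the MD .json format
write_api_results(detection_results, other_fields, 'md-results-edited.json')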

megadetector/postprocessing/md_to_coco.py (+133 -102)

@@ -3,7 +3,7 @@
 md_to_coco.py

 "Converts" MegaDetector output files to COCO format. "Converts" is in quotes because
-this is an opinionated transformation that requires a confidence threshold for most
+this is an opinionated transformation that requires a confidence threshold for most
 applications.

 Does not currently handle classification information.
@@ -15,6 +15,8 @@ Does not currently handle classification information.
 import os
 import json
 import uuid
+import sys
+import argparse

 from tqdm import tqdm

@@ -36,44 +38,45 @@ def md_to_coco(md_results_file,
                include_failed_images=True,
                include_annotations_without_bounding_boxes=True,
                empty_category_id='0',
-               overwrite_behavior='skip',
+               overwrite_behavior='skip',
                verbose=True,
-               image_filename_to_size=None):
+               image_filename_to_size=None,
+               unrecognized_category_handling='error'):
     """
     "Converts" MegaDetector output files to COCO format. "Converts" is in quotes because
     this is an opinionated transformation that typically requires a confidence threshold.
-
-    The default confidence threshold is not 0; the assumption is that by default, you are
+
+    The default confidence threshold is not 0; the assumption is that by default, you are
     going to treat the resulting COCO file as a set of labels. If you are using the resulting COCO
-    file to *evaluate* a detector, rather than as a set of labels, you likely want a
-    confidence threshold of 0. Confidence values will be written to the semi-standard "score"
+    file to *evaluate* a detector, rather than as a set of labels, you likely want a
+    confidence threshold of 0. Confidence values will be written to the semi-standard "score"
     field for each image (regardless of the threshold) if preserve_nonstandard_metadata is True.
-
-    A folder of images is required if width and height information are not available
+
+    A folder of images is required if width and height information are not available
     in the MD results file.

     Args:
-        md_results_file (str): MD results .json file to convert to COCO
+        md_results_file (str): MD results .json file to convert to COCO
            format
-        coco_output_file (str, optional): COCO .json file to write; if this is None, we'll return
+        coco_output_file (str, optional): COCO .json file to write; if this is None, we'll return
            a COCO-formatted dict, but won't write it to disk. If this is 'auto', we'll write to
            [md_results_file_without_extension].coco.json.
        image_folder (str, optional): folder of images, required if 'width' and 'height' are not
            present in the MD results file (they are not required by the format)
        confidence_threshold (float, optional): boxes below this confidence threshold will not be
            included in the output data
-        validate_image_sizes (bool, optional): if this is True, we'll check the image sizes
+        validate_image_sizes (bool, optional): if this is True, we'll check the image sizes
            regardless of whether "width" and "height" are present in the MD results file.
        info (dict, optional): arbitrary metadata to include in an "info" field in the COCO-formatted
            output
-        preserve_nonstandard_metadata (bool, optional): if this is True, confidence will be preserved in a
-            non-standard "score" field in each annotation, and any random fields present in each image's
-            data (e.g. EXIF metadata) will be propagated to COCO output
-        include_failed_images (bool, optional): if this is True, failed images will be propagated to COCO output
+        preserve_nonstandard_metadata (bool, optional): if this is True, confidence will be preserved in a
+            non-standard "score" field in each annotation, and any random fields present in each image's
+            data (e.g. EXIF metadata) will be propagated to COCO output
+        include_failed_images (bool, optional): if this is True, failed images will be propagated to COCO output
            with a non-empty "failure" field and no other fields, otherwise failed images will be skipped.
-        include_annotations_without_bounding_boxes (bool, optional): if this is True, annotations with
-            only class labels (no bounding boxes) will be included in the output. If this is False, empty
-            images will be represented with no annotations.
+        include_annotations_without_bounding_boxes (bool, optional): the only time we end up with
+            annotations without bounding boxes is when a detection has the category [empty_category_id];
+            this determines whether those annotations are included in the output.
        empty_category_id (str, optional): category ID reserved for the 'empty' class, should not be
            attached to any bounding boxes
        overwrite_behavior (str, optional): determines behavior if the output file exists ('skip' to skip conversion,
@@ -84,19 +87,26 @@ def md_to_coco(md_results_file,
            image sizes is the slowest step, so if you need to convert many results files at once for the same
            set of images, things will be gobs faster if you read the image sizes in advance and pass them in
            via this argument. The format used here is the same format output by parallel_get_image_sizes().
-
+        unrecognized_category_handling (str or float, optional): specifies what to do when encountering category
+            IDs not in the category mapping. Can be "error", "ignore", or "warning". Can also be a float,
+            in which case an error is thrown if an unrecognized category has a confidence value higher than
+            this value.
+
     Returns:
         dict: the COCO data dict, identical to what's written to [coco_output_file] if [coco_output_file]
         is not None.
     """
-
+
     assert isinstance(md_results_file,str)
     assert os.path.isfile(md_results_file), \
         'MD results file {} does not exist'.format(md_results_file)
+    assert (isinstance(unrecognized_category_handling,float)) or \
+        (unrecognized_category_handling in ('error','warning','ignore')), \
+        'Invalid category handling behavior {}'.format(unrecognized_category_handling)

     if coco_output_file == 'auto':
         coco_output_file = insert_before_extension(md_results_file,'coco')
-
+
     if coco_output_file is not None:
         if os.path.isfile(coco_output_file):
             if overwrite_behavior == 'skip':
@@ -120,155 +130,177 @@
                 pass
             elif overwrite_behavior == 'error':
                 raise ValueError('Output file {} exists'.format(coco_output_file))
-
-    with open(md_results_file,'r') as f:
-        md_results = json.load(f)
-
+
+    with open(md_results_file,'r') as f:
+        md_results = json.load(f)
+
     coco_images = []
     coco_annotations = []
-
-    print('Converting MD results file {} to COCO file {}...'.format(
-        md_results_file, coco_output_file))
-
+
+    if verbose:
+        print('Converting MD results file {} to COCO file {}...'.format(
+            md_results_file, coco_output_file))
+
     # im = md_results['images'][0]
     for im in tqdm(md_results['images'],disable=(not verbose)):
-
+
         coco_im = {}
         coco_im['id'] = im['file']
         coco_im['file_name'] = im['file']
-
-        # There is no concept of this in the COCO standard
+
+        # There is no concept of this in the COCO standard
         if 'failure' in im and im['failure'] is not None:
             if include_failed_images:
                 coco_im['failure'] = im['failure']
             coco_images.append(coco_im)
             continue
-
+
         # Read/validate image size
         w = None
         h = None
-
+
         if ('width' not in im) or ('height' not in im) or validate_image_sizes:
             if (image_folder is None) and (image_filename_to_size is None):
-                raise ValueError('Must provide an image folder or a size mapping when height/width need to be read from images')
-
+                raise ValueError('Must provide an image folder or a size mapping when ' + \
+                                 'height/width need to be read from images')
+
             w = None; h = None
-
+
             if image_filename_to_size is not None:
-
+
                 if im['file'] not in image_filename_to_size:
-                    print('Warning: file {} not in image size mapping dict, reading from file'.format(im['file']))
+                    print('Warning: file {} not in image size mapping dict, reading from file'.format(
+                        im['file']))
                 else:
                     image_size = image_filename_to_size[im['file']]
                     if image_size is not None:
                         assert len(image_size) == 2
                         w = image_size[0]
                         h = image_size[1]
-
+
             if w is None:
-
+
                 image_file_abs = os.path.join(image_folder,im['file'])
                 pil_im = vis_utils.open_image(image_file_abs)
                 w = pil_im.width
                 h = pil_im.height
-
+
             if validate_image_sizes:
                 if 'width' in im:
                     assert im['width'] == w, 'Width mismatch for image {}'.format(im['file'])
                 if 'height' in im:
                     assert im['height'] == h, 'Height mismatch for image {}'.format(im['file'])
         else:
-
+
             w = im['width']
             h = im['height']
-
+
         coco_im['width'] = w
         coco_im['height'] = h
-
+
         # Add other, non-standard fields to the output dict
         if preserve_nonstandard_metadata:
             for k in im.keys():
                 if k not in ('file','detections','width','height'):
                     coco_im[k] = im[k]
-
+
         coco_images.append(coco_im)
-
+
         # detection = im['detections'][0]
         for detection in im['detections']:
-
+
             # Skip below-threshold detections
             if confidence_threshold is not None and detection['conf'] < confidence_threshold:
                 continue
-
+
             # Create an annotation
-            ann = {}
+            ann = {}
             ann['id'] = str(uuid.uuid1())
-            ann['image_id'] = coco_im['id']
-
+            ann['image_id'] = coco_im['id']
+
             md_category_id = detection['category']
+
+            if md_category_id not in md_results['detection_categories']:
+
+                s = 'unrecognized category ID {} occurred with confidence {} in file {}'.format(
+                    md_category_id,detection['conf'],im['file'])
+                if isinstance(unrecognized_category_handling,float):
+                    if detection['conf'] > unrecognized_category_handling:
+                        raise ValueError(s)
+                    else:
+                        continue
+                elif unrecognized_category_handling == 'warning':
+                    print('Warning: {}'.format(s))
+                    continue
+                elif unrecognized_category_handling == 'ignore':
+                    continue
+                else:
+                    raise ValueError(s)
+
             coco_category_id = int(md_category_id)
             ann['category_id'] = coco_category_id
-
+
             if md_category_id != empty_category_id:
-
+
                 assert 'bbox' in detection,\
                     'Oops: non-empty category with no bbox in {}'.format(im['file'])
-
+
                 ann['bbox'] = detection['bbox']
-
+
                 # MegaDetector: [x,y,width,height] (normalized, origin upper-left)
                 # COCO: [x,y,width,height] (absolute, origin upper-left)
                 ann['bbox'][0] = ann['bbox'][0] * coco_im['width']
                 ann['bbox'][1] = ann['bbox'][1] * coco_im['height']
                 ann['bbox'][2] = ann['bbox'][2] * coco_im['width']
-                ann['bbox'][3] = ann['bbox'][3] * coco_im['height']
-
+                ann['bbox'][3] = ann['bbox'][3] * coco_im['height']
+
             else:
-
+
                 # In very esoteric cases, we use the empty category (0) in MD-formatted output files
-                print('Warning: empty category ({}) used for annotation in file {}'.format(
+                print('Warning: empty category ({}) used for annotation for image {}'.format(
                     empty_category_id,im['file']))
                 pass
-
+
             if preserve_nonstandard_metadata:
                 # "Score" is a semi-standard string here, recognized by at least pycocotools
                 # ann['conf'] = detection['conf']
                 ann['score'] = detection['conf']
-
+
             if 'bbox' in ann or include_annotations_without_bounding_boxes:
-                coco_annotations.append(ann)
-
+                coco_annotations.append(ann)
+
         # ...for each detection
-
+
     # ...for each image

     output_dict = {}
-
+
     if info is not None:
         output_dict['info'] = info
     else:
         output_dict['info'] = {'description':'Converted from MD results file {}'.format(md_results_file)}
     output_dict['info']['confidence_threshold'] = confidence_threshold
-
+
     output_dict['images'] = coco_images
     output_dict['annotations'] = coco_annotations
-
+
     output_dict['categories'] = []
-
+
     for md_category_id in md_results['detection_categories'].keys():
-
+
         coco_category_id = int(md_category_id)
         coco_category = {'id':coco_category_id,
                          'name':md_results['detection_categories'][md_category_id]}
         output_dict['categories'].append(coco_category)
-
-    print('Writing COCO output file...')
-
+
+    if verbose:
+        print('Writing COCO output file...')
+
     if coco_output_file is not None:
         with open(coco_output_file,'w') as f:
             json.dump(output_dict,f,indent=1)
-
-    return output_dict
+
+    return output_dict

 # ...def md_to_coco(...)

@@ -276,11 +308,11 @@
 #%% Interactive driver

 if False:
-
+
     pass

     #%% Configure options
-
+
     md_results_file = os.path.expanduser('~/data/md-test.json')
     coco_output_file = os.path.expanduser('~/data/md-test-coco.json')
     image_folder = os.path.expanduser('~/data/md-test')
@@ -290,10 +322,10 @@ if False:
     info=None
     preserve_nonstandard_metadata=True
     include_failed_images=False
-
-
+
+
     #%% Programmatic execution
-
+
     output_dict = md_to_coco(md_results_file,
                              coco_output_file=coco_output_file,
                              image_folder=image_folder,
@@ -302,10 +334,10 @@ if False:
                              info=info,
                              preserve_nonstandard_metadata=preserve_nonstandard_metadata,
                              include_failed_images=include_failed_images)
-
-
+
+
     #%% Command-line example
-
+
     s = f'python md_to_coco.py {md_results_file} {coco_output_file} {confidence_threshold} '
     if image_folder is not None:
         s += f' --image_folder {image_folder}'
@@ -316,9 +348,9 @@

     print(s); import clipboard; clipboard.copy(s)

-
+
     #%% Preview the resulting file
-
+
     from megadetector.visualization import visualize_db
     options = visualize_db.DbVizOptions()
     options.parallelize_rendering = True
@@ -327,49 +359,48 @@ if False:

     html_file,_ = visualize_db.visualize_db(coco_output_file,
                                             os.path.expanduser('~/tmp/md_to_coco_preview'),
-                                            image_folder,options)
+                                            image_folder,options)

     from megadetector.utils import path_utils # noqa
     path_utils.open_file(html_file)
-
-
-#%% Command-line driver

-import sys,argparse

-def main():
+#%% Command-line driver
+
+def main(): # noqa

     parser = argparse.ArgumentParser(
-        description='"Convert" MD output to COCO format, in quotes because this is an opinionated transformation that requires a confidence threshold')
-
+        description='"Convert" MD output to COCO format, in quotes because this is an opinionated ' + \
+            'transformation that requires a confidence threshold')
+
     parser.add_argument(
         'md_results_file',
         type=str,
         help='Path to MD results file (.json)')
-
+
     parser.add_argument(
         'coco_output_file',
         type=str,
         help='Output filename (.json)')
-
+
     parser.add_argument(
         'confidence_threshold',
         type=float,
         default=default_confidence_threshold,
         help='Confidence threshold (default {})'.format(default_confidence_threshold)
        )
-
+
     parser.add_argument(
         '--image_folder',
         type=str,
         default=None,
         help='Image folder, only required if we will need to access image sizes'
        )
-
+
     parser.add_argument(
         '--preserve_nonstandard_metadata',
         action='store_true',
-        help='Preserve metadata that isn\'t normally included in ' +
+        help='Preserve metadata that isn\'t normally included in ' +
            'COCO-formatted data (e.g. EXIF metadata, confidence values)'
        )

@@ -378,7 +409,7 @@ def main():
         action='store_true',
         help='Keep a record of corrupted images in the output; may not be completely COCO-compliant'
        )
-
+
     if len(sys.argv[1:]) == 0:
         parser.print_help()
         parser.exit()
@@ -392,7 +423,7 @@ def main():
                validate_image_sizes=False,
                info=None,
                preserve_nonstandard_metadata=args.preserve_nonstandard_metadata,
-               include_failed_images=args.include_failed_images)
-
+               include_failed_images=args.include_failed_images)
+
 if __name__ == '__main__':
     main()
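
As a rough sketch of calling the updated md_to_coco() programmatically, including the new unrecognized_category_handling option documented above; the paths and the threshold are hypothetical example values:

import os
from megadetector.postprocessing.md_to_coco import md_to_coco

# Hypothetical MD results file and matching image folder
md_results_file = os.path.expanduser('~/data/md-test.json')
image_folder = os.path.expanduser('~/data/md-test')

# 'auto' writes to [md_results_file without extension].coco.json;
# unrecognized_category_handling may be 'error', 'warning', 'ignore', or a float
# confidence value above which an unrecognized category ID raises an error.
coco_dict = md_to_coco(md_results_file,
                       coco_output_file='auto',
                       image_folder=image_folder,
                       confidence_threshold=0.2,
                       unrecognized_category_handling='warning')

print('Converted {} images and {} annotations'.format(
    len(coco_dict['images']), len(coco_dict['annotations'])))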