PyPI - megadetector - Versions diffs - 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl - Mend

megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of megadetector might be problematic. Click here for more details.

Files changed (197)

megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
megadetector/classification/aggregate_classifier_probs.py +3 -3
megadetector/classification/analyze_failed_images.py +5 -5
megadetector/classification/cache_batchapi_outputs.py +5 -5
megadetector/classification/create_classification_dataset.py +11 -12
megadetector/classification/crop_detections.py +10 -10
megadetector/classification/csv_to_json.py +8 -8
megadetector/classification/detect_and_crop.py +13 -15
megadetector/classification/efficientnet/model.py +8 -8
megadetector/classification/efficientnet/utils.py +6 -5
megadetector/classification/evaluate_model.py +7 -7
megadetector/classification/identify_mislabeled_candidates.py +6 -6
megadetector/classification/json_to_azcopy_list.py +1 -1
megadetector/classification/json_validator.py +29 -32
megadetector/classification/map_classification_categories.py +9 -9
megadetector/classification/merge_classification_detection_output.py +12 -9
megadetector/classification/prepare_classification_script.py +19 -19
megadetector/classification/prepare_classification_script_mc.py +26 -26
megadetector/classification/run_classifier.py +4 -4
megadetector/classification/save_mislabeled.py +6 -6
megadetector/classification/train_classifier.py +1 -1
megadetector/classification/train_classifier_tf.py +9 -9
megadetector/classification/train_utils.py +10 -10
megadetector/data_management/annotations/annotation_constants.py +1 -2
megadetector/data_management/camtrap_dp_to_coco.py +79 -46
megadetector/data_management/cct_json_utils.py +103 -103
megadetector/data_management/cct_to_md.py +49 -49
megadetector/data_management/cct_to_wi.py +33 -33
megadetector/data_management/coco_to_labelme.py +75 -75
megadetector/data_management/coco_to_yolo.py +210 -193
megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
megadetector/data_management/databases/integrity_check_json_db.py +228 -200
megadetector/data_management/databases/subset_json_db.py +33 -33
megadetector/data_management/generate_crops_from_cct.py +88 -39
megadetector/data_management/get_image_sizes.py +54 -49
megadetector/data_management/labelme_to_coco.py +133 -125
megadetector/data_management/labelme_to_yolo.py +159 -73
megadetector/data_management/lila/create_lila_blank_set.py +81 -83
megadetector/data_management/lila/create_lila_test_set.py +32 -31
megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
megadetector/data_management/lila/download_lila_subset.py +21 -24
megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
megadetector/data_management/lila/get_lila_image_counts.py +22 -22
megadetector/data_management/lila/lila_common.py +73 -70
megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
megadetector/data_management/mewc_to_md.py +344 -340
megadetector/data_management/ocr_tools.py +262 -255
megadetector/data_management/read_exif.py +249 -227
megadetector/data_management/remap_coco_categories.py +90 -28
megadetector/data_management/remove_exif.py +81 -21
megadetector/data_management/rename_images.py +187 -187
megadetector/data_management/resize_coco_dataset.py +588 -120
megadetector/data_management/speciesnet_to_md.py +41 -41
megadetector/data_management/wi_download_csv_to_coco.py +55 -55
megadetector/data_management/yolo_output_to_md_output.py +248 -122
megadetector/data_management/yolo_to_coco.py +333 -191
megadetector/detection/change_detection.py +832 -0
megadetector/detection/process_video.py +340 -337
megadetector/detection/pytorch_detector.py +358 -278
megadetector/detection/run_detector.py +399 -186
megadetector/detection/run_detector_batch.py +404 -377
megadetector/detection/run_inference_with_yolov5_val.py +340 -327
megadetector/detection/run_tiled_inference.py +257 -249
megadetector/detection/tf_detector.py +24 -24
megadetector/detection/video_utils.py +332 -295
megadetector/postprocessing/add_max_conf.py +19 -11
megadetector/postprocessing/categorize_detections_by_size.py +45 -45
megadetector/postprocessing/classification_postprocessing.py +468 -433
megadetector/postprocessing/combine_batch_outputs.py +23 -23
megadetector/postprocessing/compare_batch_results.py +590 -525
megadetector/postprocessing/convert_output_format.py +106 -102
megadetector/postprocessing/create_crop_folder.py +347 -147
megadetector/postprocessing/detector_calibration.py +173 -168
megadetector/postprocessing/generate_csv_report.py +508 -499
megadetector/postprocessing/load_api_results.py +48 -27
megadetector/postprocessing/md_to_coco.py +133 -102
megadetector/postprocessing/md_to_labelme.py +107 -90
megadetector/postprocessing/md_to_wi.py +40 -40
megadetector/postprocessing/merge_detections.py +92 -114
megadetector/postprocessing/postprocess_batch_results.py +319 -301
megadetector/postprocessing/remap_detection_categories.py +91 -38
megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
megadetector/postprocessing/separate_detections_into_folders.py +226 -211
megadetector/postprocessing/subset_json_detector_output.py +265 -262
megadetector/postprocessing/top_folders_to_bottom.py +45 -45
megadetector/postprocessing/validate_batch_results.py +70 -70
megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
megadetector/taxonomy_mapping/simple_image_download.py +8 -8
megadetector/taxonomy_mapping/species_lookup.py +156 -74
megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
megadetector/utils/ct_utils.py +1049 -211
megadetector/utils/directory_listing.py +21 -77
megadetector/utils/gpu_test.py +22 -22
megadetector/utils/md_tests.py +632 -529
megadetector/utils/path_utils.py +1520 -431
megadetector/utils/process_utils.py +41 -41
megadetector/utils/split_locations_into_train_val.py +62 -62
megadetector/utils/string_utils.py +148 -27
megadetector/utils/url_utils.py +489 -176
megadetector/utils/wi_utils.py +2658 -2526
megadetector/utils/write_html_image_list.py +137 -137
megadetector/visualization/plot_utils.py +34 -30
megadetector/visualization/render_images_with_thumbnails.py +39 -74
megadetector/visualization/visualization_utils.py +487 -435
megadetector/visualization/visualize_db.py +232 -198
megadetector/visualization/visualize_detector_output.py +82 -76
{megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
megadetector-10.0.0.dist-info/RECORD +139 -0
{megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
megadetector/api/batch_processing/api_core/__init__.py +0 -0
megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
megadetector/api/batch_processing/api_core/server.py +0 -294
megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
megadetector/api/batch_processing/api_core/server_utils.py +0 -88
megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
megadetector/api/batch_processing/api_support/__init__.py +0 -0
megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
megadetector/api/synchronous/__init__.py +0 -0
megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
megadetector/data_management/importers/add_nacti_sizes.py +0 -52
megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
megadetector/data_management/importers/awc_to_json.py +0 -191
megadetector/data_management/importers/bellevue_to_json.py +0 -272
megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
megadetector/data_management/importers/cct_field_adjustments.py +0 -58
megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
megadetector/data_management/importers/ena24_to_json.py +0 -276
megadetector/data_management/importers/filenames_to_json.py +0 -386
megadetector/data_management/importers/helena_to_cct.py +0 -283
megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
megadetector/data_management/importers/jb_csv_to_json.py +0 -150
megadetector/data_management/importers/mcgill_to_json.py +0 -250
megadetector/data_management/importers/missouri_to_json.py +0 -490
megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
megadetector/data_management/importers/noaa_seals_2019.py +0 -181
megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
megadetector/data_management/importers/pc_to_json.py +0 -365
megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
megadetector/data_management/importers/rspb_to_json.py +0 -356
megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
megadetector/data_management/importers/sulross_get_exif.py +0 -65
megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
megadetector/data_management/importers/ubc_to_json.py +0 -399
megadetector/data_management/importers/umn_to_json.py +0 -507
megadetector/data_management/importers/wellington_to_json.py +0 -263
megadetector/data_management/importers/wi_to_json.py +0 -442
megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
megadetector/utils/azure_utils.py +0 -178
megadetector/utils/sas_blob_utils.py +0 -509
megadetector-5.0.28.dist-info/RECORD +0 -209
/megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
{megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
{megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0

megadetector/postprocessing/create_crop_folder.py CHANGED Viewed

@@ -11,6 +11,8 @@ of images representing all above-threshold crops from the original folder.
 import os
 import json
+import argparse
 from tqdm import tqdm
 from multiprocessing.pool import Pool, ThreadPool
@@ -19,6 +21,7 @@ from functools import partial
 from megadetector.utils.path_utils import insert_before_extension
 from megadetector.utils.ct_utils import invert_dictionary
+from megadetector.utils.ct_utils import is_list_sorted
 from megadetector.visualization.visualization_utils import crop_image
 from megadetector.visualization.visualization_utils import exif_preserving_save
@@ -29,24 +32,24 @@ class CreateCropFolderOptions:
     """
     Options used to parameterize create_crop_folder().
     """
     def __init__(self):
         #: Confidence threshold determining which detections get written
         self.confidence_threshold = 0.1
         #: Number of pixels to expand each crop
         self.expansion = 0
         #: JPEG quality to use for saving crops (None for default)
         self.quality = 95
         #: Whether to overwrite existing images
         self.overwrite = True
         #: Number of concurrent workers
         self.n_workers = 8
         #: Whether to use processes ('process') or threads ('thread') for parallelization
         self.pool_type = 'thread'
@@ -54,14 +57,15 @@ class CreateCropFolderOptions:
         #:
         #: options.category_names_to_include = ['animal']
         self.category_names_to_include = None
 #%% Support functions
 def _get_crop_filename(image_fn,crop_id):
     """
     Generate crop filenames in a consistent way.
     """
     if isinstance(crop_id,int):
         crop_id = str(crop_id).zfill(3)
     assert isinstance(crop_id,str)
@@ -74,37 +78,45 @@ def _generate_crops_for_single_image(crops_this_image,
                                      options):
     """
     Generate all the crops required for a single image.
+    Args:
+        crops_this_image (list of dict): list of dicts with at least keys
+            'image_fn_relative', 'crop_id'
+        input_folder (str): input folder (whole images)
+        output_folder (str): output folder (crops)
+        options (CreateCropFolderOptions): cropping options
     """
     if len(crops_this_image) == 0:
         return
-    image_fn_relative = crops_this_image[0]['image_fn_relative']
+    image_fn_relative = crops_this_image[0]['image_fn_relative']
     input_fn_abs = os.path.join(input_folder,image_fn_relative)
     assert os.path.isfile(input_fn_abs)
     detections_to_crop = [c['detection'] for c in crops_this_image]
     cropped_images = crop_image(detections_to_crop,
                                 input_fn_abs,
                                 confidence_threshold=0,
                                 expansion=options.expansion)
     assert len(cropped_images) == len(crops_this_image)
     # i_crop = 0; crop_info = crops_this_image[0]
     for i_crop,crop_info in enumerate(crops_this_image):
         assert crop_info['image_fn_relative'] == image_fn_relative
-        crop_filename_relative = _get_crop_filename(image_fn_relative, crop_info['crop_id'])
+        crop_filename_relative = _get_crop_filename(image_fn_relative, crop_info['crop_id'])
         crop_filename_abs = os.path.join(output_folder,crop_filename_relative).replace('\\','/')
         if os.path.isfile(crop_filename_abs) and not options.overwrite:
             continue
-        cropped_image = cropped_images[i_crop]
-        os.makedirs(os.path.dirname(crop_filename_abs),exist_ok=True)
+        cropped_image = cropped_images[i_crop]
+        os.makedirs(os.path.dirname(crop_filename_abs),exist_ok=True)
         exif_preserving_save(cropped_image,crop_filename_abs,quality=options.quality)
     # ...for each crop
@@ -113,119 +125,185 @@ def _generate_crops_for_single_image(crops_this_image,
 def crop_results_to_image_results(image_results_file_with_crop_ids,
                                   crop_results_file,
                                   output_file,
-                                  delete_crop_information=True):
+                                  delete_crop_information=True,
+                                  require_identical_detection_categories=True,
+                                  restrict_to_top_n=-1,
+                                  crop_results_prefix=None,
+                                  detections_without_classification_handling='error'):
     """
     This function is intended to be run after you have:
         1. Run MegaDetector on a folder
         2. Generated a crop folder using create_crop_folder
         3. Run a species classifier on those crops
     This function will take the crop-level results and transform them back
-    to the original images.  Classification categories, if available, are taken
+    to the original images.  Classification categories, if available, are taken
     from [crop_results_file].
     Args:
         image_results_file_with_crop_ids (str): results file for the original images,
-            containing crop IDs, likely generated via create_crop_folder.  All
+            containing crop IDs, likely generated via create_crop_folder.  All
             non-standard fields in this file will be passed along to [output_file].
         crop_results_file (str): results file for the crop folder
         output_file (str): output .json file, containing crop-level classifications
             mapped back to the image level.
         delete_crop_information (bool, optional): whether to delete the "crop_id" and
             "crop_filename_relative" fields from each detection, if present.
+        require_identical_detection_categories (bool, optional): if True, error if
+            the image-level and crop-level detection categories are different.  If False,
+            ignore the crop-level detection categories.
+        restrict_to_top_n (int, optional): If >0, removes all but the top N classification
+            results for each detection.
+        crop_results_prefix (str, optional): if not None, removes this prefix from crop
+            results filenames.  Intended to support the case where the crop results
+            use absolute paths.
+        detections_without_classification_handling (str, optional): what to do when we
+            encounter a crop that doesn't appear in classification results: 'error',
+            or 'include' ("include" means "leave the detection alone, without classifications").
     """
     ##%% Validate inputs
     assert os.path.isfile(image_results_file_with_crop_ids), \
         'Could not find image-level input file {}'.format(image_results_file_with_crop_ids)
     assert os.path.isfile(crop_results_file), \
         'Could not find crop results file {}'.format(crop_results_file)
     os.makedirs(os.path.dirname(output_file),exist_ok=True)
     ##%% Read input files
     print('Reading input...')
     with open(image_results_file_with_crop_ids,'r') as f:
         image_results_with_crop_ids = json.load(f)
     with open(crop_results_file,'r') as f:
         crop_results = json.load(f)
     # Find all the detection categories that need to be consistent
-    used_category_ids = set()
+    used_detection_category_ids = set()
     for im in tqdm(image_results_with_crop_ids['images']):
         if 'detections' not in im or im['detections'] is None:
-            continue
+            continue
         for det in im['detections']:
             if 'crop_id' in det:
-                used_category_ids.add(det['category'])
-    # Make sure the categories that matter are consistent across the two files
-    for category_id in used_category_ids:
-        category_name = image_results_with_crop_ids['detection_categories'][category_id]
-        assert category_id in crop_results['detection_categories'] and \
-            category_name == crop_results['detection_categories'][category_id], \
-                'Crop results and detection results use incompatible categories'
+                used_detection_category_ids.add(det['category'])
+    # Make sure the detection categories that matter are consistent across the two files
+    if require_identical_detection_categories:
+        for category_id in used_detection_category_ids:
+            category_name = image_results_with_crop_ids['detection_categories'][category_id]
+            assert category_id in crop_results['detection_categories'] and \
+                category_name == crop_results['detection_categories'][category_id], \
+                    'Crop results and detection results use incompatible categories'
     crop_filename_to_results = {}
     # im = crop_results['images'][0]
     for im in crop_results['images']:
-        crop_filename_to_results[im['file']] = im
+        fn = im['file']
+        # Possibly remove a prefix from each filename
+        if (crop_results_prefix is not None) and (crop_results_prefix in fn):
+            if fn.startswith(crop_results_prefix):
+                fn = fn.replace(crop_results_prefix,'',1)
+                im['file'] = fn
+        crop_filename_to_results[fn] = im
     if 'classification_categories' in crop_results:
         image_results_with_crop_ids['classification_categories'] = \
             crop_results['classification_categories']
     if 'classification_category_descriptions' in crop_results:
         image_results_with_crop_ids['classification_category_descriptions'] = \
             crop_results['classification_category_descriptions']
     ##%% Read classifications from crop results, merge into image-level results
+    print('Reading classification results...')
+    n_skipped_detections = 0
+    # Loop over the original image-level detections
+    #
     # im = image_results_with_crop_ids['images'][0]
-    for im in tqdm(image_results_with_crop_ids['images']):
+    for i_image,im in tqdm(enumerate(image_results_with_crop_ids['images']),
+                           total=len(image_results_with_crop_ids['images'])):
         if 'detections' not in im or im['detections'] is None:
             continue
+        # i_det = 0; det = im['detections'][i_det]
         for det in im['detections']:
             if 'classifications' in det:
                 del det['classifications']
             if 'crop_id' in det:
+                # We may be skipping detections with no classification results
+                skip_detection = False
+                # Find the corresponding crop in the classification results
                 crop_filename_relative = det['crop_filename_relative']
-                assert crop_filename_relative in crop_filename_to_results, \
-                    'Crop lookup error'
-                crop_results_this_detection = crop_filename_to_results[crop_filename_relative]
-                assert crop_results_this_detection['file'] == crop_filename_relative
-                assert len(crop_results_this_detection['detections']) == 1
-                # Allow a slight confidence difference for the case where output precision was truncated
-                assert abs(crop_results_this_detection['detections'][0]['conf'] - det['conf']) < 0.01
-                assert crop_results_this_detection['detections'][0]['category'] == det['category']
-                assert crop_results_this_detection['detections'][0]['bbox'] == [0,0,1,1]
-                det['classifications'] = crop_results_this_detection['detections'][0]['classifications']
+                if crop_filename_relative not in crop_filename_to_results:
+                    if detections_without_classification_handling == 'error':
+                        raise ValueError('Crop lookup error: {}'.format(crop_filename_relative))
+                    elif detections_without_classification_handling == 'include':
+                        # Leave this detection unclassified
+                        skip_detection = True
+                    else:
+                        raise ValueError(
+                            'Illegal value for detections_without_classification_handling: {}'.format(
+                                detections_without_classification_handling
+                        ))
+                if not skip_detection:
+                    crop_results_this_detection = crop_filename_to_results[crop_filename_relative]
+                    # Consistency checking
+                    assert crop_results_this_detection['file'] == crop_filename_relative, \
+                        'Crop filename mismatch'
+                    assert len(crop_results_this_detection['detections']) == 1, \
+                        'Multiple crop results for a single detection'
+                    assert crop_results_this_detection['detections'][0]['bbox'] == [0,0,1,1], \
+                        'Invalid crop bounding box'
+                    # This check was helpful for the case where crop-level results had already
+                    # taken detection confidence values from detector output by construction, but this isn't
+                    # really meaningful for most cases.
+                    # assert abs(crop_results_this_detection['detections'][0]['conf'] - det['conf']) < 0.01
+                    if require_identical_detection_categories:
+                        assert crop_results_this_detection['detections'][0]['category'] == det['category']
+                    # Copy the crop-level classifications
+                    det['classifications'] = crop_results_this_detection['detections'][0]['classifications']
+                    confidence_values = [x[1] for x in det['classifications']]
+                    assert is_list_sorted(confidence_values,reverse=True)
+                    if restrict_to_top_n > 0:
+                        det['classifications'] = det['classifications'][0:restrict_to_top_n]
             if delete_crop_information:
                 if 'crop_id' in det:
                     del det['crop_id']
                 if 'crop_filename_relative' in det:
                     del det['crop_filename_relative']
         # ...for each detection
-    # ...for each image
+    # ...for each image
+    if n_skipped_detections > 0:
+        print('Skipped {} detections'.format(n_skipped_detections))
     ##%% Write output file
     print('Writing output file...')
     with open(output_file,'w') as f:
         json.dump(image_results_with_crop_ids,f,indent=1)
@@ -241,9 +319,9 @@ def create_crop_folder(input_file,
     """
     Given a MegaDetector .json file and a folder of images, creates a new folder
     of images representing all above-threshold crops from the original folder.
     Optionally writes a new .json file that attaches unique IDs to each detection.
     Args:
         input_file (str): MD-formatted .json file to process
         input_folder (str): Input image folder
@@ -251,11 +329,11 @@ def create_crop_folder(input_file,
         output_file (str, optional): new .json file that attaches unique IDs to each detection.
         crops_output_file (str, optional): new .json file that includes whole-image detections
             for each of the crops, using confidence values from the original results
-        options (CreateCropFolderOptions, optional): crop parameters
+        options (CreateCropFolderOptions, optional): crop parameters
     """
     ## Validate options, prepare output folders
     if options is None:
         options = CreateCropFolderOptions()
@@ -264,45 +342,45 @@ def create_crop_folder(input_file,
     os.makedirs(output_folder,exist_ok=True)
     if output_file is not None:
         os.makedirs(os.path.dirname(output_file),exist_ok=True)
     ##%% Read input
-    print('Reading MD results file...')
+    print('Reading MD results file...')
     with open(input_file,'r') as f:
         detection_results = json.load(f)
     category_ids_to_include = None
-    if options.category_names_to_include is not None:
+    if options.category_names_to_include is not None:
         category_id_to_name = detection_results['detection_categories']
-        category_name_to_id = invert_dictionary(category_id_to_name)
+        category_name_to_id = invert_dictionary(category_id_to_name)
         category_ids_to_include = set()
         for category_name in options.category_names_to_include:
             assert category_name in category_name_to_id, \
                 'Unrecognized category name {}'.format(category_name)
-            category_ids_to_include.add(category_name_to_id[category_name])
+            category_ids_to_include.add(category_name_to_id[category_name])
     ##%% Make a list of crops that we need to create
     # Maps input images to list of dicts, with keys 'crop_id','detection'
     image_fn_relative_to_crops = defaultdict(list)
     n_crops = 0
     n_detections_excluded_by_category = 0
     # im = detection_results['images'][0]
     for i_image,im in enumerate(detection_results['images']):
         if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
             continue
         detections_this_image = im['detections']
         image_fn_relative = im['file']
         for i_detection,det in enumerate(detections_this_image):
             if det['conf'] < options.confidence_threshold:
                 continue
@@ -312,87 +390,93 @@ def create_crop_folder(input_file,
                 continue
             det['crop_id'] = i_detection
             crop_info = {'image_fn_relative':image_fn_relative,
-                            'crop_id':i_detection,
-                            'detection':det}
-            crop_filename_relative = _get_crop_filename(image_fn_relative,
+                         'crop_id':i_detection,
+                         'detection':det}
+            crop_filename_relative = _get_crop_filename(image_fn_relative,
                                                         crop_info['crop_id'])
             det['crop_filename_relative'] = crop_filename_relative
             image_fn_relative_to_crops[image_fn_relative].append(crop_info)
             n_crops += 1
-    # ...for each input image
+    # ...for each input image
     print('Prepared a list of {} crops from {} of {} input images'.format(
         n_crops,len(image_fn_relative_to_crops),len(detection_results['images'])))
     if n_detections_excluded_by_category > 0:
         print('Excluded {} detections by category'.format(n_detections_excluded_by_category))
     ##%% Generate crops
     if options.n_workers <= 1:
         # image_fn_relative = next(iter(image_fn_relative_to_crops))
         for image_fn_relative in tqdm(image_fn_relative_to_crops.keys()):
-            crops_this_image = image_fn_relative_to_crops[image_fn_relative]
+            crops_this_image = image_fn_relative_to_crops[image_fn_relative]
             _generate_crops_for_single_image(crops_this_image=crops_this_image,
                                              input_folder=input_folder,
                                              output_folder=output_folder,
                                              options=options)
     else:
         print('Creating a {} pool with {} workers'.format(options.pool_type,options.n_workers))
+        pool = None
+        try:
+            if options.pool_type == 'thread':
+                pool = ThreadPool(options.n_workers)
+            else:
+                assert options.pool_type == 'process'
+                pool = Pool(options.n_workers)
+            # Each element in this list is the list of crops for a single image
+            crop_lists = list(image_fn_relative_to_crops.values())
+            with tqdm(total=len(image_fn_relative_to_crops)) as pbar:
+                for i,_ in enumerate(pool.imap_unordered(partial(
+                            _generate_crops_for_single_image,
+                                input_folder=input_folder,
+                                output_folder=output_folder,
+                                options=options),
+                            crop_lists)):
+                    pbar.update()
+        finally:
+            if pool is not None:
+                pool.close()
+                pool.join()
+                print("Pool closed and joined for crop folder creation")
+    # ...if we're using parallel processing
-        if options.pool_type == 'thread':
-            pool = ThreadPool(options.n_workers)
-        else:
-            assert options.pool_type == 'process'
-            pool = Pool(options.n_workers)
-        # Each element in this list is the list of crops for a single image
-        crop_lists = list(image_fn_relative_to_crops.values())
-        with tqdm(total=len(image_fn_relative_to_crops)) as pbar:
-            for i,_ in enumerate(pool.imap_unordered(partial(
-                        _generate_crops_for_single_image,
-                            input_folder=input_folder,
-                            output_folder=output_folder,
-                            options=options),
-                        crop_lists)):
-                pbar.update()
-    # ...if we're using parallel processing
     ##%% Write output file
     if output_file is not None:
         with open(output_file,'w') as f:
             json.dump(detection_results,f,indent=1)
     if crops_output_file is not None:
         original_images = detection_results['images']
         detection_results_cropped = detection_results
         detection_results_cropped['images'] = []
         # im = original_images[0]
         for im in original_images:
             if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
                 continue
-            detections_this_image = im['detections']
+            detections_this_image = im['detections']
             image_fn_relative = im['file']
             for i_detection,det in enumerate(detections_this_image):
                 if 'crop_id' in det:
                     im_out = {}
                     im_out['file'] = det['crop_filename_relative']
@@ -402,19 +486,135 @@ def create_crop_folder(input_file,
                     det_out['bbox'] = [0, 0, 1, 1]
                     im_out['detections'] = [det_out]
                     detection_results_cropped['images'].append(im_out)
                 # ...if we need to include this crop in the new .json file
             # ...for each crop
         # ...for each original image
         with open(crops_output_file,'w') as f:
             json.dump(detection_results_cropped,f,indent=1)
 # ...def create_crop_folder()
 #%% Command-line driver
-# TODO
+def main():
+    """
+    Command-line interface for creating a crop folder from MegaDetector results.
+    Parses the command line, copies the parsed values onto a CreateCropFolderOptions
+    object, prints a short summary of the inputs and outputs, and then calls
+    create_crop_folder().
+    """
+    parser = argparse.ArgumentParser(
+        description='Create a folder of crops from MegaDetector results'
+    )
+    # Required positional arguments
+    parser.add_argument(
+        'input_file',
+        type=str,
+        help='Path to the MegaDetector .json results file'
+    )
+    parser.add_argument(
+        'input_folder',
+        type=str,
+        help='Path to the folder containing the original images'
+    )
+    parser.add_argument(
+        'output_folder',
+        type=str,
+        help='Path to the folder where cropped images will be saved'
+    )
+    # Optional output files; both default to None, in which case nothing is written
+    parser.add_argument(
+        '--output_file',
+        type=str,
+        default=None,
+        help='Path to save the modified MegaDetector .json file (with crop IDs and filenames)'
+    )
+    parser.add_argument(
+        '--crops_output_file',
+        type=str,
+        default=None,
+        help='Path to save a new .json file for the crops themselves (with full-image detections for each crop)'
+    )
+    # Cropping parameters
+    parser.add_argument(
+        '--confidence_threshold',
+        type=float,
+        default=0.1,
+        help='Confidence threshold for detections to be cropped (default: 0.1)'
+    )
+    parser.add_argument(
+        '--expansion',
+        type=int,
+        default=0,
+        help='Number of pixels to expand each crop (default: 0)'
+    )
+    parser.add_argument(
+        '--quality',
+        type=int,
+        default=95,
+        help='JPEG quality for saving crops (default: 95)'
+    )
+    # Parsed as a string restricted to 'true'/'false'; converted to a bool below
+    parser.add_argument(
+        '--overwrite',
+        type=str,
+        default='true',
+        choices=['true', 'false'],
+        help="Overwrite existing crop images (default: 'true')"
+    )
+    # Parallelism
+    parser.add_argument(
+        '--n_workers',
+        type=int,
+        default=8,
+        help='Number of concurrent workers (default: 8)'
+    )
+    parser.add_argument(
+        '--pool_type',
+        type=str,
+        default='thread',
+        choices=['thread', 'process'],
+        help="Type of parallelism to use ('thread' or 'process', default: 'thread')"
+    )
+    # Category filter
+    parser.add_argument(
+        '--category_names',
+        type=str,
+        default=None,
+        help="Comma-separated list of category names to include " + \
+             "(e.g., 'animal,person'). If None (default), all categories are included."
+    )
+    args = parser.parse_args()
+    # Copy the parsed values onto the options object
+    options = CreateCropFolderOptions()
+    options.confidence_threshold = args.confidence_threshold
+    options.expansion = args.expansion
+    options.quality = args.quality
+    options.overwrite = (args.overwrite.lower() == 'true')
+    options.n_workers = args.n_workers
+    options.pool_type = args.pool_type
+    # Split the comma-separated category list.  Empty tokens (e.g. from a trailing
+    # or doubled comma, as in "animal," or "animal,,person") are dropped, because
+    # create_crop_folder() asserts that every requested name is a known category,
+    # and an empty string never is.  If nothing is left, include all categories.
+    category_names = None
+    if args.category_names:
+        stripped_names = [name.strip() for name in args.category_names.split(',')]
+        category_names = [name for name in stripped_names if name]
+    options.category_names_to_include = category_names if category_names else None
+    # Summarize what we are about to do
+    print('Starting crop folder creation...')
+    print('Input MD results: {}'.format(args.input_file))
+    print('Input image folder {}'.format(args.input_folder))
+    print('Output crop folder: {}'.format(args.output_folder))
+    if args.output_file:
+        print('Modified MD results will be saved to {}'.format(args.output_file))
+    if args.crops_output_file:
+        print('Crops .json output will be saved to {}'.format(args.crops_output_file))
+    create_crop_folder(
+        input_file=args.input_file,
+        input_folder=args.input_folder,
+        output_folder=args.output_folder,
+        output_file=args.output_file,
+        crops_output_file=args.crops_output_file,
+        options=options
+    )
+if __name__ == '__main__':
+    main()

megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl

Potentially problematic release.

megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl