megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (197) hide show
  1. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  2. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  3. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  4. megadetector/classification/aggregate_classifier_probs.py +3 -3
  5. megadetector/classification/analyze_failed_images.py +5 -5
  6. megadetector/classification/cache_batchapi_outputs.py +5 -5
  7. megadetector/classification/create_classification_dataset.py +11 -12
  8. megadetector/classification/crop_detections.py +10 -10
  9. megadetector/classification/csv_to_json.py +8 -8
  10. megadetector/classification/detect_and_crop.py +13 -15
  11. megadetector/classification/efficientnet/model.py +8 -8
  12. megadetector/classification/efficientnet/utils.py +6 -5
  13. megadetector/classification/evaluate_model.py +7 -7
  14. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  15. megadetector/classification/json_to_azcopy_list.py +1 -1
  16. megadetector/classification/json_validator.py +29 -32
  17. megadetector/classification/map_classification_categories.py +9 -9
  18. megadetector/classification/merge_classification_detection_output.py +12 -9
  19. megadetector/classification/prepare_classification_script.py +19 -19
  20. megadetector/classification/prepare_classification_script_mc.py +26 -26
  21. megadetector/classification/run_classifier.py +4 -4
  22. megadetector/classification/save_mislabeled.py +6 -6
  23. megadetector/classification/train_classifier.py +1 -1
  24. megadetector/classification/train_classifier_tf.py +9 -9
  25. megadetector/classification/train_utils.py +10 -10
  26. megadetector/data_management/annotations/annotation_constants.py +1 -2
  27. megadetector/data_management/camtrap_dp_to_coco.py +79 -46
  28. megadetector/data_management/cct_json_utils.py +103 -103
  29. megadetector/data_management/cct_to_md.py +49 -49
  30. megadetector/data_management/cct_to_wi.py +33 -33
  31. megadetector/data_management/coco_to_labelme.py +75 -75
  32. megadetector/data_management/coco_to_yolo.py +210 -193
  33. megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
  34. megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
  35. megadetector/data_management/databases/integrity_check_json_db.py +228 -200
  36. megadetector/data_management/databases/subset_json_db.py +33 -33
  37. megadetector/data_management/generate_crops_from_cct.py +88 -39
  38. megadetector/data_management/get_image_sizes.py +54 -49
  39. megadetector/data_management/labelme_to_coco.py +133 -125
  40. megadetector/data_management/labelme_to_yolo.py +159 -73
  41. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  42. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  43. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  44. megadetector/data_management/lila/download_lila_subset.py +21 -24
  45. megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
  46. megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
  47. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  48. megadetector/data_management/lila/lila_common.py +73 -70
  49. megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
  50. megadetector/data_management/mewc_to_md.py +344 -340
  51. megadetector/data_management/ocr_tools.py +262 -255
  52. megadetector/data_management/read_exif.py +249 -227
  53. megadetector/data_management/remap_coco_categories.py +90 -28
  54. megadetector/data_management/remove_exif.py +81 -21
  55. megadetector/data_management/rename_images.py +187 -187
  56. megadetector/data_management/resize_coco_dataset.py +588 -120
  57. megadetector/data_management/speciesnet_to_md.py +41 -41
  58. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  59. megadetector/data_management/yolo_output_to_md_output.py +248 -122
  60. megadetector/data_management/yolo_to_coco.py +333 -191
  61. megadetector/detection/change_detection.py +832 -0
  62. megadetector/detection/process_video.py +340 -337
  63. megadetector/detection/pytorch_detector.py +358 -278
  64. megadetector/detection/run_detector.py +399 -186
  65. megadetector/detection/run_detector_batch.py +404 -377
  66. megadetector/detection/run_inference_with_yolov5_val.py +340 -327
  67. megadetector/detection/run_tiled_inference.py +257 -249
  68. megadetector/detection/tf_detector.py +24 -24
  69. megadetector/detection/video_utils.py +332 -295
  70. megadetector/postprocessing/add_max_conf.py +19 -11
  71. megadetector/postprocessing/categorize_detections_by_size.py +45 -45
  72. megadetector/postprocessing/classification_postprocessing.py +468 -433
  73. megadetector/postprocessing/combine_batch_outputs.py +23 -23
  74. megadetector/postprocessing/compare_batch_results.py +590 -525
  75. megadetector/postprocessing/convert_output_format.py +106 -102
  76. megadetector/postprocessing/create_crop_folder.py +347 -147
  77. megadetector/postprocessing/detector_calibration.py +173 -168
  78. megadetector/postprocessing/generate_csv_report.py +508 -499
  79. megadetector/postprocessing/load_api_results.py +48 -27
  80. megadetector/postprocessing/md_to_coco.py +133 -102
  81. megadetector/postprocessing/md_to_labelme.py +107 -90
  82. megadetector/postprocessing/md_to_wi.py +40 -40
  83. megadetector/postprocessing/merge_detections.py +92 -114
  84. megadetector/postprocessing/postprocess_batch_results.py +319 -301
  85. megadetector/postprocessing/remap_detection_categories.py +91 -38
  86. megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
  87. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  88. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  89. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
  90. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  91. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  92. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  93. megadetector/postprocessing/validate_batch_results.py +70 -70
  94. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  95. megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
  96. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
  97. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
  98. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  99. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  100. megadetector/taxonomy_mapping/species_lookup.py +156 -74
  101. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  102. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  103. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  104. megadetector/utils/ct_utils.py +1049 -211
  105. megadetector/utils/directory_listing.py +21 -77
  106. megadetector/utils/gpu_test.py +22 -22
  107. megadetector/utils/md_tests.py +632 -529
  108. megadetector/utils/path_utils.py +1520 -431
  109. megadetector/utils/process_utils.py +41 -41
  110. megadetector/utils/split_locations_into_train_val.py +62 -62
  111. megadetector/utils/string_utils.py +148 -27
  112. megadetector/utils/url_utils.py +489 -176
  113. megadetector/utils/wi_utils.py +2658 -2526
  114. megadetector/utils/write_html_image_list.py +137 -137
  115. megadetector/visualization/plot_utils.py +34 -30
  116. megadetector/visualization/render_images_with_thumbnails.py +39 -74
  117. megadetector/visualization/visualization_utils.py +487 -435
  118. megadetector/visualization/visualize_db.py +232 -198
  119. megadetector/visualization/visualize_detector_output.py +82 -76
  120. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
  121. megadetector-10.0.0.dist-info/RECORD +139 -0
  122. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
  123. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  124. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  125. megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
  126. megadetector/api/batch_processing/api_core/server.py +0 -294
  127. megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
  128. megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
  129. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  130. megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
  131. megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
  132. megadetector/api/batch_processing/api_core/server_utils.py +0 -88
  133. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  134. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  135. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  136. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  137. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  138. megadetector/api/synchronous/__init__.py +0 -0
  139. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  140. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
  141. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
  142. megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
  143. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  144. megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
  145. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  146. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  147. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  148. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  149. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  150. megadetector/data_management/importers/awc_to_json.py +0 -191
  151. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  152. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  153. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  154. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  155. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  156. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  157. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  158. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  159. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  160. megadetector/data_management/importers/ena24_to_json.py +0 -276
  161. megadetector/data_management/importers/filenames_to_json.py +0 -386
  162. megadetector/data_management/importers/helena_to_cct.py +0 -283
  163. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  164. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  165. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  166. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  167. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  168. megadetector/data_management/importers/missouri_to_json.py +0 -490
  169. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  170. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  171. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  172. megadetector/data_management/importers/pc_to_json.py +0 -365
  173. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  174. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  175. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  176. megadetector/data_management/importers/rspb_to_json.py +0 -356
  177. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  178. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  179. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  180. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  181. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  182. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  183. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  184. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  185. megadetector/data_management/importers/ubc_to_json.py +0 -399
  186. megadetector/data_management/importers/umn_to_json.py +0 -507
  187. megadetector/data_management/importers/wellington_to_json.py +0 -263
  188. megadetector/data_management/importers/wi_to_json.py +0 -442
  189. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  190. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  191. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  192. megadetector/utils/azure_utils.py +0 -178
  193. megadetector/utils/sas_blob_utils.py +0 -509
  194. megadetector-5.0.28.dist-info/RECORD +0 -209
  195. /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
  196. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
  197. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
@@ -12,178 +12,646 @@ scaling bounding boxes accordingly.
12
12
  import os
13
13
  import json
14
14
  import shutil
15
+ import argparse
16
+ import sys
15
17
 
16
- from tqdm import tqdm
17
18
  from collections import defaultdict
19
+ from multiprocessing.pool import Pool, ThreadPool
20
+ from functools import partial
21
+
22
+ from PIL import Image
23
+ from tqdm import tqdm
18
24
 
19
25
  from megadetector.utils.path_utils import insert_before_extension
20
26
  from megadetector.visualization.visualization_utils import \
21
27
  open_image, resize_image, exif_preserving_save
28
+ from megadetector.utils.ct_utils import make_test_folder
29
+ from megadetector.utils.ct_utils import write_json
22
30
 
23
31
 
24
32
  #%% Functions
25
33
 
26
- def resize_coco_dataset(input_folder,input_filename,
27
- output_folder,output_filename,
34
+ def _process_single_image_for_resize(image_data,
35
+ input_folder,
36
+ output_folder,
37
+ target_size,
38
+ correct_size_image_handling,
39
+ unavailable_image_handling,
40
+ no_enlarge_width,
41
+ verbose):
42
+ """
43
+ Processes a single image: loads, resizes/copies, updates metadata, and scales annotations.
44
+
45
+ [image_data] is a tuple of [im,annotations]
46
+ """
47
+
48
+ assert unavailable_image_handling in ('error','omit'), \
49
+ f'Illegal unavailable_image_handling {unavailable_image_handling}'
50
+
51
+ assert isinstance(image_data,tuple) and len(image_data) == 2
52
+ assert isinstance(image_data[0],dict)
53
+ assert isinstance(image_data[1],list)
54
+ im = image_data[0].copy()
55
+ annotations_this_image = [ann.copy() for ann in image_data[1]]
56
+
57
+ input_fn_relative = im['file_name']
58
+ input_fn_abs = os.path.join(input_folder, input_fn_relative)
59
+
60
+ if not os.path.isfile(input_fn_abs):
61
+ if unavailable_image_handling == 'error':
62
+ raise FileNotFoundError('Could not find file {}'.format(input_fn_abs))
63
+ else:
64
+ print("Can't find image {}, skipping".format(input_fn_relative))
65
+ return None, None
66
+
67
+ output_fn_abs = os.path.join(output_folder, input_fn_relative)
68
+ os.makedirs(os.path.dirname(output_fn_abs), exist_ok=True)
69
+
70
+ if verbose:
71
+ print('Resizing {} to {}'.format(input_fn_abs,output_fn_abs))
72
+
73
+ try:
74
+ pil_im = open_image(input_fn_abs)
75
+ input_w = pil_im.width
76
+ input_h = pil_im.height
77
+ except Exception as e:
78
+ if unavailable_image_handling == 'error':
79
+ raise Exception('Could not open image {}: {}'.format(
80
+ input_fn_relative, str(e)))
81
+ else:
82
+ print("Can't open image {}, skipping".format(input_fn_relative))
83
+ return None, None
84
+
85
+ image_is_already_target_size = \
86
+ (input_w == target_size[0]) and (input_h == target_size[1])
87
+ if no_enlarge_width and (input_w < target_size[0]):
88
+ image_is_already_target_size = True
89
+ preserve_original_size = \
90
+ (target_size[0] == -1) and (target_size[1] == -1)
91
+
92
+ # Do we need to resize, or can we try to get away with a copy?
93
+ if image_is_already_target_size or preserve_original_size:
94
+ output_w = input_w
95
+ output_h = input_h
96
+ if correct_size_image_handling == 'copy':
97
+ if input_fn_abs != output_fn_abs: # only copy if src and dst are different
98
+ shutil.copyfile(input_fn_abs, output_fn_abs)
99
+ elif correct_size_image_handling == 'rewrite':
100
+ exif_preserving_save(pil_im, output_fn_abs)
101
+ else:
102
+ raise ValueError(
103
+ f'Unrecognized value {correct_size_image_handling} for correct_size_image_handling')
104
+ else:
105
+ try:
106
+ pil_im = resize_image(pil_im, target_size[0], target_size[1],
107
+ no_enlarge_width=no_enlarge_width)
108
+ output_w = pil_im.width
109
+ output_h = pil_im.height
110
+ exif_preserving_save(pil_im, output_fn_abs)
111
+ except Exception as e:
112
+ if unavailable_image_handling == 'error':
113
+ raise Exception('Could not resize image {}: {}'.format(
114
+ input_fn_relative, str(e)))
115
+ else:
116
+ print("Can't resize image {}, skipping".format(input_fn_relative))
117
+ return None,None
118
+
119
+ im['width'] = output_w
120
+ im['height'] = output_h
121
+
122
+ for ann in annotations_this_image:
123
+
124
+ if 'bbox' in ann:
125
+ bbox = ann['bbox']
126
+ if (output_w != input_w) or (output_h != input_h):
127
+ width_scale = output_w / input_w
128
+ height_scale = output_h / input_h
129
+ bbox = [
130
+ bbox[0] * width_scale,
131
+ bbox[1] * height_scale,
132
+ bbox[2] * width_scale,
133
+ bbox[3] * height_scale
134
+ ]
135
+ ann['bbox'] = bbox
136
+
137
+ # ...for each annotation associated with this image
138
+
139
+ return im, annotations_this_image
140
+
141
+ # ...def _process_single_image_for_resize(...)
142
+
143
+
144
+ def resize_coco_dataset(input_folder,
145
+ input_filename,
146
+ output_folder,
147
+ output_filename=None,
28
148
  target_size=(-1,-1),
29
- correct_size_image_handling='copy'):
149
+ correct_size_image_handling='copy',
150
+ unavailable_image_handling='error',
151
+ n_workers=1,
152
+ pool_type='thread',
153
+ no_enlarge_width=True,
154
+ verbose=False):
30
155
  """
31
156
  Given a COCO-formatted dataset (images in input_folder, data in input_filename), resizes
32
157
  all the images to a target size (in output_folder) and scales bounding boxes accordingly.
33
-
158
+
34
159
  Args:
35
- input_folder (str): the folder where images live; filenames in [input_filename] should
160
+ input_folder (str): the folder where images live; filenames in [input_filename] should
36
161
  be relative to [input_folder]
37
162
  input_filename (str): the (input) COCO-formatted .json file containing annotations
38
163
  output_folder (str): the folder to which we should write resized images; can be the
39
164
  same as [input_folder], in which case images are over-written
40
- output_filename (str): the COCO-formatted .json file we should generate that refers to
41
- the resized images
42
- target_size (list or tuple of ints): this should be tuple/list of ints, with length 2 (w,h).
43
- If either dimension is -1, aspect ratio will be preserved. If both dimensions are -1, this means
44
- "keep the original size". If both dimensions are -1 and correct_size_image_handling is copy, this
45
- function is basically a no-op.
46
- correct_size_image_handling (str): can be 'copy' (in which case the original image is just copied
165
+ output_filename (str, optional): the COCO-formatted .json file we should generate that refers
166
+ to the resized images
167
+ target_size (list or tuple of ints, optional): this should be tuple/list of ints, with length 2 (w,h).
168
+ If either dimension is -1, aspect ratio will be preserved. If both dimensions are -1, this means
169
+ "keep the original size". If both dimensions are -1 and correct_size_image_handling is copy, this
170
+ function is basically a no-op.
171
+ correct_size_image_handling (str, optional): what to do in the case where the original size
172
+ already matches the target size. Can be 'copy' (in which case the original image is just copied
47
173
  to the output folder) or 'rewrite' (in which case the image is opened via PIL and re-written,
48
- attempting to preserve the same quality). The only reason to do use 'rewrite' 'is the case where
49
- you're superstitious about biases coming from images in a training set being written by different
174
+ attempting to preserve the same quality). The only reason to use 'rewrite' is the case where
175
+ you're superstitious about biases coming from images in a training set being written by different
50
176
  image encoders.
51
-
177
+ unavailable_image_handling (str, optional): what to do when a file can't be opened. Can be
178
+ 'error' or 'omit'.
179
+ n_workers (int, optional): number of workers to use for parallel processing.
180
+ Defaults to 1 (no parallelization). If <= 1, processing is sequential.
181
+ pool_type (str, optional): type of multiprocessing pool to use ('thread' or 'process').
182
+ Defaults to 'thread'. Only used if n_workers > 1.
183
+ no_enlarge_width (bool, optional): if [no_enlarge_width] is True, and
184
+ [target width] is larger than the original image width, does not modify the image,
185
+ but still writes it
186
+ verbose (bool, optional): enable additional debug output
187
+
52
188
  Returns:
53
189
  dict: the COCO database with resized images, identical to the content of [output_filename]
54
190
  """
55
-
191
+
192
+ # Validate arguments
193
+
194
+ assert unavailable_image_handling in ('error','omit'), \
195
+ f'Illegal unavailable_image_handling {unavailable_image_handling}'
196
+
56
197
  # Read input data
57
198
  with open(input_filename,'r') as f:
58
199
  d = json.load(f)
59
-
200
+
60
201
  # Map image IDs to annotations
61
202
  image_id_to_annotations = defaultdict(list)
62
203
  for ann in d['annotations']:
63
204
  image_id_to_annotations[ann['image_id']].append(ann)
64
-
65
- # For each image
66
-
67
- # TODO: this is trivially parallelizable
68
- #
69
- # im = d['images'][0]
70
- for im in tqdm(d['images']):
71
-
72
- input_fn_relative = im['file_name']
73
- input_fn_abs = os.path.join(input_folder,input_fn_relative)
74
- assert os.path.isfile(input_fn_abs), "Can't find image file {}".format(input_fn_abs)
75
-
76
- output_fn_abs = os.path.join(output_folder,input_fn_relative)
77
- os.makedirs(os.path.dirname(output_fn_abs),exist_ok=True)
78
-
79
- pil_im = open_image(input_fn_abs)
80
- input_w = pil_im.width
81
- input_h = pil_im.height
82
-
83
- image_is_already_target_size = \
84
- (input_w == target_size[0]) and (input_h == target_size[1])
85
- preserve_original_size = \
86
- (target_size[0] == -1) and (target_size[1] == -1)
87
-
88
- # If the image is already the right size...
89
- if (image_is_already_target_size or preserve_original_size):
90
- output_w = input_w
91
- output_h = input_h
92
- if correct_size_image_handling == 'copy':
93
- shutil.copyfile(input_fn_abs,output_fn_abs)
94
- elif correct_size_image_handling == 'rewrite':
95
- exif_preserving_save(pil_im,output_fn_abs)
96
- else:
97
- raise ValueError('Unrecognized value {} for correct_size_image_handling'.format(
98
- correct_size_image_handling))
205
+
206
+ original_images = d['images']
207
+
208
+ # Our worker function will take tuples of images and their
209
+ # associated annotations
210
+ image_annotation_tuples = []
211
+ for im in original_images:
212
+ if im['id'] not in image_id_to_annotations:
213
+ annotations_this_image = []
99
214
  else:
100
- pil_im = resize_image(pil_im, target_size[0], target_size[1])
101
- output_w = pil_im.width
102
- output_h = pil_im.height
103
- exif_preserving_save(pil_im,output_fn_abs)
104
-
105
- im['width'] = output_w
106
- im['height'] = output_h
107
-
108
- # For each box
109
- annotations_this_image = image_id_to_annotations[im['id']]
110
-
111
- # ann = annotations_this_image[0]
112
- for ann in annotations_this_image:
113
-
114
- if 'bbox' in ann:
115
-
116
- # boxes are [x,y,w,h]
117
- bbox = ann['bbox']
118
-
119
- # Do we need to scale this box?
120
- if (output_w != input_w) or (output_h != input_h):
121
- width_scale = output_w/input_w
122
- height_scale = output_h/input_h
123
- bbox = \
124
- [bbox[0] * width_scale,
125
- bbox[1] * height_scale,
126
- bbox[2] * width_scale,
127
- bbox[3] * height_scale]
128
-
129
- ann['bbox'] = bbox
130
-
131
- # ...if this annotation has a box
132
-
133
- # ...for each annotation
134
-
135
- # ...for each image
136
-
137
- # Write output file
138
- with open(output_filename,'w') as f:
139
- json.dump(d,f,indent=1)
140
-
215
+ annotations_this_image = image_id_to_annotations[im['id']]
216
+ image_annotation_tuple = (im,annotations_this_image)
217
+ image_annotation_tuples.append(image_annotation_tuple)
218
+
219
+ processed_results = []
220
+
221
+ if n_workers <= 1:
222
+
223
+ for image_annotation_tuple in tqdm(image_annotation_tuples,
224
+ desc="Resizing images sequentially"):
225
+ result = _process_single_image_for_resize(
226
+ image_data=image_annotation_tuple,
227
+ input_folder=input_folder,
228
+ output_folder=output_folder,
229
+ target_size=target_size,
230
+ correct_size_image_handling=correct_size_image_handling,
231
+ unavailable_image_handling=unavailable_image_handling,
232
+ no_enlarge_width=no_enlarge_width,
233
+ verbose=verbose
234
+ )
235
+ processed_results.append(result)
236
+
237
+ else:
238
+ try:
239
+
240
+ assert pool_type in ('process', 'thread'), f'Illegal pool type {pool_type}'
241
+ selected_pool = ThreadPool if (pool_type == 'thread') else Pool
242
+
243
+ print(f'Starting a {pool_type} pool of {n_workers} workers for image resizing')
244
+ pool = selected_pool(n_workers)
245
+
246
+ p_process_image = partial(_process_single_image_for_resize,
247
+ input_folder=input_folder,
248
+ output_folder=output_folder,
249
+ target_size=target_size,
250
+ correct_size_image_handling=correct_size_image_handling,
251
+ unavailable_image_handling=unavailable_image_handling,
252
+ no_enlarge_width=no_enlarge_width,
253
+ verbose=verbose)
254
+
255
+ processed_results = list(tqdm(pool.imap(p_process_image, image_annotation_tuples),
256
+ total=len(image_annotation_tuples),
257
+ desc=f"Resizing images with {pool_type} pool"))
258
+
259
+ finally:
260
+ pool.close()
261
+ pool.join()
262
+ print(f"{pool_type.capitalize()} pool closed and joined.")
263
+
264
+ new_images_list = []
265
+ new_annotations_list = []
266
+ for res_im_data, res_annotations in processed_results:
267
+ if res_im_data is None or res_annotations is None:
268
+ assert res_annotations is None and res_im_data is None
269
+ assert unavailable_image_handling == 'omit'
270
+ continue
271
+ new_images_list.append(res_im_data)
272
+ new_annotations_list.extend(res_annotations)
273
+
274
+ d['images'] = new_images_list
275
+ d['annotations'] = new_annotations_list
276
+
277
+ if output_filename is not None:
278
+ write_json(output_filename,d)
279
+
141
280
  return d
142
281
 
143
282
  # ...def resize_coco_dataset(...)
144
-
283
+
145
284
 
146
285
  #%% Interactive driver
147
286
 
148
287
  if False:
149
-
288
+
150
289
  pass
151
290
 
152
291
  #%% Test resizing
153
-
154
- input_folder = os.path.expanduser('~/data/usgs-tegus/usgs-kissel-training')
155
- input_filename = os.path.expanduser('~/data/usgs-tegus/usgs-kissel-training.json')
156
- target_size = (1600,-1)
157
-
158
- output_filename = insert_before_extension(input_filename,'resized-test')
159
- output_folder = input_folder + '-resized-test'
160
-
292
+
293
+ input_folder = 'i:/data/lila/ena24'
294
+ # input_filename = 'i:/data/lila/ena24.json'
295
+ input_filename = 'i:/data/lila/ena24-mini.json'
296
+
297
+ output_folder = 'i:/data/lila/ena24-resized'
298
+ output_filename = insert_before_extension(input_filename,'resized')
299
+
300
+ target_size = (640,-1)
301
+
161
302
  correct_size_image_handling = 'rewrite'
162
-
163
- resize_coco_dataset(input_folder,input_filename,
164
- output_folder,output_filename,
165
- target_size=target_size,
166
- correct_size_image_handling=correct_size_image_handling)
167
-
168
-
303
+
304
+ _ = resize_coco_dataset(input_folder=input_folder,
305
+ input_filename=input_filename,
306
+ output_folder=output_folder,
307
+ output_filename=output_filename,
308
+ target_size=target_size,
309
+ correct_size_image_handling=correct_size_image_handling,
310
+ unavailable_image_handling='omit',
311
+ n_workers=10,
312
+ pool_type='process')
313
+
314
+
169
315
  #%% Preview
170
-
316
+
171
317
  from megadetector.visualization import visualize_db
172
318
  options = visualize_db.DbVizOptions()
173
319
  options.parallelize_rendering = True
174
- options.viz_size = (900, -1)
175
- options.num_to_visualize = 5000
320
+ options.viz_size = (640, -1)
321
+ options.num_to_visualize = 100
176
322
 
323
+ preview_folder = 'i:/data/lila/ena24-resized-preview'
177
324
  html_file,_ = visualize_db.visualize_db(output_filename,
178
- os.path.expanduser('~/tmp/resize_coco_preview'),
179
- output_folder,options)
180
-
325
+ preview_folder,
326
+ output_folder,options)
327
+
181
328
 
182
329
  from megadetector.utils import path_utils # noqa
183
330
  path_utils.open_file(html_file)
184
-
185
-
331
+
332
+
186
333
  #%% Command-line driver
187
334
 
def main():
    """
    Command-line driver for resize_coco_dataset
    """

    parser = argparse.ArgumentParser(
        description='Resize images in a COCO dataset and scale annotations')

    # Required positional arguments
    parser.add_argument('input_folder', type=str,
                        help='Path to the folder containing original images')
    parser.add_argument('input_filename', type=str,
                        help='Path to the input COCO .json file')
    parser.add_argument('output_folder', type=str,
                        help='Path to the folder where resized images will be saved')
    parser.add_argument('output_filename', type=str,
                        help='Path to the output COCO .json file for resized data')

    # Optional arguments
    parser.add_argument('--target_size', type=str, default='-1,-1',
                        help='Target size as "width,height". Use -1 to preserve aspect ratio for a dimension. '
                             'E.g., "800,600" or "1024,-1".')
    parser.add_argument('--correct_size_image_handling', type=str, default='copy',
                        choices=['copy', 'rewrite'],
                        help='How to handle images already at target size')
    parser.add_argument('--n_workers', type=int, default=1,
                        help='Number of workers for parallel processing. <=1 for sequential')
    parser.add_argument('--pool_type', type=str, default='thread',
                        choices=['thread', 'process'],
                        help='Type of multiprocessing pool if n_workers > 1')

    # With no arguments at all, show help instead of an argparse error
    if len(sys.argv[1:]) == 0:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    # Parse "width,height" into an int tuple; on malformed input, print the
    # error and the help text, then exit
    try:
        size_tokens = args.target_size.split(',')
        if len(size_tokens) != 2:
            raise ValueError("target_size must have two comma-separated parts (width,height).")
        parsed_size = (int(size_tokens[0]), int(size_tokens[1]))
    except ValueError as e:
        print(f"Error parsing target_size: {e}")
        parser.print_help()
        parser.exit()

    resize_coco_dataset(args.input_folder,
                        args.input_filename,
                        args.output_folder,
                        args.output_filename,
                        target_size=parsed_size,
                        correct_size_image_handling=args.correct_size_image_handling,
                        n_workers=args.n_workers,
                        pool_type=args.pool_type)

    print("Dataset resizing complete")

if __name__ == '__main__':
    main()
422
+
423
+ #%% Tests
424
+
425
class TestResizeCocoDataset:
    """
    Test class for the resize_coco_dataset function.
    """

    def set_up(self): # noqa
        """
        Create a scratch folder plus separate input/output image folders for the
        sequential and parallel test runs.
        """
        self.test_dir = make_test_folder(subfolder='resize_coco_tests')

        self.input_images_dir_seq = os.path.join(self.test_dir, 'input_images_seq')
        os.makedirs(self.input_images_dir_seq, exist_ok=True)

        self.input_images_dir_par = os.path.join(self.test_dir, 'input_images_par')
        os.makedirs(self.input_images_dir_par, exist_ok=True)

        self.output_images_dir_seq = os.path.join(self.test_dir, 'output_images_seq')
        os.makedirs(self.output_images_dir_seq, exist_ok=True)

        self.output_images_dir_par = os.path.join(self.test_dir, 'output_images_par')
        os.makedirs(self.output_images_dir_par, exist_ok=True)

    def tear_down(self): # noqa
        """
        Remove the scratch folder created by set_up(), if it exists.
        """

        # Ensure shutil is imported if not already globally in the file
        # (it is, under '#%% Imports and constants')
        if hasattr(self, 'test_dir') and os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)

    def _create_dummy_image_and_coco_json(self,
                                          image_dir,
                                          json_filename_base="input_coco.json",
                                          num_images=2,
                                          original_size=(100, 100),
                                          num_annotations_per_image=2):
        """
        Write [num_images] solid-color PNGs of size [original_size] to [image_dir],
        and a matching COCO .json file (named [json_filename_base], in self.test_dir)
        with [num_annotations_per_image] boxes per image.

        Returns a tuple of (path to the .json file, the COCO dict itself).
        """
        coco_data = {
            "images": [],
            "annotations": [],
            "categories": [{"id": 1, "name": "test_category"}]
        }

        annotation_id_counter = 1

        for i in range(num_images):
            image_name = f"image_{i}.png"
            image_path = os.path.join(image_dir, image_name)

            # Create a dummy image
            try:
                img = Image.new('RGB', original_size, color='red')
                img.save(image_path)
            except Exception as e:
                # In some environments, font loading for default PIL text might fail.
                # For a simple color image, this shouldn't be an issue.
                # If it is, consider a simpler save or pre-creating a tiny PNG.
                print(f"Warning: Could not create dummy image {image_path}: {e}")
                # Fallback: create an empty file, though this will fail later steps
                # open(image_path, 'a').close()

            image_entry = {
                "id": i + 1,
                "file_name": image_name, # Filename only, not path
                "width": original_size[0],
                "height": original_size[1]
            }
            coco_data["images"].append(image_entry)

            for j in range(num_annotations_per_image):
                annotation_entry = {
                    "id": annotation_id_counter,
                    "image_id": image_entry["id"],
                    "category_id": 1, # Corresponds to "test_category"
                    # Simple, non-overlapping bbox for testing scaling
                    "bbox": [10 + j*30, 10 + j*5, 20, 15]
                }
                coco_data["annotations"].append(annotation_entry)
                annotation_id_counter += 1

        json_file_path = os.path.join(self.test_dir, json_filename_base)
        with open(json_file_path, 'w') as f:
            json.dump(coco_data, f, indent=1)

        return json_file_path, coco_data

    def test_resize_sequential_vs_parallel(self):
        """
        Test driver for sequence vs. parallel COCO dataset resizing.
        """

        self.set_up()

        try:
            num_images_to_test = 3
            original_w, original_h = 120, 80
            target_w, target_h = 60, 40
            target_size_test = (target_w, target_h)

            # Sequential run
            input_json_path_seq, _ = self._create_dummy_image_and_coco_json(
                image_dir=self.input_images_dir_seq,
                json_filename_base="input_coco_seq.json",
                num_images=num_images_to_test,
                original_size=(original_w, original_h)
            )
            output_json_path_seq = os.path.join(self.test_dir, 'output_coco_seq.json')

            print("Test: starting sequential resize (1 worker)...")
            resize_coco_dataset(
                input_folder=self.input_images_dir_seq,
                input_filename=input_json_path_seq,
                output_folder=self.output_images_dir_seq,
                output_filename=output_json_path_seq,
                target_size=target_size_test,
                n_workers=1
            )
            print(f"Test: Sequential resize complete. Output: {output_json_path_seq}")

            # Parallel run
            # For the parallel run, we use different input/output directories but can reuse the same logic
            # for creating the dummy dataset structure. The image files will be new.
            input_json_path_par, _ = self._create_dummy_image_and_coco_json(
                image_dir=self.input_images_dir_par,
                json_filename_base="input_coco_par.json",
                num_images=num_images_to_test,
                original_size=(original_w, original_h)
            )
            output_json_path_par = os.path.join(self.test_dir, 'output_coco_par.json')

            print("Test: Starting parallel resize (2 workers, thread pool)...")
            resize_coco_dataset(
                input_folder=self.input_images_dir_par,
                input_filename=input_json_path_par,
                output_folder=self.output_images_dir_par,
                output_filename=output_json_path_par,
                target_size=target_size_test,
                n_workers=2, # Using 2 workers for testing parallelism
                pool_type='thread'
            )
            print(f"Test: Parallel resize complete. Output: {output_json_path_par}")

            # Load results
            with open(output_json_path_seq, 'r') as f:
                data_seq = json.load(f)
            with open(output_json_path_par, 'r') as f:
                data_par = json.load(f)

            # Compare COCO JSON data
            # Compare images
            assert len(data_seq['images']) == num_images_to_test
            assert len(data_seq['images']) == len(data_par['images']), "Number of images differs"

            sorted_images_seq = sorted(data_seq['images'], key=lambda x: x['id'])
            sorted_images_par = sorted(data_par['images'], key=lambda x: x['id'])

            for img_s, img_p in zip(sorted_images_seq, sorted_images_par, strict=True):
                assert img_s['id'] == img_p['id'], \
                    f"Image IDs differ: {img_s['id']} vs {img_p['id']}"
                # Filenames are generated independently, so we only check structure, not exact name matching
                # across seq/par runs' inputs, but output structure should be consistent if input
                # names were e.g. image_0, image_1
                assert img_s['file_name'] == img_p['file_name']
                assert img_s['width'] == target_w, \
                    f"Seq image {img_s['id']} width incorrect"
                assert img_s['height'] == target_h, \
                    f"Seq image {img_s['id']} height incorrect"
                assert img_p['width'] == target_w, \
                    f"Par image {img_p['id']} width incorrect"
                assert img_p['height'] == target_h, \
                    f"Par image {img_p['id']} height incorrect"

            # Compare annotations
            assert len(data_seq['annotations']) == len(data_par['annotations']), \
                "Number of annotations differs"
            # Assuming _create_dummy_image_and_coco_json creates the same number of annotations for each test run

            sorted_anns_seq = sorted(data_seq['annotations'], key=lambda x: x['id'])
            sorted_anns_par = sorted(data_par['annotations'], key=lambda x: x['id'])

            for ann_s, ann_p in zip(sorted_anns_seq, sorted_anns_par, strict=True):
                assert ann_s['id'] == ann_p['id'], \
                    f"Annotation IDs differ: {ann_s['id']} vs {ann_p['id']}"
                assert ann_s['image_id'] == ann_p['image_id'], \
                    f"Annotation image_ids differ for ann_id {ann_s['id']}"
                assert ann_s['category_id'] == ann_p['category_id'], \
                    f"Annotation category_ids differ for ann_id {ann_s['id']}"

                # Check bbox scaling (example: original width 120, target 60 -> scale 0.5)
                # Original bbox: [10, 10, 20, 15] -> Scaled: [5, 5, 10, 7.5] (Floats possible)
                # Need to compare with tolerance or ensure rounding is handled if expecting ints
                # For this test, let's assume direct comparison works due to simple scaling.
                # If PIL's resize causes slight pixel shifts affecting precise sub-pixel bbox calculations,
                # then a tolerance (pytest.approx) would be better.
                # Given the current resize_coco_dataset logic, it's direct multiplication.
                for i in range(4):
                    assert abs(ann_s['bbox'][i] - ann_p['bbox'][i]) < 1e-5, \
                        f"Bbox element {i} differs for ann_id {ann_s['id']}: {ann_s['bbox']} vs {ann_p['bbox']}"

            # Compare actual image files
            seq_files = sorted(os.listdir(self.output_images_dir_seq))
            par_files = sorted(os.listdir(self.output_images_dir_par))

            assert len(seq_files) == num_images_to_test, "Incorrect number of output images (sequential)"
            assert len(seq_files) == len(par_files), "Number of output image files differs"

            for fname_s, fname_p in zip(seq_files, par_files, strict=True):
                assert fname_s == fname_p, "Output image filenames differ between seq and par runs"
                img_s_path = os.path.join(self.output_images_dir_seq, fname_s)
                img_p_path = os.path.join(self.output_images_dir_par, fname_p)

                with Image.open(img_s_path) as img_s_pil:
                    assert img_s_pil.size == target_size_test, \
                        f"Image {fname_s} (seq) has wrong dimensions: {img_s_pil.size}"
                with Image.open(img_p_path) as img_p_pil:
                    assert img_p_pil.size == target_size_test, \
                        f"Image {fname_p} (par) has wrong dimensions: {img_p_pil.size}"

            print("Test test_resize_sequential_vs_parallel PASSED")

        finally:
            self.tear_down()

    # ...def test_resize_sequential_vs_parallel(...)

# ...class TestResizeCocoDataset
648
+
649
def test_resize_coco_dataset_main():
    """
    Driver for the TestResizeCocoDataset() class.
    """

    print("Starting TestResizeCocoDataset main runner...")
    runner = TestResizeCocoDataset()
    runner.test_resize_sequential_vs_parallel()
    print("TestResizeCocoDataset main runner finished.")