PyPI - megadetector - Versions diffs - 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl - Mend

megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of megadetector might be problematic. Click here for more details.

Files changed (191) hide show

api/__init__.py +0 -0
api/batch_processing/__init__.py +0 -0
api/batch_processing/api_core/__init__.py +0 -0
api/batch_processing/api_core/batch_service/__init__.py +0 -0
api/batch_processing/api_core/batch_service/score.py +0 -1
api/batch_processing/api_core/server_job_status_table.py +0 -1
api/batch_processing/api_core_support/__init__.py +0 -0
api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
api/batch_processing/api_support/__init__.py +0 -0
api/batch_processing/api_support/summarize_daily_activity.py +0 -1
api/batch_processing/data_preparation/__init__.py +0 -0
api/batch_processing/data_preparation/manage_local_batch.py +93 -79
api/batch_processing/data_preparation/manage_video_batch.py +8 -8
api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
api/batch_processing/postprocessing/__init__.py +0 -0
api/batch_processing/postprocessing/add_max_conf.py +12 -12
api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
api/batch_processing/postprocessing/compare_batch_results.py +114 -44
api/batch_processing/postprocessing/convert_output_format.py +62 -19
api/batch_processing/postprocessing/load_api_results.py +17 -20
api/batch_processing/postprocessing/md_to_coco.py +31 -21
api/batch_processing/postprocessing/md_to_labelme.py +165 -68
api/batch_processing/postprocessing/merge_detections.py +40 -15
api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
api/synchronous/__init__.py +0 -0
api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
api/synchronous/api_core/animal_detection_api/config.py +35 -35
api/synchronous/api_core/tests/__init__.py +0 -0
api/synchronous/api_core/tests/load_test.py +109 -109
classification/__init__.py +0 -0
classification/aggregate_classifier_probs.py +21 -24
classification/analyze_failed_images.py +11 -13
classification/cache_batchapi_outputs.py +51 -51
classification/create_classification_dataset.py +69 -68
classification/crop_detections.py +54 -53
classification/csv_to_json.py +97 -100
classification/detect_and_crop.py +105 -105
classification/evaluate_model.py +43 -42
classification/identify_mislabeled_candidates.py +47 -46
classification/json_to_azcopy_list.py +10 -10
classification/json_validator.py +72 -71
classification/map_classification_categories.py +44 -43
classification/merge_classification_detection_output.py +68 -68
classification/prepare_classification_script.py +157 -154
classification/prepare_classification_script_mc.py +228 -228
classification/run_classifier.py +27 -26
classification/save_mislabeled.py +30 -30
classification/train_classifier.py +20 -20
classification/train_classifier_tf.py +21 -22
classification/train_utils.py +10 -10
data_management/__init__.py +0 -0
data_management/annotations/__init__.py +0 -0
data_management/annotations/annotation_constants.py +18 -31
data_management/camtrap_dp_to_coco.py +238 -0
data_management/cct_json_utils.py +107 -59
data_management/cct_to_md.py +176 -158
data_management/cct_to_wi.py +247 -219
data_management/coco_to_labelme.py +272 -0
data_management/coco_to_yolo.py +86 -62
data_management/databases/__init__.py +0 -0
data_management/databases/add_width_and_height_to_db.py +20 -16
data_management/databases/combine_coco_camera_traps_files.py +35 -31
data_management/databases/integrity_check_json_db.py +130 -83
data_management/databases/subset_json_db.py +25 -16
data_management/generate_crops_from_cct.py +27 -45
data_management/get_image_sizes.py +188 -144
data_management/importers/add_nacti_sizes.py +8 -8
data_management/importers/add_timestamps_to_icct.py +78 -78
data_management/importers/animl_results_to_md_results.py +158 -160
data_management/importers/auckland_doc_test_to_json.py +9 -9
data_management/importers/auckland_doc_to_json.py +8 -8
data_management/importers/awc_to_json.py +7 -7
data_management/importers/bellevue_to_json.py +15 -15
data_management/importers/cacophony-thermal-importer.py +13 -13
data_management/importers/carrizo_shrubfree_2018.py +8 -8
data_management/importers/carrizo_trail_cam_2017.py +8 -8
data_management/importers/cct_field_adjustments.py +9 -9
data_management/importers/channel_islands_to_cct.py +10 -10
data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
data_management/importers/ena24_to_json.py +7 -7
data_management/importers/filenames_to_json.py +8 -8
data_management/importers/helena_to_cct.py +7 -7
data_management/importers/idaho-camera-traps.py +7 -7
data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
data_management/importers/jb_csv_to_json.py +9 -9
data_management/importers/mcgill_to_json.py +8 -8
data_management/importers/missouri_to_json.py +18 -18
data_management/importers/nacti_fieldname_adjustments.py +10 -10
data_management/importers/noaa_seals_2019.py +8 -8
data_management/importers/pc_to_json.py +7 -7
data_management/importers/plot_wni_giraffes.py +7 -7
data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
data_management/importers/prepare_zsl_imerit.py +7 -7
data_management/importers/rspb_to_json.py +8 -8
data_management/importers/save_the_elephants_survey_A.py +8 -8
data_management/importers/save_the_elephants_survey_B.py +9 -9
data_management/importers/snapshot_safari_importer.py +26 -26
data_management/importers/snapshot_safari_importer_reprise.py +665 -665
data_management/importers/snapshot_serengeti_lila.py +14 -14
data_management/importers/sulross_get_exif.py +8 -9
data_management/importers/timelapse_csv_set_to_json.py +11 -11
data_management/importers/ubc_to_json.py +13 -13
data_management/importers/umn_to_json.py +7 -7
data_management/importers/wellington_to_json.py +8 -8
data_management/importers/wi_to_json.py +9 -9
data_management/importers/zamba_results_to_md_results.py +181 -181
data_management/labelme_to_coco.py +309 -159
data_management/labelme_to_yolo.py +103 -60
data_management/lila/__init__.py +0 -0
data_management/lila/add_locations_to_island_camera_traps.py +9 -9
data_management/lila/add_locations_to_nacti.py +147 -147
data_management/lila/create_lila_blank_set.py +114 -31
data_management/lila/create_lila_test_set.py +8 -8
data_management/lila/create_links_to_md_results_files.py +106 -106
data_management/lila/download_lila_subset.py +92 -90
data_management/lila/generate_lila_per_image_labels.py +56 -43
data_management/lila/get_lila_annotation_counts.py +18 -15
data_management/lila/get_lila_image_counts.py +11 -11
data_management/lila/lila_common.py +103 -70
data_management/lila/test_lila_metadata_urls.py +132 -116
data_management/ocr_tools.py +173 -128
data_management/read_exif.py +161 -99
data_management/remap_coco_categories.py +84 -0
data_management/remove_exif.py +58 -62
data_management/resize_coco_dataset.py +32 -44
data_management/wi_download_csv_to_coco.py +246 -0
data_management/yolo_output_to_md_output.py +86 -73
data_management/yolo_to_coco.py +535 -95
detection/__init__.py +0 -0
detection/detector_training/__init__.py +0 -0
detection/process_video.py +85 -33
detection/pytorch_detector.py +43 -25
detection/run_detector.py +157 -72
detection/run_detector_batch.py +189 -114
detection/run_inference_with_yolov5_val.py +118 -51
detection/run_tiled_inference.py +113 -42
detection/tf_detector.py +51 -28
detection/video_utils.py +606 -521
docs/source/conf.py +43 -0
md_utils/__init__.py +0 -0
md_utils/azure_utils.py +9 -9
md_utils/ct_utils.py +249 -70
md_utils/directory_listing.py +59 -64
md_utils/md_tests.py +968 -862
md_utils/path_utils.py +655 -155
md_utils/process_utils.py +157 -133
md_utils/sas_blob_utils.py +20 -20
md_utils/split_locations_into_train_val.py +45 -32
md_utils/string_utils.py +33 -10
md_utils/url_utils.py +208 -27
md_utils/write_html_image_list.py +51 -35
md_visualization/__init__.py +0 -0
md_visualization/plot_utils.py +102 -109
md_visualization/render_images_with_thumbnails.py +34 -34
md_visualization/visualization_utils.py +908 -311
md_visualization/visualize_db.py +109 -58
md_visualization/visualize_detector_output.py +61 -42
{megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
megadetector-5.0.9.dist-info/RECORD +224 -0
{megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
{megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
taxonomy_mapping/__init__.py +0 -0
taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
taxonomy_mapping/map_new_lila_datasets.py +154 -154
taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
taxonomy_mapping/preview_lila_taxonomy.py +591 -591
taxonomy_mapping/retrieve_sample_image.py +12 -12
taxonomy_mapping/simple_image_download.py +11 -11
taxonomy_mapping/species_lookup.py +10 -10
taxonomy_mapping/taxonomy_csv_checker.py +18 -18
taxonomy_mapping/taxonomy_graph.py +47 -47
taxonomy_mapping/validate_lila_category_mappings.py +83 -76
data_management/cct_json_to_filename_json.py +0 -89
data_management/cct_to_csv.py +0 -140
data_management/databases/remove_corrupted_images_from_db.py +0 -191
detection/detector_training/copy_checkpoints.py +0 -43
md_visualization/visualize_megadb.py +0 -183
megadetector-5.0.7.dist-info/RECORD +0 -202
{megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0

data_management/labelme_to_coco.py CHANGED Viewed

@@ -1,10 +1,10 @@
-########
-#
-# labelme_to_coco.py
-#
-# Converts a folder of labelme-formatted .json files to COCO.
-#
-########
+"""
+labelme_to_coco.py
+Converts a folder of labelme-formatted .json files to COCO.
+"""
 #%% Constants and imports
@@ -15,10 +15,177 @@ import uuid
 from md_utils import path_utils
 from md_visualization.visualization_utils import open_image
+from multiprocessing.pool import Pool, ThreadPool
+from functools import partial
 from tqdm import tqdm
-#%% Functions
+#%% Support functions
+def _add_category(category_name,category_name_to_id,candidate_category_id=0):
+    """
+    Adds the category [category_name] to the dict [category_name_to_id], by default
+    using the next available integer index.
+    """
+    if category_name in category_name_to_id:
+        return category_name_to_id[category_name]
+    while candidate_category_id in category_name_to_id.values():
+        candidate_category_id += 1
+    category_name_to_id[category_name] = candidate_category_id
+    return candidate_category_id
+def _process_labelme_file(image_fn_relative,input_folder,use_folders_as_labels,
+                          no_json_handling,validate_image_sizes,
+                          category_name_to_id,allow_new_categories=True):
+    """
+    Internal function for processing each image; this support function facilitates parallelization.
+    """
+    result = {}
+    result['im'] = None
+    result['annotations_this_image'] = None
+    result['status'] = None
+    image_fn_abs = os.path.join(input_folder,image_fn_relative)
+    json_fn_abs = os.path.splitext(image_fn_abs)[0] + '.json'
+    im = {}
+    im['id'] = image_fn_relative
+    im['file_name'] = image_fn_relative
+    # If there's no .json file for this image...
+    if not os.path.isfile(json_fn_abs):
+        # Either skip it...
+        if no_json_handling == 'skip':
+            print('Skipping image {} (no .json file)'.format(image_fn_relative))
+            result['status'] = 'skipped (no .json file)'
+            return result
+        # ...or error
+        elif no_json_handling == 'error':
+            raise ValueError('Image file {} has no corresponding .json file'.format(
+                image_fn_relative))
+        # ...or treat it as empty.
+        elif no_json_handling == 'empty':
+            try:
+                pil_im = open_image(image_fn_abs)
+            except Exception:
+                print('Warning: error opening image {}, skipping'.format(image_fn_abs))
+                result['status'] = 'image load error'
+                return result
+            im['width'] = pil_im.width
+            im['height'] = pil_im.height
+            # Just in case we need to differentiate between "no .json file" and "a .json file with no annotations"
+            im['no_labelme_json'] = True
+            shapes = []
+        else:
+            raise ValueError('Unrecognized specifier {} for handling images with no .json files'.format(
+                no_json_handling))
+    # If we found a .json file for this image...
+    else:
+        # Read the .json file
+        with open(json_fn_abs,'r') as f:
+            labelme_data = json.load(f)
+        im['width'] = labelme_data['imageWidth']
+        im['height'] = labelme_data['imageHeight']
+        if validate_image_sizes:
+            try:
+                pil_im = open_image(image_fn_abs)
+            except Exception:
+                print('Warning: error opening image {} for size validation, skipping'.format(image_fn_abs))
+                result['status'] = 'skipped (size validation error)'
+                return result
+            if not (im['width'] == pil_im.width and im['height'] == pil_im.height):
+                print('Warning: image size validation error for file {}'.format(image_fn_relative))
+                im['width'] = pil_im.width
+                im['height'] = pil_im.height
+                im['labelme_width'] = labelme_data['imageWidth']
+                im['labelme_height'] = labelme_data['imageHeight']
+        shapes = labelme_data['shapes']
+        if ('flags' in labelme_data) and (len(labelme_data['flags']) > 0):
+            im['flags'] = labelme_data['flags']
+    annotations_this_image = []
+    if len(shapes) == 0:
+        if allow_new_categories:
+            category_id = _add_category('empty',category_name_to_id)
+        else:
+            assert 'empty' in category_name_to_id
+            category_id = category_name_to_id['empty']
+        ann = {}
+        ann['id'] = str(uuid.uuid1())
+        ann['image_id'] = im['id']
+        ann['category_id'] = category_id
+        ann['sequence_level_annotation'] = False
+        annotations_this_image.append(ann)
+    else:
+        for shape in shapes:
+            if shape['shape_type'] != 'rectangle':
+                print('Only rectangles are supported, skipping an annotation of type {} in {}'.format(
+                    shape['shape_type'],image_fn_relative))
+                continue
+            if use_folders_as_labels:
+                category_name = os.path.basename(os.path.dirname(image_fn_abs))
+            else:
+                category_name = shape['label']
+            if allow_new_categories:
+                category_id = _add_category(category_name,category_name_to_id)
+            else:
+                assert category_name in category_name_to_id
+                category_id = category_name_to_id[category_name]
+            points = shape['points']
+            if len(points) != 2:
+                print('Warning: illegal rectangle with {} points for {}'.format(
+                    len(points),image_fn_relative))
+                continue
+            p0 = points[0]
+            p1 = points[1]
+            x0 = min(p0[0],p1[0])
+            x1 = max(p0[0],p1[0])
+            y0 = min(p0[1],p1[1])
+            y1 = max(p0[1],p1[1])
+            bbox = [x0,y0,abs(x1-x0),abs(y1-y0)]
+            ann = {}
+            ann['id'] = str(uuid.uuid1())
+            ann['image_id'] = im['id']
+            ann['category_id'] = category_id
+            ann['sequence_level_annotation'] = False
+            ann['bbox'] = bbox
+            annotations_this_image.append(ann)
+        # ...for each shape
+    result['im'] = im
+    result['annotations_this_image'] = annotations_this_image
+    return result
+# ...def _process_labelme_file(...)
+#%% Main function
 def labelme_to_coco(input_folder,
                     output_file=None,
@@ -32,12 +199,17 @@ def labelme_to_coco(input_folder,
                     recursive=True,
                     no_json_handling='skip',
                     validate_image_sizes=True,
-                    right_edge_quantization_threshold=None):
+                    max_workers=1,
+                    use_threads=True):
     """
-    Find all images in [input_folder] that have corresponding .json files, and convert
+    Finds all images in [input_folder] that have corresponding .json files, and converts
     to a COCO .json file.
-    Currently only supports bounding box annotations.
+    Currently only supports bounding box annotations and image-level flags (i.e., does not
+    support point or general polygon annotations).
+    Labelme's image-level flags don't quite fit the COCO annotations format, so they are attached
+    to image objects, rather than annotation objects.
     If output_file is None, just returns the resulting dict, does not write to file.
@@ -52,42 +224,90 @@ def labelme_to_coco(input_folder,
     file.  Empty images in the "lion" folder will still be given the label "empty" (or
     [empty_category_name]).
-    no_json_handling can be:
+    Args:
+        input_folder (str): input folder to search for images and Labelme .json files
+        output_file (str, optional): output file to which we should write COCO-formatted data; if None
+            this function just returns the COCO-formatted dict
+        category_id_to_category_name (dict, optional): dict mapping category IDs to category names;
+            really used to map Labelme category names to COCO category IDs.  IDs will be auto-generated
+            if this is None.
+        empty_category_id (int, optional): category ID to use for the not-very-COCO-like "empty" category;
+            also see the no_json_handling parameter.
+        info_struct (dict, optional): dict to stash in the "info" field of the resulting COCO dict
+        relative_paths_to_include (list, optional): allowlist of relative paths to include in the COCO
+            dict; there's no reason to specify this along with relative_paths_to_exclude.
+        relative_paths_to_exclude (list, optional): blocklist of relative paths to exclude from the COCO
+            dict; there's no reason to specify this along with relative_paths_to_include.
+        use_folders_as_labels (bool, optional): if this is True, class names will be pulled from folder names,
+            useful if you have images like a/b/cat/image001.jpg, a/b/dog/image002.jpg, etc.
+        recursive (bool, optional): whether to recurse into [input_folder]
+        no_json_handling (str, optional): how to deal with image files that have no corresponding .json files,
+            can be:
+                - 'skip': ignore image files with no corresponding .json files
+                - 'empty': treat image files with no corresponding .json files as empty
+                - 'error': throw an error when an image file has no corresponding .json file
+        validate_image_sizes (bool, optional): whether to load images to verify that the sizes specified
+            in the labelme files are correct
+        max_workers (int, optional): number of workers to use for parallelization, set to <=1 to disable
+            parallelization
+        use_threads (bool, optional): whether to use threads (True) or processes (False) for parallelization,
+            not relevant if max_workers <= 1
-    * 'skip': ignore image files with no corresponding .json files
-    * 'empty': treat image files with no corresponding .json files as empty
-    * 'error': throw an error when an image file has no corresponding .json file
-    right_edge_quantization_threshold is an off-by-default hack to handle cases where
-    boxes that really should be running off the right side of the image only extend like 99%
-    of the way there, due to what appears to be a slight bias inherent to MD.  If a box extends
-    within [right_edge_quantization_threshold] (a small number, from 0 to 1, but probably around
-    0.02) of the right edge of the image, it will be extended to the far right edge.
+    Returns:
+        dict: a COCO-formatted dictionary, identical to what's written to [output_file] if [output_file] is not None.
     """
+    if max_workers > 1:
+        assert category_id_to_category_name is not None, \
+            'When parallelizing labelme --> COCO conversion, you must supply a category mapping'
     if category_id_to_category_name is None:
         category_name_to_id = {}
     else:
         category_name_to_id = {v: k for k, v in category_id_to_category_name.items()}
     for category_name in category_name_to_id:
         try:
             category_name_to_id[category_name] = int(category_name_to_id[category_name])
         except ValueError:
             raise ValueError('Category IDs must be ints or string-formatted ints')
+    # If the user supplied an explicit empty category ID, and the empty category
+    # name is already in category_name_to_id, make sure they match.
+    if empty_category_id is not None:
+        if empty_category_name in category_name_to_id:
+            assert category_name_to_id[empty_category_name] == empty_category_id, \
+                'Ambiguous empty category specification'
+        if empty_category_id in category_id_to_category_name:
+            assert category_id_to_category_name[empty_category_id] == empty_category_name, \
+                'Ambiguous empty category specification'
+    else:
+        if empty_category_name in category_name_to_id:
+            empty_category_id = category_name_to_id[empty_category_name]
+    del category_id_to_category_name
     # Enumerate images
+    print('Enumerating images in {}'.format(input_folder))
     image_filenames_relative = path_utils.find_images(input_folder,recursive=recursive,
-                                                      return_relative_paths=True)
-    def add_category(category_name,candidate_category_id=0):
-        if category_name in category_name_to_id:
-            return category_name_to_id[category_name]
-        while candidate_category_id in category_name_to_id.values():
-            candidate_category_id += 1
-        category_name_to_id[category_name] = candidate_category_id
-        return candidate_category_id
+                                                      return_relative_paths=True,
+                                                      convert_slashes=True)
+    # Remove any images we're supposed to skip
+    if (relative_paths_to_include is not None) or (relative_paths_to_exclude is not None):
+        image_filenames_relative_to_process = []
+        for image_fn_relative in image_filenames_relative:
+            if relative_paths_to_include is not None and image_fn_relative not in relative_paths_to_include:
+                continue
+            if relative_paths_to_exclude is not None and image_fn_relative in relative_paths_to_exclude:
+                continue
+            image_filenames_relative_to_process.append(image_fn_relative)
+        print('Processing {} of {} images'.format(
+            len(image_filenames_relative_to_process),
+            len(image_filenames_relative)))
+        image_filenames_relative = image_filenames_relative_to_process
+    # If the user supplied a category ID to use for empty images...
     if empty_category_id is not None:
         try:
             empty_category_id = int(empty_category_id)
@@ -95,136 +315,52 @@ def labelme_to_coco(input_folder,
             raise ValueError('Category IDs must be ints or string-formatted ints')
     if empty_category_id is None:
-        empty_category_id = add_category(empty_category_name)
-    images = []
-    annotations = []
-    n_edges_quantized = 0
-    # image_fn_relative = image_filenames_relative[0]
-    for image_fn_relative in tqdm(image_filenames_relative):
-        if relative_paths_to_include is not None and image_fn_relative not in relative_paths_to_include:
-            continue
-        if relative_paths_to_exclude is not None and image_fn_relative in relative_paths_to_exclude:
-            continue
-        image_fn_abs = os.path.join(input_folder,image_fn_relative)
-        json_fn_abs = os.path.splitext(image_fn_abs)[0] + '.json'
-        im = {}
-        im['id'] = image_fn_relative
-        im['file_name'] = image_fn_relative
-        # If there's no .json file for this image...
-        if not os.path.isfile(json_fn_abs):
+        empty_category_id = _add_category(empty_category_name,category_name_to_id)
-            # Either skip it...
-            if no_json_handling == 'skip':
-                continue
-            # ...or error
-            elif no_json_handling == 'error':
-                raise ValueError('Image file {} has no corresponding .json file'.format(
-                    image_fn_relative))
-            # ...or treat it as empty.
-            elif no_json_handling == 'empty':
-                try:
-                    pil_im = open_image(image_fn_abs)
-                except Exception:
-                    print('Warning: error opening image {}, skipping'.format(image_fn_abs))
-                    continue
-                im['width'] = pil_im.width
-                im['height'] = pil_im.height
-                shapes = []
-            else:
-                raise ValueError('Unrecognized specifier {} for handling images with no .json files'.format(
-                    no_json_handling))
+    if max_workers <= 1:
-        # If we found a .json file for this image...
-        else:
+        image_results = []
+        for image_fn_relative in tqdm(image_filenames_relative):
-            # Read the .json file
-            with open(json_fn_abs,'r') as f:
-                labelme_data = json.load(f)
-            im['width'] = labelme_data['imageWidth']
-            im['height'] = labelme_data['imageHeight']
+            result = _process_labelme_file(image_fn_relative,input_folder,use_folders_as_labels,
+                                      no_json_handling,validate_image_sizes,
+                                      category_name_to_id,allow_new_categories=True)
+            image_results.append(result)
-            if validate_image_sizes:
-                try:
-                    pil_im = open_image(image_fn_abs)
-                except Exception:
-                    print('Warning: error opening image {}, skipping'.format(image_fn_abs))
-                    continue
-                assert im['width'] == pil_im.width and im['height'] == pil_im.height, \
-                    'Image size validation error for file {}'.format(image_fn_relative)
-            shapes = labelme_data['shapes']
+    else:
-        if len(shapes) == 0:
-            category_id = add_category('empty')
-            ann = {}
-            ann['id'] = str(uuid.uuid1())
-            ann['image_id'] = im['id']
-            ann['category_id'] = category_id
-            ann['sequence_level_annotation'] = False
-            annotations.append(ann)
+        n_workers = min(max_workers,len(image_filenames_relative))
+        assert category_name_to_id is not None
+        if use_threads:
+            pool = ThreadPool(n_workers)
         else:
-            for shape in shapes:
-                if shape['shape_type'] != 'rectangle':
-                    print('Only rectangles are supported, skipping an annotation of type {} in {}'.format(
-                        shape['shape_type'],image_fn_relative))
-                    continue
-                if use_folders_as_labels:
-                    category_name = os.path.basename(os.path.dirname(image_fn_abs))
-                else:
-                    category_name = shape['label']
-                category_id = add_category(category_name)
-                points = shape['points']
-                assert len(points) == 2, 'Illegal rectangle with {} points'.format(
-                    len(points))
-                p0 = points[0]
-                p1 = points[1]
-                x0 = min(p0[0],p1[0])
-                x1 = max(p0[0],p1[0])
-                y0 = min(p0[1],p1[1])
-                y1 = max(p0[1],p1[1])
-                if right_edge_quantization_threshold is not None:
-                    x1_rel = x1 / (im['width'] - 1)
-                    right_edge_distance = 1.0 - x1_rel
-                    if right_edge_distance < right_edge_quantization_threshold:
-                        n_edges_quantized += 1
-                        x1 = im['width'] - 1
-                bbox = [x0,y0,abs(x1-x0),abs(y1-y0)]
-                ann = {}
-                ann['id'] = str(uuid.uuid1())
-                ann['image_id'] = im['id']
-                ann['category_id'] = category_id
-                ann['sequence_level_annotation'] = False
-                ann['bbox'] = bbox
-                annotations.append(ann)
-            # ...for each shape
-        images.append(im)
-    # ..for each image
+            pool = Pool(n_workers)
+        image_results = list(tqdm(pool.imap(
+            partial(_process_labelme_file,
+                input_folder=input_folder,
+                use_folders_as_labels=use_folders_as_labels,
+                no_json_handling=no_json_handling,
+                validate_image_sizes=validate_image_sizes,
+                category_name_to_id=category_name_to_id,
+                allow_new_categories=False
+                ),image_filenames_relative), total=len(image_filenames_relative)))
+    images = []
+    annotations = []
-    if n_edges_quantized > 0:
-        print('Quantized the right edge in {} of {} images'.format(
-            n_edges_quantized,len(image_filenames_relative)))
+    # Flatten the lists of images and annotations
+    for result in image_results:
+        im = result['im']
+        annotations_this_image = result['annotations_this_image']
+        if im is None:
+            assert annotations_this_image is None
+        else:
+            images.append(im)
+            annotations.extend(annotations_this_image)
     output_dict = {}
     output_dict['images'] = images
     output_dict['annotations'] = annotations
@@ -257,12 +393,26 @@ def find_empty_labelme_files(input_folder,recursive=True):
     Returns a list of all image files in in [input_folder] associated with .json files that have
     no boxes in them.  Also returns a list of images with no associated .json files.  Specifically,
     returns a dict:
-    {
-       'images_with_empty_json_files':[list],
-       'images_with_no_json_files':[list],
-       'images_with_non_empty_json_files':[list]
-    }
+    .. code-block: none
+        {
+            'images_with_empty_json_files':[list],
+            'images_with_no_json_files':[list],
+            'images_with_non_empty_json_files':[list]
+        }
+    Args:
+        input_folder (str): the folder to search for empty (i.e., box-less) Labelme .json files
+        recursive (bool, optional): whether to recurse into [input_folder]
+    Returns:
+        dict: a dict with fields:
+            - images_with_empty_json_files: a list of all image files in [input_folder] associated with
+              .json files that have no boxes in them
+            - images_with_no_json_files: a list of images in [input_folder] with no associated .json files
+            - images_with_non_empty_json_files: a list of images in [input_folder] associated with .json
+              files that have at least one box
     """
     image_filenames_relative = path_utils.find_images(input_folder,recursive=True,
                                                       return_relative_paths=True)

megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl

Potentially problematic release.

megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl