megadetector 5.0.27__py3-none-any.whl → 5.0.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic.

Files changed (176)
  1. megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
  2. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
  3. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  7. megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
  8. megadetector/classification/aggregate_classifier_probs.py +3 -3
  9. megadetector/classification/analyze_failed_images.py +5 -5
  10. megadetector/classification/cache_batchapi_outputs.py +5 -5
  11. megadetector/classification/create_classification_dataset.py +11 -12
  12. megadetector/classification/crop_detections.py +10 -10
  13. megadetector/classification/csv_to_json.py +8 -8
  14. megadetector/classification/detect_and_crop.py +13 -15
  15. megadetector/classification/evaluate_model.py +7 -7
  16. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  17. megadetector/classification/json_to_azcopy_list.py +1 -1
  18. megadetector/classification/json_validator.py +29 -32
  19. megadetector/classification/map_classification_categories.py +9 -9
  20. megadetector/classification/merge_classification_detection_output.py +12 -9
  21. megadetector/classification/prepare_classification_script.py +19 -19
  22. megadetector/classification/prepare_classification_script_mc.py +23 -23
  23. megadetector/classification/run_classifier.py +4 -4
  24. megadetector/classification/save_mislabeled.py +6 -6
  25. megadetector/classification/train_classifier.py +1 -1
  26. megadetector/classification/train_classifier_tf.py +9 -9
  27. megadetector/classification/train_utils.py +10 -10
  28. megadetector/data_management/annotations/annotation_constants.py +1 -1
  29. megadetector/data_management/camtrap_dp_to_coco.py +45 -45
  30. megadetector/data_management/cct_json_utils.py +101 -101
  31. megadetector/data_management/cct_to_md.py +49 -49
  32. megadetector/data_management/cct_to_wi.py +33 -33
  33. megadetector/data_management/coco_to_labelme.py +75 -75
  34. megadetector/data_management/coco_to_yolo.py +189 -189
  35. megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
  36. megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
  37. megadetector/data_management/databases/integrity_check_json_db.py +202 -188
  38. megadetector/data_management/databases/subset_json_db.py +33 -33
  39. megadetector/data_management/generate_crops_from_cct.py +38 -38
  40. megadetector/data_management/get_image_sizes.py +54 -49
  41. megadetector/data_management/labelme_to_coco.py +130 -124
  42. megadetector/data_management/labelme_to_yolo.py +78 -72
  43. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  44. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  45. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  46. megadetector/data_management/lila/download_lila_subset.py +21 -24
  47. megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
  48. megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
  49. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  50. megadetector/data_management/lila/lila_common.py +70 -70
  51. megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
  52. megadetector/data_management/mewc_to_md.py +339 -340
  53. megadetector/data_management/ocr_tools.py +258 -252
  54. megadetector/data_management/read_exif.py +232 -223
  55. megadetector/data_management/remap_coco_categories.py +26 -26
  56. megadetector/data_management/remove_exif.py +31 -20
  57. megadetector/data_management/rename_images.py +187 -187
  58. megadetector/data_management/resize_coco_dataset.py +41 -41
  59. megadetector/data_management/speciesnet_to_md.py +41 -41
  60. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  61. megadetector/data_management/yolo_output_to_md_output.py +117 -120
  62. megadetector/data_management/yolo_to_coco.py +195 -188
  63. megadetector/detection/change_detection.py +831 -0
  64. megadetector/detection/process_video.py +341 -338
  65. megadetector/detection/pytorch_detector.py +308 -266
  66. megadetector/detection/run_detector.py +186 -166
  67. megadetector/detection/run_detector_batch.py +366 -364
  68. megadetector/detection/run_inference_with_yolov5_val.py +328 -325
  69. megadetector/detection/run_tiled_inference.py +312 -253
  70. megadetector/detection/tf_detector.py +24 -24
  71. megadetector/detection/video_utils.py +291 -283
  72. megadetector/postprocessing/add_max_conf.py +15 -11
  73. megadetector/postprocessing/categorize_detections_by_size.py +44 -44
  74. megadetector/postprocessing/classification_postprocessing.py +808 -311
  75. megadetector/postprocessing/combine_batch_outputs.py +20 -21
  76. megadetector/postprocessing/compare_batch_results.py +528 -517
  77. megadetector/postprocessing/convert_output_format.py +97 -97
  78. megadetector/postprocessing/create_crop_folder.py +220 -147
  79. megadetector/postprocessing/detector_calibration.py +173 -168
  80. megadetector/postprocessing/generate_csv_report.py +508 -0
  81. megadetector/postprocessing/load_api_results.py +25 -22
  82. megadetector/postprocessing/md_to_coco.py +129 -98
  83. megadetector/postprocessing/md_to_labelme.py +89 -83
  84. megadetector/postprocessing/md_to_wi.py +40 -40
  85. megadetector/postprocessing/merge_detections.py +87 -114
  86. megadetector/postprocessing/postprocess_batch_results.py +319 -302
  87. megadetector/postprocessing/remap_detection_categories.py +36 -36
  88. megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
  89. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  90. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  91. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
  92. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  93. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  94. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  95. megadetector/postprocessing/validate_batch_results.py +70 -70
  96. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  97. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
  98. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
  99. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -69
  100. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  101. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  102. megadetector/taxonomy_mapping/species_lookup.py +33 -33
  103. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  104. megadetector/taxonomy_mapping/taxonomy_graph.py +11 -11
  105. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  106. megadetector/utils/azure_utils.py +22 -22
  107. megadetector/utils/ct_utils.py +1019 -200
  108. megadetector/utils/directory_listing.py +21 -77
  109. megadetector/utils/gpu_test.py +22 -22
  110. megadetector/utils/md_tests.py +541 -518
  111. megadetector/utils/path_utils.py +1511 -406
  112. megadetector/utils/process_utils.py +41 -41
  113. megadetector/utils/sas_blob_utils.py +53 -49
  114. megadetector/utils/split_locations_into_train_val.py +73 -60
  115. megadetector/utils/string_utils.py +147 -26
  116. megadetector/utils/url_utils.py +463 -173
  117. megadetector/utils/wi_utils.py +2629 -2868
  118. megadetector/utils/write_html_image_list.py +137 -137
  119. megadetector/visualization/plot_utils.py +21 -21
  120. megadetector/visualization/render_images_with_thumbnails.py +37 -73
  121. megadetector/visualization/visualization_utils.py +424 -404
  122. megadetector/visualization/visualize_db.py +197 -190
  123. megadetector/visualization/visualize_detector_output.py +126 -98
  124. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/METADATA +6 -3
  125. megadetector-5.0.29.dist-info/RECORD +163 -0
  126. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
  127. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  128. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  129. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  130. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  131. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  132. megadetector/data_management/importers/awc_to_json.py +0 -191
  133. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  134. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  135. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  136. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  137. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  138. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  139. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  140. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  141. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  142. megadetector/data_management/importers/ena24_to_json.py +0 -276
  143. megadetector/data_management/importers/filenames_to_json.py +0 -386
  144. megadetector/data_management/importers/helena_to_cct.py +0 -283
  145. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  146. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  147. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  148. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  149. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  150. megadetector/data_management/importers/missouri_to_json.py +0 -490
  151. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  152. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  153. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  154. megadetector/data_management/importers/pc_to_json.py +0 -365
  155. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  156. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  157. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  158. megadetector/data_management/importers/rspb_to_json.py +0 -356
  159. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  160. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  161. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  162. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  163. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  164. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  165. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  166. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  167. megadetector/data_management/importers/ubc_to_json.py +0 -399
  168. megadetector/data_management/importers/umn_to_json.py +0 -507
  169. megadetector/data_management/importers/wellington_to_json.py +0 -263
  170. megadetector/data_management/importers/wi_to_json.py +0 -442
  171. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  172. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  173. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  174. megadetector-5.0.27.dist-info/RECORD +0 -208
  175. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
  176. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
@@ -2,7 +2,7 @@
 
  yolo_to_coco.py
 
- Converts a folder of YOLO-formatted annotation files to a COCO-formatted dataset.
+ Converts a folder of YOLO-formatted annotation files to a COCO-formatted dataset.
 
  """
 
@@ -31,6 +31,7 @@ def _filename_to_image_id(fn):
  """
  Image IDs can't have spaces in them, replace spaces with underscores
  """
+
  return fn.replace(' ','_').replace('\\','/')
 
 
@@ -38,27 +39,27 @@ def _process_image(fn_abs,input_folder,category_id_to_name,label_folder):
  """
  Internal support function for processing one image's labels.
  """
-
+
  # Create the image object for this image
  #
  # Always use forward slashes in image filenames and IDs
  image_fn_relative = os.path.relpath(fn_abs,input_folder).replace('\\','/')
  image_id = _filename_to_image_id(image_fn_relative)
-
+
  # This is done in a separate loop now
  #
  # assert image_id not in image_ids, \
  # 'Oops, you have hit a very esoteric case where you have the same filename ' + \
  # 'with both spaces and underscores, this is not currently handled.'
  # image_ids.add(image_id)
-
+
  im = {}
  im['file_name'] = image_fn_relative
  im['id'] = image_id
-
+
  annotations_this_image = []
-
- try:
+
+ try:
  pil_im = open_image(fn_abs)
  im_width, im_height = pil_im.size
  im['width'] = im_width
@@ -70,32 +71,32 @@ def _process_image(fn_abs,input_folder,category_id_to_name,label_folder):
  im['height'] = -1
  im['error'] = str(e)
  return (im,annotations_this_image)
-
+
  # Is there an annotation file for this image?
  if label_folder is not None:
  assert input_folder in fn_abs
  label_file_abs_base = fn_abs.replace(input_folder,label_folder)
  else:
  label_file_abs_base = fn_abs
-
+
  annotation_file = os.path.splitext(label_file_abs_base)[0] + '.txt'
  if not os.path.isfile(annotation_file):
  annotation_file = os.path.splitext(fn_abs)[0] + '.TXT'
-
+
  if os.path.isfile(annotation_file):
-
+
  with open(annotation_file,'r') as f:
  lines = f.readlines()
  lines = [s.strip() for s in lines]
-
+
  # s = lines[0]
  annotation_number = 0
-
+
  for s in lines:
-
+
  if len(s.strip()) == 0:
  continue
-
+
  tokens = s.split()
  assert len(tokens) == 5
  category_id = int(tokens[0])
@@ -107,35 +108,35 @@ def _process_image(fn_abs,input_folder,category_id_to_name,label_folder):
  ann['image_id'] = im['id']
  ann['category_id'] = category_id
  ann['sequence_level_annotation'] = False
-
+
  # COCO: [x_min, y_min, width, height] in absolute coordinates
  # YOLO: [class, x_center, y_center, width, height] in normalized coordinates
-
+
  yolo_bbox = [float(x) for x in tokens[1:]]
-
+
  normalized_x_center = yolo_bbox[0]
  normalized_y_center = yolo_bbox[1]
  normalized_width = yolo_bbox[2]
  normalized_height = yolo_bbox[3]
-
- absolute_x_center = normalized_x_center * im_width
+
+ absolute_x_center = normalized_x_center * im_width
  absolute_y_center = normalized_y_center * im_height
  absolute_width = normalized_width * im_width
  absolute_height = normalized_height * im_height
  absolute_x_min = absolute_x_center - absolute_width / 2
  absolute_y_min = absolute_y_center - absolute_height / 2
-
+
  coco_bbox = [absolute_x_min, absolute_y_min, absolute_width, absolute_height]
-
+
  ann['bbox'] = coco_bbox
  annotation_number += 1
-
- annotations_this_image.append(ann)
-
- # ...for each annotation
-
+
+ annotations_this_image.append(ann)
+
+ # ...for each annotation
+
  # ...if this image has annotations
-
+
  return (im,annotations_this_image)
 
  # ...def _process_image(...)
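For reference, the YOLO-to-COCO box conversion in the hunk above can be reproduced in isolation. The following is a minimal sketch, not part of the package; the label line and image size are hypothetical.

# Sketch of the YOLO -> COCO box conversion performed in _process_image()
line = '0 0.50 0.50 0.25 0.40'             # class x_center y_center width height (normalized)
im_width, im_height = 640, 480             # hypothetical image size in pixels
tokens = line.split()
x_center, y_center, width, height = [float(t) for t in tokens[1:]]
abs_width = width * im_width               # 160.0
abs_height = height * im_height            # 192.0
abs_x_min = x_center * im_width - abs_width / 2      # 240.0
abs_y_min = y_center * im_height - abs_height / 2    # 144.0
coco_bbox = [abs_x_min, abs_y_min, abs_width, abs_height]   # [240.0, 144.0, 160.0, 192.0]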
@@ -144,37 +145,37 @@ def _process_image(fn_abs,input_folder,category_id_to_name,label_folder):
  def load_yolo_class_list(class_name_file):
  """
  Loads a dictionary mapping zero-indexed IDs to class names from the text/yaml file
- [class_name_file].
-
+ [class_name_file].
+
  Args:
  class_name_file (str or list): this can be:
  - a .yaml or .yaml file in YOLO's dataset.yaml format
  - a .txt or .data file containing a flat list of class names
  - a list of class names
-
+
  Returns:
  dict: A dict mapping zero-indexed integer IDs to class names
  """
-
+
  # class_name_file can also be a list of class names
  if isinstance(class_name_file,list):
  category_id_to_name = {}
  for i_name,name in enumerate(class_name_file):
  category_id_to_name[i_name] = name
  return category_id_to_name
-
+
  ext = os.path.splitext(class_name_file)[1][1:]
  assert ext in ('yml','txt','yaml','data'), 'Unrecognized class name file type {}'.format(
  class_name_file)
-
+
  if ext in ('txt','data'):
-
+
  with open(class_name_file,'r') as f:
  lines = f.readlines()
  assert len(lines) > 0, 'Empty class name file {}'.format(class_name_file)
  class_names = [s.strip() for s in lines]
  assert len(lines[0]) > 0, 'Empty class name file {} (empty first line)'.format(class_name_file)
-
+
  # Blank lines should only appear at the end
  b_found_blank = False
  for s in lines:
@@ -183,17 +184,17 @@ def load_yolo_class_list(class_name_file):
  elif b_found_blank:
  raise ValueError('Invalid class name file {}, non-blank line after the last blank line'.format(
  class_name_file))
-
- category_id_to_name = {}
+
+ category_id_to_name = {}
  for i_category_id,category_name in enumerate(class_names):
  assert len(category_name) > 0
  category_id_to_name[i_category_id] = category_name
-
+
  else:
-
+
  assert ext in ('yml','yaml')
  category_id_to_name = read_classes_from_yolo_dataset_file(class_name_file)
-
+
  return category_id_to_name
 
  # ...load_yolo_class_list(...)
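As an aside, load_yolo_class_list() accepts a plain Python list as well as a .txt/.data/.yml/.yaml file, and always returns a dict keyed by zero-indexed category ID. A hypothetical example (the class names are illustrative only):

from megadetector.data_management.yolo_to_coco import load_yolo_class_list

category_id_to_name = load_yolo_class_list(['animal', 'person', 'vehicle'])
# -> {0: 'animal', 1: 'person', 2: 'vehicle'}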
@@ -202,91 +203,91 @@ def load_yolo_class_list(class_name_file):
  def validate_label_file(label_file,category_id_to_name=None,verbose=False):
  """"
  Verifies that [label_file] is a valid YOLO label file. Does not check the extension.
-
+
  Args:
  label_file (str): the .txt file to validate
  category_id_to_name (dict, optional): a dict mapping integer category IDs to names;
  if this is not None, this function errors if the file uses a category that's not
  in this dict
  verbose (bool, optional): enable additional debug console output
-
+
  Returns:
- dict: a dict with keys 'file' (the same as [label_file]) and 'errors' (a list of
+ dict: a dict with keys 'file' (the same as [label_file]) and 'errors' (a list of
  errors (if any) that we found in this file)
  """
-
+
  label_result = {}
  label_result['file'] = label_file
  label_result['errors'] = []
-
+
  try:
  with open(label_file,'r') as f:
  lines = f.readlines()
  except Exception as e:
  label_result['errors'].append('Read error: {}'.format(str(e)))
  return label_result
-
+
  # i_line 0; line = lines[i_line]
  for i_line,line in enumerate(lines):
  s = line.strip()
  if len(s) == 0 or s[0] == '#':
  continue
-
+
  try:
-
+
  tokens = s.split()
- assert len(tokens) == 5, '{} tokens'.format(len(tokens))
-
+ assert len(tokens) == 5, '{} tokens'.format(len(tokens))
+
  if category_id_to_name is not None:
  category_id = int(tokens[0])
  assert category_id in category_id_to_name, \
  'Unrecognized category ID {}'.format(category_id)
-
+
  yolo_bbox = [float(x) for x in tokens[1:]]
-
+
  except Exception as e:
  label_result['errors'].append('Token error at line {}: {}'.format(i_line,str(e)))
  continue
-
+
  normalized_x_center = yolo_bbox[0]
  normalized_y_center = yolo_bbox[1]
  normalized_width = yolo_bbox[2]
  normalized_height = yolo_bbox[3]
-
+
  normalized_x_min = normalized_x_center - normalized_width / 2.0
  normalized_x_max = normalized_x_center + normalized_width / 2.0
  normalized_y_min = normalized_y_center - normalized_height / 2.0
  normalized_y_max = normalized_y_center + normalized_height / 2.0
-
+
  if normalized_x_min < 0 or normalized_y_min < 0 or \
  normalized_x_max > 1 or normalized_y_max > 1:
  label_result['errors'].append('Invalid bounding box: {} {} {} {}'.format(
  normalized_x_min,normalized_y_min,normalized_x_max,normalized_y_max))
-
+
  # ...for each line
-
+
  if verbose:
  if len(label_result['errors']) > 0:
  print('Errors for {}:'.format(label_file))
  for error in label_result['errors']:
  print(error)
-
+
  return label_result
-
+
  # ...def validate_label_file(...)
 
-
- def validate_yolo_dataset(input_folder,
- class_name_file,
- n_workers=1,
- pool_type='thread',
+
+ def validate_yolo_dataset(input_folder,
+ class_name_file,
+ n_workers=1,
+ pool_type='thread',
  verbose=False):
  """
- Verifies all the labels in a YOLO dataset folder. Does not yet support the case where the
+ Verifies all the labels in a YOLO dataset folder. Does not yet support the case where the
  labels and images are in different folders (yolo_to_coco() supports this).
-
+
  Looks for:
-
+
  * Image files without label files
  * Text files without image files
  * Illegal classes in label files
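As context for validate_label_file(): each non-comment line of a YOLO label file must contain five whitespace-separated tokens, and the box must stay inside the unit square once the normalized center/size values are expanded to min/max. Hypothetical label lines illustrating the checks:

'0 0.50 0.50 0.25 0.40'   # passes: 5 tokens, box stays within [0,1]
'0 0.95 0.50 0.20 0.40'   # fails: x_max = 0.95 + 0.10 = 1.05 > 1
'0 0.50 0.50 0.25'        # fails: only 4 tokens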
@@ -294,103 +295,109 @@ def validate_yolo_dataset(input_folder,
 
  Args:
  input_folder (str): the YOLO dataset folder to validate
- class_name_file (str or list): a list of classes, a flat text file, or a yolo
- dataset.yml/.yaml file. If it's a dataset.yml file, that file should point to
+ class_name_file (str or list): a list of classes, a flat text file, or a yolo
+ dataset.yml/.yaml file. If it's a dataset.yml file, that file should point to
  input_folder as the base folder, though this is not explicitly checked.
  n_workers (int, optional): number of concurrent workers, set to <= 1 to disable
  parallelization
  pool_type (str, optional): 'thread' or 'process', worker type to use for parallelization;
  not used if [n_workers] <= 1
  verbose (bool, optional): enable additional debug console output
-
+
  Returns:
- dict: validation results, as a dict with fields:
-
+ dict: validation results, as a dict with fields:
+
  - image_files_without_label_files (list)
  - label_files_without_image_files (list)
  - label_results (list of dicts with field 'filename', 'errors') (list)
  """
-
+
  # Validate arguments
  assert os.path.isdir(input_folder), 'Could not find input folder {}'.format(input_folder)
  if n_workers > 1:
  assert pool_type in ('thread','process'), 'Illegal pool type {}'.format(pool_type)
-
+
  category_id_to_name = load_yolo_class_list(class_name_file)
-
+
  print('Enumerating files in {}'.format(input_folder))
-
+
  all_files = recursive_file_list(input_folder,recursive=True,return_relative_paths=False,
  convert_slashes=True)
  label_files = [fn for fn in all_files if fn.endswith('.txt')]
  image_files = find_image_strings(all_files)
  print('Found {} images files and {} label files in {}'.format(
  len(image_files),len(label_files),input_folder))
-
+
  label_files_set = set(label_files)
-
+
  image_files_without_extension = set()
  for fn in image_files:
  image_file_without_extension = os.path.splitext(fn)[0]
  assert image_file_without_extension not in image_files_without_extension, \
  'Duplicate image file, likely with different extensions: {}'.format(fn)
  image_files_without_extension.add(image_file_without_extension)
-
+
  print('Looking for missing image/label files')
-
+
  image_files_without_label_files = []
  label_files_without_images = []
-
+
  for image_file in tqdm(image_files):
  expected_label_file = os.path.splitext(image_file)[0] + '.txt'
  if expected_label_file not in label_files_set:
  image_files_without_label_files.append(image_file)
-
+
  for label_file in tqdm(label_files):
  expected_image_file_without_extension = os.path.splitext(label_file)[0]
  if expected_image_file_without_extension not in image_files_without_extension:
  label_files_without_images.append(label_file)
-
+
  print('Found {} image files without labels, {} labels without images'.format(
  len(image_files_without_label_files),len(label_files_without_images)))
 
  print('Validating label files')
-
+
  if n_workers <= 1:
-
- label_results = []
- for fn_abs in tqdm(label_files):
+
+ label_results = []
+ for fn_abs in tqdm(label_files):
  label_results.append(validate_label_file(fn_abs,
  category_id_to_name=category_id_to_name,
  verbose=verbose))
-
+
  else:
-
+
  assert pool_type in ('process','thread'), 'Illegal pool type {}'.format(pool_type)
-
- if pool_type == 'thread':
- pool = ThreadPool(n_workers)
- else:
- pool = Pool(n_workers)
-
- print('Starting a {} pool of {} workers'.format(pool_type,n_workers))
-
- p = partial(validate_label_file,
- category_id_to_name=category_id_to_name,
- verbose=verbose)
- label_results = list(tqdm(pool.imap(p, label_files),
- total=len(label_files)))
-
+
+ pool = None
+ try:
+ if pool_type == 'thread':
+ pool = ThreadPool(n_workers)
+ else:
+ pool = Pool(n_workers)
+
+ print('Starting a {} pool of {} workers'.format(pool_type,n_workers))
+
+ p = partial(validate_label_file,
+ category_id_to_name=category_id_to_name,
+ verbose=verbose)
+ label_results = list(tqdm(pool.imap(p, label_files),
+ total=len(label_files)))
+ finally:
+ pool.close()
+ pool.join()
+ print("Pool closed and joined for label file validation")
+
  assert len(label_results) == len(label_files)
-
+
  validation_results = {}
  validation_results['image_files_without_label_files'] = image_files_without_label_files
  validation_results['label_files_without_images'] = label_files_without_images
  validation_results['label_results'] = label_results
-
+
  return validation_results
-
- # ...validate_yolo_dataset(...)
+
+ # ...validate_yolo_dataset(...)
 
 
  #%% Main conversion function
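The functional change in this hunk is that 5.0.29 closes and joins the worker pool in a finally block, so workers are released even if label validation raises. A minimal standalone sketch of that pattern follows; the helper name and the None guard are ours, not the package's.

from functools import partial
from multiprocessing.pool import Pool, ThreadPool
from tqdm import tqdm

def _map_with_pool(func, items, n_workers=4, pool_type='thread', **kwargs):
    # Map func over items with a worker pool, guaranteeing cleanup on error
    pool = None
    try:
        pool = ThreadPool(n_workers) if pool_type == 'thread' else Pool(n_workers)
        return list(tqdm(pool.imap(partial(func, **kwargs), items), total=len(items)))
    finally:
        if pool is not None:
            pool.close()
            pool.join()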
@@ -411,32 +418,32 @@ def yolo_to_coco(input_folder,
  label_folder=None):
  """
  Converts a YOLO-formatted dataset to a COCO-formatted dataset.
-
- All images will be assigned an "error" value, usually None.
-
+
+ All images will be assigned an "error" value, usually None.
+
  Args:
- input_folder (str): the YOLO dataset folder to convert. If the image and label
+ input_folder (str): the YOLO dataset folder to convert. If the image and label
  folders are different, this is the image folder, and [label_folder] is the
  label folder.
- class_name_file (str or list): a list of classes, a flat text file, or a yolo
- dataset.yml/.yaml file. If it's a dataset.yml file, that file should point to
+ class_name_file (str or list): a list of classes, a flat text file, or a yolo
+ dataset.yml/.yaml file. If it's a dataset.yml file, that file should point to
  input_folder as the base folder, though this is not explicitly checked.
  output_file (str, optional): .json file to which we should write COCO .json data
  empty_image_handling (str, optional): how to handle images with no boxes; whether
- this includes images with no .txt files depending on the value of
+ this includes images with no .txt files depending on the value of
  [allow_images_without_label_files]. Can be:
-
+
  - 'no_annotations': include the image in the image list, with no annotations
  - 'empty_annotations': include the image in the image list, and add an annotation without
  any bounding boxes, using a category called [empty_image_category_name].
  - 'skip': don't include the image in the image list
- - 'error': there shouldn't be any empty images
+ - 'error': there shouldn't be any empty images
  error_image_handling (str, optional): how to handle images that don't load properly; can
  be:
-
+
  - 'skip': don't include the image at all
  - 'no_annotations': include with no annotations
-
+
  n_workers (int, optional): number of concurrent workers, set to <= 1 to disable
  parallelization
  pool_type (str, optional): 'thread' or 'process', worker type to use for parallelization;
@@ -444,27 +451,27 @@ def yolo_to_coco(input_folder,
  recursive (bool, optional): whether to recurse into [input_folder]
  exclude_string (str, optional): exclude any images whose filename contains a string
  include_string (str, optional): include only images whose filename contains a string
- overwrite_handling (bool, optional): behavior if output_file exists ('load', 'overwrite', or
+ overwrite_handling (bool, optional): behavior if output_file exists ('load', 'overwrite', or
  'error')
  label_folder (str, optional): label folder, if different from the image folder
-
+
  Returns:
  dict: COCO-formatted data, the same as what's written to [output_file]
  """
-
+
  ## Validate input
-
+
  input_folder = input_folder.replace('\\','/')
-
+
  assert os.path.isdir(input_folder)
  assert os.path.isfile(class_name_file)
-
+
  assert empty_image_handling in \
  ('no_annotations','empty_annotations','skip','error'), \
  'Unrecognized empty image handling spec: {}'.format(empty_image_handling)
-
+
  if (output_file is not None) and os.path.isfile(output_file):
-
+
  if overwrite_handling == 'overwrite':
  print('Warning: output file {} exists, over-writing'.format(output_file))
  elif overwrite_handling == 'load':
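Putting the documented options together, a usage sketch for the converter; the paths are hypothetical, and the parameter names are taken from the docstring above.

from megadetector.data_management.yolo_to_coco import yolo_to_coco

coco_data = yolo_to_coco('/data/yolo-dataset',              # image folder (hypothetical)
                         '/data/yolo-dataset/classes.txt',  # flat class-name file
                         output_file='/data/yolo-dataset.json',
                         empty_image_handling='no_annotations',
                         error_image_handling='skip',
                         n_workers=4,
                         pool_type='thread',
                         overwrite_handling='overwrite')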
@@ -476,62 +483,62 @@ def yolo_to_coco(input_folder,
  raise ValueError('Output file {} exists'.format(output_file))
  else:
  raise ValueError('Unrecognized overwrite_handling value: {}'.format(overwrite_handling))
-
-
+
+
  ## Read class names
-
+
  category_id_to_name = load_yolo_class_list(class_name_file)
-
-
+
+
  # Find or create the empty image category, if necessary
  empty_category_id = None
-
+
  if (empty_image_handling == 'empty_annotations'):
  category_name_to_id = invert_dictionary(category_id_to_name)
  if empty_image_category_name in category_name_to_id:
  empty_category_id = category_name_to_id[empty_image_category_name]
  print('Using existing empty image category with name {}, ID {}'.format(
- empty_image_category_name,empty_category_id))
+ empty_image_category_name,empty_category_id))
  else:
  empty_category_id = len(category_id_to_name)
  print('Adding an empty category with name {}, ID {}'.format(
  empty_image_category_name,empty_category_id))
  category_id_to_name[empty_category_id] = empty_image_category_name
-
-
+
+
  ## Enumerate images
-
+
  print('Enumerating images...')
-
+
  image_files_abs = find_images(input_folder,recursive=recursive,convert_slashes=True)
  assert not any(['\\' in fn for fn in image_files_abs])
 
  n_files_original = len(image_files_abs)
-
+
  # Optionally include/exclude images matching specific strings
  if exclude_string is not None:
  image_files_abs = [fn for fn in image_files_abs if exclude_string not in fn]
  if include_string is not None:
  image_files_abs = [fn for fn in image_files_abs if include_string in fn]
-
+
  if len(image_files_abs) != n_files_original or exclude_string is not None or include_string is not None:
  n_excluded = n_files_original - len(image_files_abs)
  print('Excluded {} of {} images based on filenames'.format(n_excluded,n_files_original))
-
+
  categories = []
-
+
  for category_id in category_id_to_name:
  categories.append({'id':category_id,'name':category_id_to_name[category_id]})
-
+
  info = {}
  info['version'] = '1.0'
  info['description'] = 'Converted from YOLO format'
-
+
  image_ids = set()
-
-
+
+
  ## If we're expected to have labels for every image, check before we process all the images
-
+
  if not allow_images_without_label_files:
  print('Verifying that label files exist')
  # image_file_abs = image_files_abs[0]
@@ -544,88 +551,88 @@ def yolo_to_coco(input_folder,
  label_file_abs = os.path.splitext(label_file_abs_base)[0] + '.txt'
  assert os.path.isfile(label_file_abs), \
  'No annotation file for {}'.format(image_file_abs)
-
-
+
+
  ## Initial loop to make sure image IDs will be unique
-
+
  print('Validating image IDs...')
-
+
  for fn_abs in tqdm(image_files_abs):
-
+
  fn_relative = os.path.relpath(fn_abs,input_folder).replace('\\','/')
  image_id = _filename_to_image_id(fn_relative)
  assert image_id not in image_ids, \
  'Oops, you have hit a very esoteric case where you have the same filename ' + \
  'with both spaces and underscores, this is not currently handled.'
  image_ids.add(image_id)
-
-
+
+
  ## Main loop to process labels
-
+
  print('Processing labels...')
-
+
  if n_workers <= 1:
-
- image_results = []
+
+ image_results = []
  # fn_abs = image_files_abs[0]
- for fn_abs in tqdm(image_files_abs):
+ for fn_abs in tqdm(image_files_abs):
  image_results.append(_process_image(fn_abs,
  input_folder,
  category_id_to_name,
  label_folder))
-
+
  else:
-
+
  assert pool_type in ('process','thread'), 'Illegal pool type {}'.format(pool_type)
-
+
  if pool_type == 'thread':
  pool = ThreadPool(n_workers)
  else:
  pool = Pool(n_workers)
-
+
  print('Starting a {} pool of {} workers'.format(pool_type,n_workers))
-
+
  p = partial(_process_image,
  input_folder=input_folder,
  category_id_to_name=category_id_to_name,
  label_folder=label_folder)
  image_results = list(tqdm(pool.imap(p, image_files_abs),
  total=len(image_files_abs)))
-
-
+
+
  assert len(image_results) == len(image_files_abs)
-
-
+
+
  ## Re-assembly of results into a COCO dict
-
+
  print('Assembling labels...')
-
+
  images = []
  annotations = []
-
+
  for image_result in tqdm(image_results):
-
+
  im = image_result[0]
  annotations_this_image = image_result[1]
-
+
  # If we have annotations for this image
  if len(annotations_this_image) > 0:
  assert im['error'] is None
  images.append(im)
  for ann in annotations_this_image:
  annotations.append(ann)
-
+
  # If this image failed to read
  elif im['error'] is not None:
-
+
  if error_image_handling == 'skip':
  pass
  elif error_image_handling == 'no_annotations':
- images.append(im)
-
+ images.append(im)
+
  # If this image read successfully, but there are no annotations
  else:
-
+
  if empty_image_handling == 'skip':
  pass
  elif empty_image_handling == 'no_annotations':
@@ -641,13 +648,13 @@ def yolo_to_coco(input_folder,
  # we're adopting.
  # ann['bbox'] = [0,0,0,0]
  annotations.append(ann)
- images.append(im)
-
+ images.append(im)
+
  # ...for each image result
-
+
  print('Read {} annotations for {} images'.format(len(annotations),
  len(images)))
-
+
  d = {}
  d['images'] = images
  d['annotations'] = annotations
@@ -667,25 +674,25 @@ def yolo_to_coco(input_folder,
  #%% Interactive driver
 
  if False:
-
+
  pass
 
  #%% Convert YOLO folders to COCO
-
+
  preview_folder = '/home/user/data/noaa-fish/val-coco-conversion-preview'
  input_folder = '/home/user/data/noaa-fish/val'
  output_file = '/home/user/data/noaa-fish/val.json'
  class_name_file = '/home/user/data/noaa-fish/AllImagesWithAnnotations/classes.txt'
 
  d = yolo_to_coco(input_folder,class_name_file,output_file)
-
+
  input_folder = '/home/user/data/noaa-fish/train'
  output_file = '/home/user/data/noaa-fish/train.json'
  class_name_file = '/home/user/data/noaa-fish/AllImagesWithAnnotations/classes.txt'
 
  d = yolo_to_coco(input_folder,class_name_file,output_file)
-
-
+
+
  #%% Check DB integrity
 
  from megadetector.data_management.databases import integrity_check_json_db
@@ -715,7 +722,7 @@ if False:
  output_dir=preview_folder,
  image_base_dir=input_folder,
  options=viz_options)
-
+
  from megadetector.utils.path_utils import open_file
  open_file(html_output_file)
 