PyPI - megadetector - Versions diffs - 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl - Mend

megadetector 5.0.28 (py3-none-any.whl) → 10.0.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of megadetector might be problematic. Click here for more details.

Files changed (197)

megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
megadetector/classification/aggregate_classifier_probs.py +3 -3
megadetector/classification/analyze_failed_images.py +5 -5
megadetector/classification/cache_batchapi_outputs.py +5 -5
megadetector/classification/create_classification_dataset.py +11 -12
megadetector/classification/crop_detections.py +10 -10
megadetector/classification/csv_to_json.py +8 -8
megadetector/classification/detect_and_crop.py +13 -15
megadetector/classification/efficientnet/model.py +8 -8
megadetector/classification/efficientnet/utils.py +6 -5
megadetector/classification/evaluate_model.py +7 -7
megadetector/classification/identify_mislabeled_candidates.py +6 -6
megadetector/classification/json_to_azcopy_list.py +1 -1
megadetector/classification/json_validator.py +29 -32
megadetector/classification/map_classification_categories.py +9 -9
megadetector/classification/merge_classification_detection_output.py +12 -9
megadetector/classification/prepare_classification_script.py +19 -19
megadetector/classification/prepare_classification_script_mc.py +26 -26
megadetector/classification/run_classifier.py +4 -4
megadetector/classification/save_mislabeled.py +6 -6
megadetector/classification/train_classifier.py +1 -1
megadetector/classification/train_classifier_tf.py +9 -9
megadetector/classification/train_utils.py +10 -10
megadetector/data_management/annotations/annotation_constants.py +1 -2
megadetector/data_management/camtrap_dp_to_coco.py +79 -46
megadetector/data_management/cct_json_utils.py +103 -103
megadetector/data_management/cct_to_md.py +49 -49
megadetector/data_management/cct_to_wi.py +33 -33
megadetector/data_management/coco_to_labelme.py +75 -75
megadetector/data_management/coco_to_yolo.py +210 -193
megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
megadetector/data_management/databases/integrity_check_json_db.py +228 -200
megadetector/data_management/databases/subset_json_db.py +33 -33
megadetector/data_management/generate_crops_from_cct.py +88 -39
megadetector/data_management/get_image_sizes.py +54 -49
megadetector/data_management/labelme_to_coco.py +133 -125
megadetector/data_management/labelme_to_yolo.py +159 -73
megadetector/data_management/lila/create_lila_blank_set.py +81 -83
megadetector/data_management/lila/create_lila_test_set.py +32 -31
megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
megadetector/data_management/lila/download_lila_subset.py +21 -24
megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
megadetector/data_management/lila/get_lila_image_counts.py +22 -22
megadetector/data_management/lila/lila_common.py +73 -70
megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
megadetector/data_management/mewc_to_md.py +344 -340
megadetector/data_management/ocr_tools.py +262 -255
megadetector/data_management/read_exif.py +249 -227
megadetector/data_management/remap_coco_categories.py +90 -28
megadetector/data_management/remove_exif.py +81 -21
megadetector/data_management/rename_images.py +187 -187
megadetector/data_management/resize_coco_dataset.py +588 -120
megadetector/data_management/speciesnet_to_md.py +41 -41
megadetector/data_management/wi_download_csv_to_coco.py +55 -55
megadetector/data_management/yolo_output_to_md_output.py +248 -122
megadetector/data_management/yolo_to_coco.py +333 -191
megadetector/detection/change_detection.py +832 -0
megadetector/detection/process_video.py +340 -337
megadetector/detection/pytorch_detector.py +358 -278
megadetector/detection/run_detector.py +399 -186
megadetector/detection/run_detector_batch.py +404 -377
megadetector/detection/run_inference_with_yolov5_val.py +340 -327
megadetector/detection/run_tiled_inference.py +257 -249
megadetector/detection/tf_detector.py +24 -24
megadetector/detection/video_utils.py +332 -295
megadetector/postprocessing/add_max_conf.py +19 -11
megadetector/postprocessing/categorize_detections_by_size.py +45 -45
megadetector/postprocessing/classification_postprocessing.py +468 -433
megadetector/postprocessing/combine_batch_outputs.py +23 -23
megadetector/postprocessing/compare_batch_results.py +590 -525
megadetector/postprocessing/convert_output_format.py +106 -102
megadetector/postprocessing/create_crop_folder.py +347 -147
megadetector/postprocessing/detector_calibration.py +173 -168
megadetector/postprocessing/generate_csv_report.py +508 -499
megadetector/postprocessing/load_api_results.py +48 -27
megadetector/postprocessing/md_to_coco.py +133 -102
megadetector/postprocessing/md_to_labelme.py +107 -90
megadetector/postprocessing/md_to_wi.py +40 -40
megadetector/postprocessing/merge_detections.py +92 -114
megadetector/postprocessing/postprocess_batch_results.py +319 -301
megadetector/postprocessing/remap_detection_categories.py +91 -38
megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
megadetector/postprocessing/separate_detections_into_folders.py +226 -211
megadetector/postprocessing/subset_json_detector_output.py +265 -262
megadetector/postprocessing/top_folders_to_bottom.py +45 -45
megadetector/postprocessing/validate_batch_results.py +70 -70
megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
megadetector/taxonomy_mapping/simple_image_download.py +8 -8
megadetector/taxonomy_mapping/species_lookup.py +156 -74
megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
megadetector/utils/ct_utils.py +1049 -211
megadetector/utils/directory_listing.py +21 -77
megadetector/utils/gpu_test.py +22 -22
megadetector/utils/md_tests.py +632 -529
megadetector/utils/path_utils.py +1520 -431
megadetector/utils/process_utils.py +41 -41
megadetector/utils/split_locations_into_train_val.py +62 -62
megadetector/utils/string_utils.py +148 -27
megadetector/utils/url_utils.py +489 -176
megadetector/utils/wi_utils.py +2658 -2526
megadetector/utils/write_html_image_list.py +137 -137
megadetector/visualization/plot_utils.py +34 -30
megadetector/visualization/render_images_with_thumbnails.py +39 -74
megadetector/visualization/visualization_utils.py +487 -435
megadetector/visualization/visualize_db.py +232 -198
megadetector/visualization/visualize_detector_output.py +82 -76
{megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
megadetector-10.0.0.dist-info/RECORD +139 -0
{megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
megadetector/api/batch_processing/api_core/__init__.py +0 -0
megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
megadetector/api/batch_processing/api_core/server.py +0 -294
megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
megadetector/api/batch_processing/api_core/server_utils.py +0 -88
megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
megadetector/api/batch_processing/api_support/__init__.py +0 -0
megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
megadetector/api/synchronous/__init__.py +0 -0
megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
megadetector/data_management/importers/add_nacti_sizes.py +0 -52
megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
megadetector/data_management/importers/awc_to_json.py +0 -191
megadetector/data_management/importers/bellevue_to_json.py +0 -272
megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
megadetector/data_management/importers/cct_field_adjustments.py +0 -58
megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
megadetector/data_management/importers/ena24_to_json.py +0 -276
megadetector/data_management/importers/filenames_to_json.py +0 -386
megadetector/data_management/importers/helena_to_cct.py +0 -283
megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
megadetector/data_management/importers/jb_csv_to_json.py +0 -150
megadetector/data_management/importers/mcgill_to_json.py +0 -250
megadetector/data_management/importers/missouri_to_json.py +0 -490
megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
megadetector/data_management/importers/noaa_seals_2019.py +0 -181
megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
megadetector/data_management/importers/pc_to_json.py +0 -365
megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
megadetector/data_management/importers/rspb_to_json.py +0 -356
megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
megadetector/data_management/importers/sulross_get_exif.py +0 -65
megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
megadetector/data_management/importers/ubc_to_json.py +0 -399
megadetector/data_management/importers/umn_to_json.py +0 -507
megadetector/data_management/importers/wellington_to_json.py +0 -263
megadetector/data_management/importers/wi_to_json.py +0 -442
megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
megadetector/utils/azure_utils.py +0 -178
megadetector/utils/sas_blob_utils.py +0 -509
megadetector-5.0.28.dist-info/RECORD +0 -209
/megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
{megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
{megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0

megadetector/data_management/yolo_to_coco.py CHANGED Viewed

@@ -2,7 +2,7 @@
 yolo_to_coco.py
-Converts a folder of YOLO-formatted annotation files to a COCO-formatted dataset.
+Converts a folder of YOLO-formatted annotation files to a COCO-formatted dataset.
 """
@@ -10,6 +10,8 @@ Converts a folder of YOLO-formatted annotation files to a COCO-formatted dataset
 import json
 import os
+import argparse
+import sys
 from multiprocessing.pool import ThreadPool
 from multiprocessing.pool import Pool
@@ -22,7 +24,8 @@ from megadetector.utils.path_utils import recursive_file_list
 from megadetector.utils.path_utils import find_image_strings
 from megadetector.utils.ct_utils import invert_dictionary
 from megadetector.visualization.visualization_utils import open_image
-from megadetector.data_management.yolo_output_to_md_output import read_classes_from_yolo_dataset_file
+from megadetector.data_management.yolo_output_to_md_output import \
+    read_classes_from_yolo_dataset_file
 #%% Support functions
@@ -31,6 +34,7 @@ def _filename_to_image_id(fn):
     """
     Image IDs can't have spaces in them, replace spaces with underscores
     """
     return fn.replace(' ','_').replace('\\','/')
@@ -38,27 +42,27 @@ def _process_image(fn_abs,input_folder,category_id_to_name,label_folder):
     """
     Internal support function for processing one image's labels.
     """
     # Create the image object for this image
     #
     # Always use forward slashes in image filenames and IDs
     image_fn_relative = os.path.relpath(fn_abs,input_folder).replace('\\','/')
     image_id = _filename_to_image_id(image_fn_relative)
     # This is done in a separate loop now
     #
     # assert image_id not in image_ids, \
     #    'Oops, you have hit a very esoteric case where you have the same filename ' + \
     #    'with both spaces and underscores, this is not currently handled.'
     # image_ids.add(image_id)
     im = {}
     im['file_name'] = image_fn_relative
     im['id'] = image_id
     annotations_this_image = []
-    try:
+    try:
         pil_im = open_image(fn_abs)
         im_width, im_height = pil_im.size
         im['width'] = im_width
@@ -70,32 +74,32 @@ def _process_image(fn_abs,input_folder,category_id_to_name,label_folder):
         im['height'] = -1
         im['error'] = str(e)
         return (im,annotations_this_image)
     # Is there an annotation file for this image?
     if label_folder is not None:
         assert input_folder in fn_abs
         label_file_abs_base = fn_abs.replace(input_folder,label_folder)
     else:
         label_file_abs_base = fn_abs
     annotation_file = os.path.splitext(label_file_abs_base)[0] + '.txt'
     if not os.path.isfile(annotation_file):
         annotation_file = os.path.splitext(fn_abs)[0] + '.TXT'
     if os.path.isfile(annotation_file):
         with open(annotation_file,'r') as f:
             lines = f.readlines()
         lines = [s.strip() for s in lines]
         # s = lines[0]
         annotation_number = 0
         for s in lines:
             if len(s.strip()) == 0:
                 continue
             tokens = s.split()
             assert len(tokens) == 5
             category_id = int(tokens[0])
@@ -107,35 +111,35 @@ def _process_image(fn_abs,input_folder,category_id_to_name,label_folder):
             ann['image_id'] = im['id']
             ann['category_id'] = category_id
             ann['sequence_level_annotation'] = False
             # COCO: [x_min, y_min, width, height] in absolute coordinates
             # YOLO: [class, x_center, y_center, width, height] in normalized coordinates
             yolo_bbox = [float(x) for x in tokens[1:]]
             normalized_x_center = yolo_bbox[0]
             normalized_y_center = yolo_bbox[1]
             normalized_width = yolo_bbox[2]
             normalized_height = yolo_bbox[3]
-            absolute_x_center = normalized_x_center * im_width
+            absolute_x_center = normalized_x_center * im_width
             absolute_y_center = normalized_y_center * im_height
             absolute_width = normalized_width * im_width
             absolute_height = normalized_height * im_height
             absolute_x_min = absolute_x_center - absolute_width / 2
             absolute_y_min = absolute_y_center - absolute_height / 2
             coco_bbox = [absolute_x_min, absolute_y_min, absolute_width, absolute_height]
             ann['bbox'] = coco_bbox
             annotation_number += 1
-            annotations_this_image.append(ann)
-        # ...for each annotation
+            annotations_this_image.append(ann)
+        # ...for each annotation
     # ...if this image has annotations
     return (im,annotations_this_image)
 # ...def _process_image(...)
@@ -144,37 +148,37 @@ def _process_image(fn_abs,input_folder,category_id_to_name,label_folder):
 def load_yolo_class_list(class_name_file):
     """
     Loads a dictionary mapping zero-indexed IDs to class names from the text/yaml file
-    [class_name_file].
+    [class_name_file].
     Args:
         class_name_file (str or list): this can be:
             - a .yaml or .yaml file in YOLO's dataset.yaml format
             - a .txt or .data file containing a flat list of class names
             - a list of class names
     Returns:
         dict: A dict mapping zero-indexed integer IDs to class names
     """
     # class_name_file can also be a list of class names
     if isinstance(class_name_file,list):
         category_id_to_name = {}
         for i_name,name in enumerate(class_name_file):
             category_id_to_name[i_name] = name
         return category_id_to_name
     ext = os.path.splitext(class_name_file)[1][1:]
     assert ext in ('yml','txt','yaml','data'), 'Unrecognized class name file type {}'.format(
         class_name_file)
     if ext in ('txt','data'):
         with open(class_name_file,'r') as f:
             lines = f.readlines()
         assert len(lines) > 0, 'Empty class name file {}'.format(class_name_file)
         class_names = [s.strip() for s in lines]
         assert len(lines[0]) > 0, 'Empty class name file {} (empty first line)'.format(class_name_file)
         # Blank lines should only appear at the end
         b_found_blank = False
         for s in lines:
@@ -183,17 +187,17 @@ def load_yolo_class_list(class_name_file):
             elif b_found_blank:
                 raise ValueError('Invalid class name file {}, non-blank line after the last blank line'.format(
                     class_name_file))
-        category_id_to_name = {}
+        category_id_to_name = {}
         for i_category_id,category_name in enumerate(class_names):
             assert len(category_name) > 0
             category_id_to_name[i_category_id] = category_name
     else:
         assert ext in ('yml','yaml')
         category_id_to_name = read_classes_from_yolo_dataset_file(class_name_file)
     return category_id_to_name
 # ...load_yolo_class_list(...)
@@ -202,91 +206,91 @@ def load_yolo_class_list(class_name_file):
 def validate_label_file(label_file,category_id_to_name=None,verbose=False):
     """"
     Verifies that [label_file] is a valid YOLO label file.  Does not check the extension.
     Args:
         label_file (str): the .txt file to validate
         category_id_to_name (dict, optional): a dict mapping integer category IDs to names;
             if this is not None, this function errors if the file uses a category that's not
             in this dict
         verbose (bool, optional): enable additional debug console output
     Returns:
-        dict: a dict with keys 'file' (the same as [label_file]) and 'errors' (a list of
+        dict: a dict with keys 'file' (the same as [label_file]) and 'errors' (a list of
         errors (if any) that we found in this file)
     """
     label_result = {}
     label_result['file'] = label_file
     label_result['errors'] = []
     try:
         with open(label_file,'r') as f:
             lines = f.readlines()
     except Exception as e:
         label_result['errors'].append('Read error: {}'.format(str(e)))
         return label_result
     # i_line 0; line = lines[i_line]
     for i_line,line in enumerate(lines):
         s = line.strip()
         if len(s) == 0 or s[0] == '#':
             continue
         try:
             tokens = s.split()
-            assert len(tokens) == 5, '{} tokens'.format(len(tokens))
+            assert len(tokens) == 5, '{} tokens'.format(len(tokens))
             if category_id_to_name is not None:
                 category_id = int(tokens[0])
                 assert category_id in category_id_to_name, \
                     'Unrecognized category ID {}'.format(category_id)
             yolo_bbox = [float(x) for x in tokens[1:]]
         except Exception as e:
             label_result['errors'].append('Token error at line {}: {}'.format(i_line,str(e)))
             continue
         normalized_x_center = yolo_bbox[0]
         normalized_y_center = yolo_bbox[1]
         normalized_width = yolo_bbox[2]
         normalized_height = yolo_bbox[3]
         normalized_x_min = normalized_x_center - normalized_width / 2.0
         normalized_x_max = normalized_x_center + normalized_width / 2.0
         normalized_y_min = normalized_y_center - normalized_height / 2.0
         normalized_y_max = normalized_y_center + normalized_height / 2.0
         if normalized_x_min < 0 or normalized_y_min < 0 or \
             normalized_x_max > 1 or normalized_y_max > 1:
             label_result['errors'].append('Invalid bounding box: {} {} {} {}'.format(
                 normalized_x_min,normalized_y_min,normalized_x_max,normalized_y_max))
     # ...for each line
     if verbose:
         if len(label_result['errors']) > 0:
             print('Errors for {}:'.format(label_file))
             for error in label_result['errors']:
                 print(error)
     return label_result
 # ...def validate_label_file(...)
-def validate_yolo_dataset(input_folder,
-                          class_name_file,
-                          n_workers=1,
-                          pool_type='thread',
+def validate_yolo_dataset(input_folder,
+                          class_name_file,
+                          n_workers=1,
+                          pool_type='thread',
                           verbose=False):
     """
-    Verifies all the labels in a YOLO dataset folder.  Does not yet support the case where the
+    Verifies all the labels in a YOLO dataset folder.  Does not yet support the case where the
     labels and images are in different folders (yolo_to_coco() supports this).
     Looks for:
     * Image files without label files
     * Text files without image files
     * Illegal classes in label files
@@ -294,103 +298,109 @@ def validate_yolo_dataset(input_folder,
     Args:
         input_folder (str): the YOLO dataset folder to validate
-        class_name_file (str or list): a list of classes, a flat text file, or a yolo
-            dataset.yml/.yaml file.  If it's a dataset.yml file, that file should point to
+        class_name_file (str or list): a list of classes, a flat text file, or a yolo
+            dataset.yml/.yaml file.  If it's a dataset.yml file, that file should point to
             input_folder as the base folder, though this is not explicitly checked.
         n_workers (int, optional): number of concurrent workers, set to <= 1 to disable
             parallelization
         pool_type (str, optional): 'thread' or 'process', worker type to use for parallelization;
             not used if [n_workers] <= 1
         verbose (bool, optional): enable additional debug console output
     Returns:
-        dict: validation results, as a dict with fields:
+        dict: validation results, as a dict with fields:
         - image_files_without_label_files (list)
         - label_files_without_image_files (list)
         - label_results (list of dicts with field 'filename', 'errors') (list)
     """
     # Validate arguments
     assert os.path.isdir(input_folder), 'Could not find input folder {}'.format(input_folder)
     if n_workers > 1:
         assert pool_type in ('thread','process'), 'Illegal pool type {}'.format(pool_type)
     category_id_to_name = load_yolo_class_list(class_name_file)
     print('Enumerating files in {}'.format(input_folder))
     all_files = recursive_file_list(input_folder,recursive=True,return_relative_paths=False,
                                     convert_slashes=True)
     label_files = [fn for fn in all_files if fn.endswith('.txt')]
     image_files = find_image_strings(all_files)
     print('Found {} images files and {} label files in {}'.format(
         len(image_files),len(label_files),input_folder))
     label_files_set = set(label_files)
     image_files_without_extension = set()
     for fn in image_files:
         image_file_without_extension = os.path.splitext(fn)[0]
         assert image_file_without_extension not in image_files_without_extension, \
             'Duplicate image file, likely with different extensions: {}'.format(fn)
         image_files_without_extension.add(image_file_without_extension)
     print('Looking for missing image/label files')
     image_files_without_label_files = []
     label_files_without_images = []
     for image_file in tqdm(image_files):
         expected_label_file = os.path.splitext(image_file)[0] + '.txt'
         if expected_label_file not in label_files_set:
             image_files_without_label_files.append(image_file)
     for label_file in tqdm(label_files):
         expected_image_file_without_extension = os.path.splitext(label_file)[0]
         if expected_image_file_without_extension not in image_files_without_extension:
             label_files_without_images.append(label_file)
     print('Found {} image files without labels, {} labels without images'.format(
         len(image_files_without_label_files),len(label_files_without_images)))
     print('Validating label files')
     if n_workers <= 1:
-        label_results = []
-        for fn_abs in tqdm(label_files):
+        label_results = []
+        for fn_abs in tqdm(label_files):
             label_results.append(validate_label_file(fn_abs,
                                                       category_id_to_name=category_id_to_name,
                                                       verbose=verbose))
     else:
         assert pool_type in ('process','thread'), 'Illegal pool type {}'.format(pool_type)
-        if pool_type == 'thread':
-            pool = ThreadPool(n_workers)
-        else:
-            pool = Pool(n_workers)
-        print('Starting a {} pool of {} workers'.format(pool_type,n_workers))
-        p = partial(validate_label_file,
-                    category_id_to_name=category_id_to_name,
-                    verbose=verbose)
-        label_results = list(tqdm(pool.imap(p, label_files),
-                                  total=len(label_files)))
+        pool = None
+        try:
+            if pool_type == 'thread':
+                pool = ThreadPool(n_workers)
+            else:
+                pool = Pool(n_workers)
+            print('Starting a {} pool of {} workers'.format(pool_type,n_workers))
+            p = partial(validate_label_file,
+                        category_id_to_name=category_id_to_name,
+                        verbose=verbose)
+            label_results = list(tqdm(pool.imap(p, label_files),
+                                    total=len(label_files)))
+        finally:
+            pool.close()
+            pool.join()
+            print("Pool closed and joined for label file validation")
     assert len(label_results) == len(label_files)
     validation_results = {}
     validation_results['image_files_without_label_files'] = image_files_without_label_files
     validation_results['label_files_without_images'] = label_files_without_images
     validation_results['label_results'] = label_results
     return validation_results
-# ...validate_yolo_dataset(...)
+# ...validate_yolo_dataset(...)
 #%% Main conversion function
@@ -411,32 +421,35 @@ def yolo_to_coco(input_folder,
                  label_folder=None):
     """
     Converts a YOLO-formatted dataset to a COCO-formatted dataset.
-    All images will be assigned an "error" value, usually None.
+    All images will be assigned an "error" value, usually None.
     Args:
-        input_folder (str): the YOLO dataset folder to convert.  If the image and label
+        input_folder (str): the YOLO dataset folder to convert.  If the image and label
             folders are different, this is the image folder, and [label_folder] is the
             label folder.
-        class_name_file (str or list): a list of classes, a flat text file, or a yolo
-            dataset.yml/.yaml file.  If it's a dataset.yml file, that file should point to
+        class_name_file (str or list): a list of classes, a flat text file, or a yolo
+            dataset.yml/.yaml file.  If it's a dataset.yml file, that file should point to
             input_folder as the base folder, though this is not explicitly checked.
         output_file (str, optional): .json file to which we should write COCO .json data
         empty_image_handling (str, optional): how to handle images with no boxes; whether
-            this includes images with no .txt files depending on the value of
+            this includes images with no .txt files depends on the value of
             [allow_images_without_label_files].  Can be:
             - 'no_annotations': include the image in the image list, with no annotations
             - 'empty_annotations': include the image in the image list, and add an annotation without
               any bounding boxes, using a category called [empty_image_category_name].
             - 'skip': don't include the image in the image list
-            - 'error': there shouldn't be any empty images
+            - 'error': there shouldn't be any empty images
+        empty_image_category_name (str, optional): if we're going to be inserting annotations for
+            images with no boxes, what category name should we use?
         error_image_handling (str, optional): how to handle images that don't load properly; can
             be:
             - 'skip': don't include the image at all
             - 'no_annotations': include with no annotations
+        allow_images_without_label_files (bool, optional): whether to silently allow images with
+            no label files (True) or raise errors for images with no label files (False)
         n_workers (int, optional): number of concurrent workers, set to <= 1 to disable
             parallelization
         pool_type (str, optional): 'thread' or 'process', worker type to use for parallelization;
@@ -444,27 +457,27 @@ def yolo_to_coco(input_folder,
         recursive (bool, optional): whether to recurse into [input_folder]
         exclude_string (str, optional): exclude any images whose filename contains a string
         include_string (str, optional): include only images whose filename contains a string
-        overwrite_handling (bool, optional): behavior if output_file exists ('load', 'overwrite', or
+        overwrite_handling (bool, optional): behavior if output_file exists ('load', 'overwrite', or
             'error')
         label_folder (str, optional): label folder, if different from the image folder
     Returns:
         dict: COCO-formatted data, the same as what's written to [output_file]
     """
     ## Validate input
     input_folder = input_folder.replace('\\','/')
     assert os.path.isdir(input_folder)
     assert os.path.isfile(class_name_file)
     assert empty_image_handling in \
         ('no_annotations','empty_annotations','skip','error'), \
             'Unrecognized empty image handling spec: {}'.format(empty_image_handling)
     if (output_file is not None) and os.path.isfile(output_file):
             if overwrite_handling == 'overwrite':
                 print('Warning: output file {} exists, over-writing'.format(output_file))
             elif overwrite_handling == 'load':
@@ -476,62 +489,62 @@ def yolo_to_coco(input_folder,
                 raise ValueError('Output file {} exists'.format(output_file))
             else:
                 raise ValueError('Unrecognized overwrite_handling value: {}'.format(overwrite_handling))
     ## Read class names
     category_id_to_name = load_yolo_class_list(class_name_file)
     # Find or create the empty image category, if necessary
     empty_category_id = None
-    if (empty_image_handling == 'empty_annotations'):
+    if empty_image_handling == 'empty_annotations':
         category_name_to_id = invert_dictionary(category_id_to_name)
         if empty_image_category_name in category_name_to_id:
             empty_category_id = category_name_to_id[empty_image_category_name]
             print('Using existing empty image category with name {}, ID {}'.format(
-                empty_image_category_name,empty_category_id))
+                empty_image_category_name,empty_category_id))
         else:
             empty_category_id = len(category_id_to_name)
             print('Adding an empty category with name {}, ID {}'.format(
                 empty_image_category_name,empty_category_id))
             category_id_to_name[empty_category_id] = empty_image_category_name
     ## Enumerate images
     print('Enumerating images...')
     image_files_abs = find_images(input_folder,recursive=recursive,convert_slashes=True)
     assert not any(['\\' in fn for fn in image_files_abs])
     n_files_original = len(image_files_abs)
     # Optionally include/exclude images matching specific strings
     if exclude_string is not None:
         image_files_abs = [fn for fn in image_files_abs if exclude_string not in fn]
     if include_string is not None:
         image_files_abs = [fn for fn in image_files_abs if include_string in fn]
     if len(image_files_abs) != n_files_original or exclude_string is not None or include_string is not None:
         n_excluded = n_files_original - len(image_files_abs)
         print('Excluded {} of {} images based on filenames'.format(n_excluded,n_files_original))
     categories = []
     for category_id in category_id_to_name:
         categories.append({'id':category_id,'name':category_id_to_name[category_id]})
     info = {}
     info['version'] = '1.0'
     info['description'] = 'Converted from YOLO format'
     image_ids = set()
     ## If we're expected to have labels for every image, check before we process all the images
     if not allow_images_without_label_files:
         print('Verifying that label files exist')
         # image_file_abs = image_files_abs[0]
@@ -544,88 +557,88 @@ def yolo_to_coco(input_folder,
             label_file_abs = os.path.splitext(label_file_abs_base)[0] + '.txt'
             assert os.path.isfile(label_file_abs), \
                 'No annotation file for {}'.format(image_file_abs)
     ## Initial loop to make sure image IDs will be unique
     print('Validating image IDs...')
     for fn_abs in tqdm(image_files_abs):
         fn_relative = os.path.relpath(fn_abs,input_folder).replace('\\','/')
         image_id = _filename_to_image_id(fn_relative)
         assert image_id not in image_ids, \
             'Oops, you have hit a very esoteric case where you have the same filename ' + \
             'with both spaces and underscores, this is not currently handled.'
         image_ids.add(image_id)
     ## Main loop to process labels
     print('Processing labels...')
     if n_workers <= 1:
-        image_results = []
+        image_results = []
         # fn_abs = image_files_abs[0]
-        for fn_abs in tqdm(image_files_abs):
+        for fn_abs in tqdm(image_files_abs):
             image_results.append(_process_image(fn_abs,
                                                 input_folder,
                                                 category_id_to_name,
                                                 label_folder))
     else:
         assert pool_type in ('process','thread'), 'Illegal pool type {}'.format(pool_type)
         if pool_type == 'thread':
             pool = ThreadPool(n_workers)
         else:
             pool = Pool(n_workers)
         print('Starting a {} pool of {} workers'.format(pool_type,n_workers))
         p = partial(_process_image,
                     input_folder=input_folder,
                     category_id_to_name=category_id_to_name,
                     label_folder=label_folder)
         image_results = list(tqdm(pool.imap(p, image_files_abs),
                                   total=len(image_files_abs)))
     assert len(image_results) == len(image_files_abs)
     ## Re-assembly of results into a COCO dict
     print('Assembling labels...')
     images = []
     annotations = []
     for image_result in tqdm(image_results):
         im = image_result[0]
         annotations_this_image = image_result[1]
         # If we have annotations for this image
         if len(annotations_this_image) > 0:
             assert im['error'] is None
             images.append(im)
             for ann in annotations_this_image:
                 annotations.append(ann)
         # If this image failed to read
         elif im['error'] is not None:
             if error_image_handling == 'skip':
                 pass
             elif error_image_handling == 'no_annotations':
-                images.append(im)
+                images.append(im)
         # If this image read successfully, but there are no annotations
         else:
             if empty_image_handling == 'skip':
                 pass
             elif empty_image_handling == 'no_annotations':
@@ -641,13 +654,18 @@ def yolo_to_coco(input_folder,
                 # we're adopting.
                 # ann['bbox'] = [0,0,0,0]
                 annotations.append(ann)
-                images.append(im)
+                images.append(im)
     # ...for each image result
+    # Clean up unnecessary error fields
+    for im in images:
+        if 'error' in im and im['error'] is None:
+            del im['error']
     print('Read {} annotations for {} images'.format(len(annotations),
                                                      len(images)))
     d = {}
     d['images'] = images
     d['annotations'] = annotations
@@ -667,25 +685,25 @@ def yolo_to_coco(input_folder,
 #%% Interactive driver
 if False:
     pass
     #%% Convert YOLO folders to COCO
     preview_folder = '/home/user/data/noaa-fish/val-coco-conversion-preview'
     input_folder = '/home/user/data/noaa-fish/val'
     output_file = '/home/user/data/noaa-fish/val.json'
     class_name_file = '/home/user/data/noaa-fish/AllImagesWithAnnotations/classes.txt'
     d = yolo_to_coco(input_folder,class_name_file,output_file)
     input_folder = '/home/user/data/noaa-fish/train'
     output_file = '/home/user/data/noaa-fish/train.json'
     class_name_file = '/home/user/data/noaa-fish/AllImagesWithAnnotations/classes.txt'
     d = yolo_to_coco(input_folder,class_name_file,output_file)
     #%% Check DB integrity
     from megadetector.data_management.databases import integrity_check_json_db
@@ -715,11 +733,135 @@ if False:
                                                         output_dir=preview_folder,
                                                         image_base_dir=input_folder,
                                                         options=viz_options)
     from megadetector.utils.path_utils import open_file
     open_file(html_output_file)
 #%% Command-line driver
-# TODO
+def main():
+    """
+    Command-line entry point: parse arguments and hand them to [yolo_to_coco].
+    Positional arguments are input_folder, class_name_file, and output_file; all
+    other settings are optional flags.  The two on/off flags
+    (--allow_images_without_label_files, --recursive) are supplied as the strings
+    'true' or 'false' and converted to bool before the conversion is invoked.
+    If the script is run with no arguments, the help text is printed and the
+    process exits without converting anything.
+    """
+    arg_parser = argparse.ArgumentParser(
+        description='Convert a YOLO-formatted dataset to COCO format')
+    # (name, add_argument() options) pairs, listed in the order they should be
+    # registered, which is also the order they appear in the help text.
+    argument_table = [
+        ('input_folder', {
+            'type': str,
+            'help': 'Path to the YOLO dataset folder (image folder)'}),
+        ('class_name_file', {
+            'type': str,
+            'help': 'Path to the file containing class names (e.g., classes.txt or dataset.yaml)'}),
+        ('output_file', {
+            'type': str,
+            'help': 'Path to the output COCO .json file.'}),
+        ('--label_folder', {
+            'type': str,
+            'default': None,
+            'help': 'Label folder, if different from the image folder. Default: None (labels are in the image folder)'}),
+        ('--empty_image_handling', {
+            'type': str,
+            'default': 'no_annotations',
+            'choices': ['no_annotations', 'empty_annotations', 'skip', 'error'],
+            'help': 'How to handle images with no bounding boxes.'}),
+        ('--empty_image_category_name', {
+            'type': str,
+            'default': 'empty',
+            'help': 'Category name for empty images if empty_image_handling is "empty_annotations"'}),
+        ('--error_image_handling', {
+            'type': str,
+            'default': 'no_annotations',
+            'choices': ['skip', 'no_annotations'],
+            'help': 'How to handle images that fail to load'}),
+        ('--allow_images_without_label_files', {
+            'type': str,
+            'default': 'true',
+            'choices': ['true', 'false'],
+            'help': 'Whether to allow images that do not have corresponding label files (true/false)'}),
+        ('--n_workers', {
+            'type': int,
+            'default': 1,
+            'help': 'Number of workers for parallel processing. <=1 for sequential'}),
+        ('--pool_type', {
+            'type': str,
+            'default': 'thread',
+            'choices': ['thread', 'process'],
+            'help': 'Type of multiprocessing pool if n_workers > 1'}),
+        ('--recursive', {
+            'type': str,
+            'default': 'true',
+            'choices': ['true', 'false'],
+            'help': 'Whether to search for images recursively in the input folder (true/false)'}),
+        ('--exclude_string', {
+            'type': str,
+            'default': None,
+            'help': 'Exclude images whose filename contains this string'}),
+        ('--include_string', {
+            'type': str,
+            'default': None,
+            'help': 'Include images only if filename contains this string'}),
+        ('--overwrite_handling', {
+            'type': str,
+            'default': 'overwrite',
+            'choices': ['load', 'overwrite', 'error'],
+            'help': 'Behavior if output_file exists.'}),
+    ]
+    for argument_name, argument_options in argument_table:
+        arg_parser.add_argument(argument_name, **argument_options)
+    # A bare invocation gets the full help text rather than argparse's terse
+    # "missing required arguments" error.
+    if not sys.argv[1:]:
+        arg_parser.print_help()
+        arg_parser.exit()
+    args = arg_parser.parse_args()
+    # The on/off flags arrive as text; turn them into real booleans.
+    conversion_options = {
+        'output_file': args.output_file,
+        'label_folder': args.label_folder,
+        'empty_image_handling': args.empty_image_handling,
+        'empty_image_category_name': args.empty_image_category_name,
+        'error_image_handling': args.error_image_handling,
+        'allow_images_without_label_files':
+            (args.allow_images_without_label_files.lower() == 'true'),
+        'n_workers': args.n_workers,
+        'pool_type': args.pool_type,
+        'recursive': (args.recursive.lower() == 'true'),
+        'exclude_string': args.exclude_string,
+        'include_string': args.include_string,
+        'overwrite_handling': args.overwrite_handling,
+    }
+    yolo_to_coco(args.input_folder, args.class_name_file, **conversion_options)
+    print(f"Dataset conversion complete, output written to {args.output_file}")
+# Run the command-line driver only when this file is executed as a script, not when imported
+if __name__ == '__main__':
+    main()

megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl

Potentially problematic release.

megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl