megadetector 5.0.6__py3-none-any.whl → 5.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (75)
  1. api/batch_processing/data_preparation/manage_local_batch.py +297 -202
  2. api/batch_processing/data_preparation/manage_video_batch.py +7 -2
  3. api/batch_processing/postprocessing/add_max_conf.py +1 -0
  4. api/batch_processing/postprocessing/combine_api_outputs.py +2 -2
  5. api/batch_processing/postprocessing/compare_batch_results.py +111 -61
  6. api/batch_processing/postprocessing/convert_output_format.py +24 -6
  7. api/batch_processing/postprocessing/load_api_results.py +56 -72
  8. api/batch_processing/postprocessing/md_to_labelme.py +119 -51
  9. api/batch_processing/postprocessing/merge_detections.py +30 -5
  10. api/batch_processing/postprocessing/postprocess_batch_results.py +175 -55
  11. api/batch_processing/postprocessing/remap_detection_categories.py +163 -0
  12. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +628 -0
  13. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
  14. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
  15. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +224 -76
  16. api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
  17. api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
  18. classification/prepare_classification_script.py +191 -191
  19. data_management/cct_json_utils.py +7 -2
  20. data_management/coco_to_labelme.py +263 -0
  21. data_management/coco_to_yolo.py +72 -48
  22. data_management/databases/integrity_check_json_db.py +75 -64
  23. data_management/databases/subset_json_db.py +1 -1
  24. data_management/generate_crops_from_cct.py +1 -1
  25. data_management/get_image_sizes.py +44 -26
  26. data_management/importers/animl_results_to_md_results.py +3 -5
  27. data_management/importers/noaa_seals_2019.py +2 -2
  28. data_management/importers/zamba_results_to_md_results.py +2 -2
  29. data_management/labelme_to_coco.py +264 -127
  30. data_management/labelme_to_yolo.py +96 -53
  31. data_management/lila/create_lila_blank_set.py +557 -0
  32. data_management/lila/create_lila_test_set.py +2 -1
  33. data_management/lila/create_links_to_md_results_files.py +1 -1
  34. data_management/lila/download_lila_subset.py +138 -45
  35. data_management/lila/generate_lila_per_image_labels.py +23 -14
  36. data_management/lila/get_lila_annotation_counts.py +16 -10
  37. data_management/lila/lila_common.py +15 -42
  38. data_management/lila/test_lila_metadata_urls.py +116 -0
  39. data_management/read_exif.py +65 -16
  40. data_management/remap_coco_categories.py +84 -0
  41. data_management/resize_coco_dataset.py +14 -31
  42. data_management/wi_download_csv_to_coco.py +239 -0
  43. data_management/yolo_output_to_md_output.py +40 -13
  44. data_management/yolo_to_coco.py +313 -100
  45. detection/process_video.py +36 -14
  46. detection/pytorch_detector.py +1 -1
  47. detection/run_detector.py +73 -18
  48. detection/run_detector_batch.py +116 -27
  49. detection/run_inference_with_yolov5_val.py +135 -27
  50. detection/run_tiled_inference.py +153 -43
  51. detection/tf_detector.py +2 -1
  52. detection/video_utils.py +4 -2
  53. md_utils/ct_utils.py +101 -6
  54. md_utils/md_tests.py +264 -17
  55. md_utils/path_utils.py +326 -47
  56. md_utils/process_utils.py +26 -7
  57. md_utils/split_locations_into_train_val.py +215 -0
  58. md_utils/string_utils.py +10 -0
  59. md_utils/url_utils.py +66 -3
  60. md_utils/write_html_image_list.py +12 -2
  61. md_visualization/visualization_utils.py +380 -74
  62. md_visualization/visualize_db.py +41 -10
  63. md_visualization/visualize_detector_output.py +185 -104
  64. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/METADATA +11 -13
  65. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/RECORD +74 -67
  66. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/WHEEL +1 -1
  67. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
  68. taxonomy_mapping/map_new_lila_datasets.py +43 -39
  69. taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
  70. taxonomy_mapping/preview_lila_taxonomy.py +27 -27
  71. taxonomy_mapping/species_lookup.py +33 -13
  72. taxonomy_mapping/taxonomy_csv_checker.py +7 -5
  73. md_visualization/visualize_megadb.py +0 -183
  74. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/LICENSE +0 -0
  75. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/top_level.txt +0 -0
data_management/read_exif.py

@@ -48,9 +48,18 @@ class ReadExifOptions:
     #
     # Not relevant if n_workers is 1.
     use_threads = True
-
+
+    # "File" and "ExifTool" are tag types used by ExifTool to report data that
+    # doesn't come from EXIF, rather from the file (e.g. file size).
     tag_types_to_ignore = set(['File','ExifTool'])
 
+    # Include/exclude specific tags (mutually incompatible)
+    tags_to_include = None
+    tags_to_exclude = None
+
+    # A useful set of tags one might want to limit queries for
+    # options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight','DateTime','DateTimeOriginal','Orientation']
+
     exiftool_command_name = 'exiftool'
 
     # How should we handle byte-formatted EXIF tags?
@@ -62,16 +71,17 @@ class ReadExifOptions:
 
     # Should we use exiftool or pil?
     processing_library = 'pil' # 'exiftool','pil'
-
+
+
 
 #%% Functions
 
-def enumerate_files(input_folder):
+def enumerate_files(input_folder,recursive=True):
     """
     Enumerates all image files in input_folder, returning relative paths
     """
 
-    image_files = find_images(input_folder,recursive=True)
+    image_files = find_images(input_folder,recursive=recursive)
     image_files = [os.path.relpath(s,input_folder) for s in image_files]
     image_files = [s.replace('\\','/') for s in image_files]
     print('Enumerated {} files'.format(len(image_files)))
@@ -99,7 +109,7 @@ def get_exif_ifd(exif):
 def read_pil_exif(im,options=None):
     """
     Read all the EXIF data we know how to read from [im] (path or PIL Image), whether it's
-    in the PIL default EXIF data or not.
+    in the PIL default EXIF data or not. Returns a dict.
     """
 
     if options is None:
@@ -192,6 +202,32 @@ def parse_exif_datetime_string(s,verbose=False):
     return dt
 
 
+def _filter_tags(tags,options):
+    """
+    Internal function used to include/exclude specific tags from the exif_tags
+    dict.
+    """
+
+    if options is None:
+        return tags
+    if options.tags_to_include is None and options.tags_to_exclude is None:
+        return tags
+    if options.tags_to_include is not None:
+        assert options.tags_to_exclude is None, "tags_to_include and tags_to_exclude are incompatible"
+        tags_to_return = {}
+        for tag_name in tags.keys():
+            if tag_name in options.tags_to_include:
+                tags_to_return[tag_name] = tags[tag_name]
+        return tags_to_return
+    if options.tags_to_exclude is not None:
+        assert options.tags_to_include is None, "tags_to_include and tags_to_exclude are incompatible"
+        tags_to_return = {}
+        for tag_name in tags.keys():
+            if tag_name not in options.tags_to_exclude:
+                tags_to_return[tag_name] = tags[tag_name]
+        return tags_to_return
+
+
 def read_exif_tags_for_image(file_path,options=None):
     """
     Get relevant fields from EXIF data for an image
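The include/exclude semantics above are worth pinning down; here is a minimal sketch of how the new _filter_tags helper behaves (note that it is an internal function, and the tag dict in this sketch is hypothetical):

from data_management.read_exif import ReadExifOptions, _filter_tags

tags = {'Make':'ACME','Model':'TrailCam X','MakerNote':'<binary>'}

# Keep only the named tags...
options = ReadExifOptions()
options.tags_to_include = ['Make','Model']
print(_filter_tags(tags,options))
# {'Make': 'ACME', 'Model': 'TrailCam X'}

# ...or drop the named tags; setting both options triggers an assert
options = ReadExifOptions()
options.tags_to_exclude = ['MakerNote']
print(_filter_tags(tags,options))
# {'Make': 'ACME', 'Model': 'TrailCam X'}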
@@ -227,8 +263,8 @@ def read_exif_tags_for_image(file_path,options=None):
             result['status'] = 'empty_read'
         else:
             result['status'] = 'success'
-            result['tags'] = exif_tags
-
+            result['tags'] = _filter_tags(exif_tags,options)
+
         return result
 
     elif options.processing_library == 'exiftool':
@@ -283,9 +319,12 @@ def read_exif_tags_for_image(file_path,options=None):
                     print('Ignoring tag with type {}'.format(field_type))
                 continue
 
-            field_tag = field_name_type_tokens[1].strip()
-
-            tag = [field_type,field_tag,field_value]
+            field_name = field_name_type_tokens[1].strip()
+            if options.tags_to_exclude is not None and field_name in options.tags_to_exclude:
+                continue
+            if options.tags_to_include is not None and field_name not in options.tags_to_include:
+                continue
+            tag = [field_type,field_name,field_value]
 
             exif_tags.append(tag)
 
@@ -350,20 +389,22 @@ def populate_exif_data(im, image_base, options=None):
 # ...populate_exif_data()
 
 
-def create_image_objects(image_files):
+def create_image_objects(image_files,recursive=True):
     """
     Create empty image objects for every image in [image_files], which can be a
     list of relative paths (which will get stored without processing, so the base
     path doesn't matter here), or a folder name.
 
     Returns a list of dicts with field 'file_name' (a relative path).
+
+    "recursive" is ignored if "image_files" is a list.
     """
 
     # Enumerate *relative* paths
     if isinstance(image_files,str):
         print('Enumerating image files in {}'.format(image_files))
         assert os.path.isdir(image_files), 'Invalid image folder {}'.format(image_files)
-        image_files = enumerate_files(image_files)
+        image_files = enumerate_files(image_files,recursive=recursive)
 
     images = []
     for fn in image_files:
@@ -499,7 +540,7 @@ def is_executable(name):
     return which(name) is not None
 
 
-def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None):
+def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None,recursive=True):
     """
     Read EXIF data for all images in input_folder.
 
@@ -516,6 +557,12 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None):
     if options is None:
         options = ReadExifOptions()
 
+    # Validate options
+    if options.tags_to_include is not None:
+        assert options.tags_to_exclude is None, "tags_to_include and tags_to_exclude are incompatible"
+    if options.tags_to_exclude is not None:
+        assert options.tags_to_include is None, "tags_to_include and tags_to_exclude are incompatible"
+
     if input_folder is None:
         input_folder = ''
     if len(input_folder) > 0:
@@ -542,7 +589,7 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None):
         assert is_executable(options.exiftool_command_name), 'exiftool not available'
 
     if filenames is None:
-        images = create_image_objects(input_folder)
+        images = create_image_objects(input_folder,recursive=recursive)
     else:
         assert isinstance(filenames,list)
         images = create_image_objects(filenames)
@@ -567,14 +614,16 @@ if False:
 
     #%%
 
-    input_folder = os.path.expanduser('~/data/KRU-test')
-    output_file = os.path.expanduser('~/data/test-exif.json')
+    input_folder = r'C:\temp\md-name-testing'
+    output_file = None # r'C:\temp\md-name-testing\exif.json'
     options = ReadExifOptions()
     options.verbose = False
     options.n_workers = 10
     options.use_threads = False
     options.processing_library = 'pil'
     # options.processing_library = 'exiftool'
+    options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight','DateTime','DateTimeOriginal','Orientation']
+    # options.tags_to_exclude = ['MakerNote']
 
     results = read_exif_from_folder(input_folder,output_file,options)
 
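Putting the new options together, a minimal usage sketch for the updated read_exif.py API (the folder and output paths here are hypothetical):

from data_management.read_exif import ReadExifOptions, read_exif_from_folder

options = ReadExifOptions()
options.processing_library = 'pil'
# tags_to_include and tags_to_exclude are mutually incompatible; set at most one
options.tags_to_include = ['DateTime','DateTimeOriginal','Make','Model','Orientation']

results = read_exif_from_folder('/data/camera-trap-images',
                                output_file='/data/camera-trap-exif.json',
                                options=options,
                                recursive=True)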
data_management/remap_coco_categories.py (new file)

@@ -0,0 +1,84 @@
+########
+#
+# remap_coco_categories.py
+#
+# Given a COCO-formatted dataset, remap the categories to a new mapping.
+#
+########
+
+#%% Imports and constants
+
+import os
+import json
+
+from copy import deepcopy
+
+
+#%% Main function
+
+def remap_coco_categories(input_data,
+                          output_category_name_to_id,
+                          input_category_name_to_output_category_name,
+                          output_file=None):
+    """
+    Given a COCO-formatted dataset, remap the categories to a new categories mapping, optionally
+    writing the results to a new file.
+
+    output_category_name_to_id is a dict mapping strings to ints.
+
+    input_category_name_to_output_category_name is a dict mapping strings to strings.
+
+    [input_data] can be a COCO-formatted dict or a filename. If it's a dict, it will be copied,
+    not modified in place.
+    """
+
+    if isinstance(input_data,str):
+        assert os.path.isfile(input_data), "Can't find file {}".format(input_data)
+        with open(input_data,'r') as f:
+            input_data = json.load(f)
+        assert isinstance(input_data,dict), 'Illegal COCO input data'
+    else:
+        assert isinstance(input_data,dict), 'Illegal COCO input data'
+        input_data = deepcopy(input_data)
+
+    # It's safe to modify in-place now
+    output_data = input_data
+
+    # Read input name --> ID mapping
+    input_category_name_to_input_category_id = {}
+    for c in input_data['categories']:
+        input_category_name_to_input_category_id[c['name']] = c['id']
+
+    # Map input IDs --> output IDs
+    input_category_id_to_output_category_id = {}
+    for input_name in input_category_name_to_output_category_name.keys():
+        output_name = input_category_name_to_output_category_name[input_name]
+        assert output_name in output_category_name_to_id, \
+            'No output ID for {} --> {}'.format(input_name,output_name)
+        input_id = input_category_name_to_input_category_id[input_name]
+        output_id = output_category_name_to_id[output_name]
+        input_category_id_to_output_category_id[input_id] = output_id
+
+    # Map annotations
+    for ann in output_data['annotations']:
+        assert ann['category_id'] in input_category_id_to_output_category_id, \
+            'Unrecognized category ID {}'.format(ann['category_id'])
+        ann['category_id'] = input_category_id_to_output_category_id[ann['category_id']]
+
+    # Update the category list
+    output_categories = []
+    for output_name in output_category_name_to_id:
+        category = {'name':output_name,'id':output_category_name_to_id[output_name]}
+        output_categories.append(category)
+    output_data['categories'] = output_categories
+
+    if output_file is not None:
+        with open(output_file,'w') as f:
+            json.dump(output_data,f,indent=1)
+
+    return input_data
+
+
+#%% Command-line driver
+
+# TODO
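A minimal usage sketch for the new remap_coco_categories module (the category names, IDs, and file names below are hypothetical):

from data_management.remap_coco_categories import remap_coco_categories

# Every input category used by an annotation must appear in the name mapping,
# and every output name must have an ID, or the asserts above will fire
output_category_name_to_id = {'animal':0,'person':1}
input_name_to_output_name = {'deer':'animal','raccoon':'animal','hiker':'person'}

remapped = remap_coco_categories('input_coco.json',
                                 output_category_name_to_id,
                                 input_name_to_output_name,
                                 output_file='remapped_coco.json')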
data_management/resize_coco_dataset.py

@@ -26,8 +26,7 @@ from md_visualization.visualization_utils import \
 def resize_coco_dataset(input_folder,input_filename,
                         output_folder,output_filename,
                         target_size=(-1,-1),
-                        correct_size_image_handling='copy',
-                        right_edge_quantization_threshold=None):
+                        correct_size_image_handling='copy'):
     """
     Given a COCO-formatted dataset (images in input_folder, data in input_filename), resize
     all the images to a target size (in output_folder) and scale bounding boxes accordingly
@@ -36,7 +35,7 @@ def resize_coco_dataset(input_folder,input_filename,
     target_size should be a tuple/list of ints, length 2. If either dimension is -1, aspect ratio
     will be preserved. If both dimensions are -1, this means "keep the original size". If
     both dimensions are -1 and correct_size_image_handling is copy, this function is basically
-    a no-op, although you might still use it for right_edge_quantization_threshold.
+    a no-op.
 
     correct_size_image_handling can be 'copy' (in which case the original image is just copied
     to the output folder) or 'rewrite' (in which case the image is opened via PIL and re-written,
@@ -44,11 +43,7 @@ def resize_coco_dataset(input_folder,input_filename,
     you're superstitious about biases coming from images in a training set being written
     by different image encoders.
 
-    right_edge_quantization_threshold is an off-by-default hack to adjust large datasets where
-    boxes that really should be running off the right side of the image only extend like 99%
-    of the way there, due to what appears to be a slight bias inherent to MD. If a box extends
-    within [right_edge_quantization_threshold] (a small number, from 0 to 1, but probably around
-    0.02) of the right edge of the image, it will be extended to the far right edge.
+    Returns the COCO database with resized images.
     """
 
     # Read input data
@@ -62,7 +57,9 @@ def resize_coco_dataset(input_folder,input_filename,
 
     # For each image
 
-    # im = d['images'][1]
+    # TODO: this is trivially parallelizable
+    #
+    # im = d['images'][0]
     for im in tqdm(d['images']):
 
         input_fn_relative = im['file_name']
@@ -122,15 +119,6 @@ def resize_coco_dataset(input_folder,input_filename,
                         bbox[2] * width_scale,
                         bbox[3] * height_scale]
 
-                # Do we need to quantize this box?
-                if right_edge_quantization_threshold is not None and \
-                    right_edge_quantization_threshold > 0:
-                    bbox_right_edge_abs = bbox[0] + bbox[2]
-                    bbox_right_edge_norm = bbox_right_edge_abs / output_w
-                    bbox_right_edge_distance = (1.0 - bbox_right_edge_norm)
-                    if bbox_right_edge_distance < right_edge_quantization_threshold:
-                        bbox[2] = output_w - bbox[0]
-
                 ann['bbox'] = bbox
 
             # ...if this annotation has a box
@@ -143,6 +131,8 @@ def resize_coco_dataset(input_folder,input_filename,
     with open(output_filename,'w') as f:
         json.dump(d,f,indent=1)
 
+    return d
+
 # ...def resize_coco_dataset(...)
 
 
@@ -153,27 +143,20 @@ if False:
     pass
 
     #%% Test resizing
-
-    # input_filename = os.path.expanduser('~/tmp/labelme_to_coco_test.json')
-    # input_folder = os.path.expanduser('~/data/labelme-json-test')
-    # target_size = (600,-1)
-
-    input_folder = os.path.expanduser('~/data/usgs-kissel-training')
-    input_filename = os.path.expanduser('~/data/usgs-tegus.json')
+
+    input_folder = os.path.expanduser('~/data/usgs-tegus/usgs-kissel-training')
+    input_filename = os.path.expanduser('~/data/usgs-tegus/usgs-kissel-training.json')
     target_size = (1600,-1)
 
-    output_filename = insert_before_extension(input_filename,'resized')
-    output_folder = input_folder + '-resized'
+    output_filename = insert_before_extension(input_filename,'resized-test')
+    output_folder = input_folder + '-resized-test'
 
     correct_size_image_handling = 'rewrite'
 
-    right_edge_quantization_threshold = 0.015
-
     resize_coco_dataset(input_folder,input_filename,
                         output_folder,output_filename,
                         target_size=target_size,
-                        correct_size_image_handling=correct_size_image_handling,
-                        right_edge_quantization_threshold=right_edge_quantization_threshold)
+                        correct_size_image_handling=correct_size_image_handling)
 
 
     #%% Preview
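A minimal usage sketch for the updated resize_coco_dataset signature, including the new return value (the paths here are hypothetical):

import os
from data_management.resize_coco_dataset import resize_coco_dataset

input_folder = os.path.expanduser('~/data/images')
input_filename = os.path.expanduser('~/data/dataset.json')

# Resize to 1600 pixels wide, preserving aspect ratio
d = resize_coco_dataset(input_folder,input_filename,
                        input_folder + '-resized',
                        os.path.expanduser('~/data/dataset-resized.json'),
                        target_size=(1600,-1),
                        correct_size_image_handling='rewrite')
print('Resized dataset contains {} images'.format(len(d['images'])))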
data_management/wi_download_csv_to_coco.py (new file)

@@ -0,0 +1,239 @@
+########
+#
+# wi_download_csv_to_coco.py
+#
+# Convert a .csv file from a Wildlife Insights project export to a COCO camera traps .json file.
+#
+# Currently assumes that common names are unique identifiers, which is convenient but unreliable.
+#
+########
+
+#%% Imports and constants
+
+import os
+import json
+import pandas as pd
+import numpy as np
+
+from tqdm import tqdm
+from collections import defaultdict
+
+from md_visualization import visualization_utils as vis_utils
+
+wi_extra_annotation_columns = \
+    ('is_blank','identified_by','wi_taxon_id','class','order','family','genus','species','uncertainty',
+     'number_of_objects','age','sex','animal_recognizable','individual_id','individual_animal_notes',
+     'behavior','highlighted','markings')
+
+wi_extra_image_columns = ('project_id','deployment_id')
+
+def make_location_id(project_id,deployment_id):
+    return 'project_' + str(project_id) + '_deployment_' + deployment_id
+
+def isnan(v):
+    try:
+        return np.isnan(v)
+    except Exception:
+        return False
+
+default_category_remappings = {
+    'Homo Species':'Human',
+    'Human-Camera Trapper':'Human',
+    'No CV Result':'Unknown'
+}
+
+
+#%%
+
+def wi_download_csv_to_coco(csv_file_in,
+                            coco_file_out=None,
+                            image_folder=None,
+                            validate_images=False,
+                            gs_prefix=None,
+                            verbose=True,
+                            category_remappings=default_category_remappings):
+    """
+    Convert a .csv file from a Wildlife Insights project export to a COCO
+    camera traps .json file.
+
+    If [coco_file_out] is None, uses [csv_file_in].json
+
+    gs_prefix is a string to remove from GS URLs to convert to path names... for example, if
+    your gs:// URLs look like:
+
+    gs://11234134_xyz/deployment/55554/dfadfasdfs.jpg
+
+    ...and you specify gs_prefix='11234134_xyz/deployment/', the filenames in
+    the .json file will look like:
+
+    55554/dfadfasdfs.jpg
+
+    exclude_re discards matching images; typically use to omit thumbnail images.
+    """
+
+    #%% Create COCO dictionaries
+
+    category_name_to_id = {}
+    category_name_to_id['empty'] = 0
+
+    df = pd.read_csv(csv_file_in)
+
+    print('Read {} rows from {}'.format(len(df),csv_file_in))
+
+    image_id_to_image = {}
+    image_id_to_annotations = defaultdict(list)
+
+    # i_row = 0; row = df.iloc[i_row]
+    for i_row,row in df.iterrows():
+
+        image_id = row['image_id']
+
+        if image_id not in image_id_to_image:
+
+            im = {}
+            image_id_to_image[image_id] = im
+
+            im['id'] = image_id
+
+            gs_url = row['location']
+            assert gs_url.startswith('gs://')
+
+            file_name = gs_url.replace('gs://','')
+            if gs_prefix is not None:
+                file_name = file_name.replace(gs_prefix,'')
+
+            location_id = make_location_id(row['project_id'],row['deployment_id'])
+            im['file_name'] = file_name
+            im['location'] = location_id
+            im['datetime'] = row['timestamp']
+
+            im['wi_image_info'] = {}
+            for s in wi_extra_image_columns:
+                im['wi_image_info'][s] = str(row[s])
+
+        else:
+
+            im = image_id_to_image[image_id]
+            assert im['datetime'] == row['timestamp']
+            location_id = make_location_id(row['project_id'],row['deployment_id'])
+            assert im['location'] == location_id
+
+        category_name = row['common_name']
+        if category_remappings is not None and category_name in category_remappings:
+            category_name = category_remappings[category_name]
+
+        if category_name == 'Blank':
+            category_name = 'empty'
+            assert row['is_blank'] == 1
+        else:
+            assert row['is_blank'] == 0
+        assert isinstance(category_name,str)
+        if category_name in category_name_to_id:
+            category_id = category_name_to_id[category_name]
+        else:
+            category_id = len(category_name_to_id)
+            category_name_to_id[category_name] = category_id
+
+        ann = {}
+        ann['image_id'] = image_id
+        annotations_this_image = image_id_to_annotations[image_id]
+        annotation_number = len(annotations_this_image)
+        ann['id'] = image_id + '_' + str(annotation_number).zfill(2)
+        ann['category_id'] = category_id
+        annotations_this_image.append(ann)
+
+        extra_info = {}
+        for s in wi_extra_annotation_columns:
+            v = row[s]
+            if not isnan(v):
+                extra_info[s] = v
+        ann['wi_extra_info'] = extra_info
+
+    # ...for each row
+
+    images = list(image_id_to_image.values())
+    categories = []
+    for category_name in category_name_to_id:
+        category_id = category_name_to_id[category_name]
+        categories.append({'id':category_id,'name':category_name})
+    annotations = []
+    for image_id in image_id_to_annotations:
+        annotations_this_image = image_id_to_annotations[image_id]
+        for ann in annotations_this_image:
+            annotations.append(ann)
+    info = {'version':'1.00','description':'converted from WI export'}
+    info['source_file'] = csv_file_in
+    coco_data = {}
+    coco_data['info'] = info
+    coco_data['images'] = images
+    coco_data['annotations'] = annotations
+    coco_data['categories'] = categories
+
+
+    ##%% Validate images, add sizes
+
+    if validate_images:
+
+        print('Validating images')
+        # TODO: trivially parallelizable
+
+        assert os.path.isdir(image_folder), \
+            'Must specify a valid image folder if you specify validate_images=True'
+
+        # im = images[0]
+        for im in tqdm(images):
+            file_name_relative = im['file_name']
+            file_name_abs = os.path.join(image_folder,file_name_relative)
+            assert os.path.isfile(file_name_abs)
+
+            im['corrupt'] = False
+            try:
+                pil_im = vis_utils.load_image(file_name_abs)
+            except Exception:
+                im['corrupt'] = True
+            if not im['corrupt']:
+                im['width'] = pil_im.width
+                im['height'] = pil_im.height
+
+
+    ##%% Write output json
+
+    if coco_file_out is None:
+
+        coco_file_out = csv_file_in + '.json'
+
+    with open(coco_file_out,'w') as f:
+        json.dump(coco_data,f,indent=1)
+
+
+    ##%% Validate output
+
+    from data_management.databases.integrity_check_json_db import \
+        IntegrityCheckOptions,integrity_check_json_db
+    options = IntegrityCheckOptions()
+    options.baseDir = image_folder
+    options.bCheckImageExistence = True
+    options.verbose = verbose
+    _ = integrity_check_json_db(coco_file_out,options)
+
+
+
+#%% Interactive driver
+
+if False:
+
+    #%%
+
+    base_folder = r'a/b/c'
+    csv_file_in = os.path.join(base_folder,'images.csv')
+    coco_file_out = None
+    gs_prefix = 'a_b_c_main/'
+    image_folder = os.path.join(base_folder,'images')
+    validate_images = False
+    verbose = True
+    category_remappings = default_category_remappings
+
+
+#%% Command-line driver
+
+# TODO
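A minimal usage sketch for the new converter (the export paths and the gs:// prefix below are hypothetical; validate_images=True requires that image_folder exists locally):

import os
from data_management.wi_download_csv_to_coco import wi_download_csv_to_coco

base_folder = os.path.expanduser('~/data/wi-export')
wi_download_csv_to_coco(os.path.join(base_folder,'images.csv'),
                        coco_file_out=os.path.join(base_folder,'images.json'),
                        image_folder=os.path.join(base_folder,'images'),
                        validate_images=True,
                        gs_prefix='12345_project/deployment/')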
data_management/yolo_output_to_md_output.py

@@ -61,21 +61,37 @@ from detection.run_detector import CONF_DIGITS, COORD_DIGITS
 
 def read_classes_from_yolo_dataset_file(fn):
     """
-    Read a dictionary mapping integer class IDs to class names from a YOLOv5 dataset.yaml
-    file.
+    Read a dictionary mapping integer class IDs to class names from a YOLOv5/YOLOv8
+    dataset.yaml file or a .json file. A .json file should contain a dictionary mapping
+    integer category IDs to string category names.
     """
 
-    with open(fn,'r') as f:
-        lines = f.readlines()
-
-    category_id_to_name = {}
-    pat = '\d+:.+'
-    for s in lines:
-        if re.search(pat,s) is not None:
-            tokens = s.split(':')
-            assert len(tokens) == 2, 'Invalid token in category file {}'.format(fn)
-            category_id_to_name[int(tokens[0].strip())] = tokens[1].strip()
+    if fn.endswith('.yml') or fn.endswith('.yaml'):
+
+        with open(fn,'r') as f:
+            lines = f.readlines()
+
+        category_id_to_name = {}
+        pat = '\d+:.+'
+        for s in lines:
+            if re.search(pat,s) is not None:
+                tokens = s.split(':')
+                assert len(tokens) == 2, 'Invalid token in category file {}'.format(fn)
+                category_id_to_name[int(tokens[0].strip())] = tokens[1].strip()
+
+    elif fn.endswith('.json'):
+
+        with open(fn,'r') as f:
+            d_in = json.load(f)
+        category_id_to_name = {}
+        for k in d_in.keys():
+            category_id_to_name[int(k)] = d_in[k]
 
+    else:
+
+        raise ValueError('Unrecognized category file type: {}'.format(fn))
+
+    assert len(category_id_to_name) > 0, 'Failed to read class mappings from {}'.format(fn)
     return category_id_to_name
 
 
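For reference, both input formats now accepted by read_classes_from_yolo_dataset_file; a minimal sketch (the file contents and paths are hypothetical):

import json
from data_management.yolo_output_to_md_output import read_classes_from_yolo_dataset_file

# dataset.yaml style: any line matching '\d+:.+' is treated as a class mapping, e.g.
#
#   names:
#     0: animal
#     1: person
#     2: vehicle

# Equivalent .json file: integer-valued string keys mapping to class names
with open('classes.json','w') as f:
    json.dump({'0':'animal','1':'person','2':'vehicle'},f)

category_id_to_name = read_classes_from_yolo_dataset_file('classes.json')
assert category_id_to_name[0] == 'animal'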
@@ -125,7 +141,8 @@ def yolo_json_output_to_md_output(yolo_json_file, image_folder,
     if image_id_to_error is None:
         image_id_to_error = {}
 
-    print('Converting {} to MD format'.format(yolo_json_file))
+    print('Converting {} to MD format and writing results to {}'.format(
+        yolo_json_file,output_file))
 
     if isinstance(yolo_category_id_to_name,str):
         assert os.path.isfile(yolo_category_id_to_name), \
@@ -194,6 +211,16 @@
 
     # ...if image IDs are formatted as integers in YOLO output
 
+    # In a modified version of val.py, we use negative category IDs to indicate an error
+    # that happened during inference (typically truncated images with valid headers,
+    # so corruption was not detected during val.py's initial corruption check pass.
+    for det in detections:
+        if det['category_id'] < 0:
+            assert 'error' in det, 'Negative category ID present with no error string'
+            error_string = det['error']
+            print('Caught inference-time failure {} for image {}'.format(error_string,det['image_id']))
+            image_id_to_error[det['image_id']] = error_string
+
     output_images = []
 
     # image_file_relative = image_files_relative[10]
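For reference, the shape of record the new error-handling loop consumes; a hypothetical detection entry emitted by the modified val.py (only image_id, the negative category_id, and error are load-bearing here; the other fields are illustrative):

failed_det = {
    'image_id': 'loc0001/IMG_0042',
    'category_id': -1,
    'bbox': [0.0, 0.0, 0.0, 0.0],
    'score': 0.0,
    'error': 'image file is truncated'
}
# yolo_json_output_to_md_output() copies this into image_id_to_error and reports
# the image as a failure instead of emitting detections for it.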