PyPI - megadetector - Versions diffs - 5.0.20__py3-none-any.whl → 5.0.22__py3-none-any.whl - Mend

megadetector 5.0.20__py3-none-any.whl → 5.0.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of megadetector might be problematic. Click here for more details.

Files changed (41) hide show

megadetector/postprocessing/md_to_coco.py CHANGED Viewed

@@ -3,7 +3,8 @@
 md_to_coco.py
 "Converts" MegaDetector output files to COCO format.  "Converts" is in quotes because
-this is an opinionated transformation that requires a confidence threshold.
+this is an opinionated transformation that requires a confidence threshold for most
+applications.
 Does not currently handle classification information.
@@ -18,6 +19,7 @@ import uuid
 from tqdm import tqdm
 from megadetector.visualization import visualization_utils as vis_utils
+from megadetector.utils.path_utils import insert_before_extension
 default_confidence_threshold = 0.15
@@ -33,23 +35,29 @@ def md_to_coco(md_results_file,
                preserve_nonstandard_metadata=True,
                include_failed_images=True,
                include_annotations_without_bounding_boxes=True,
-               empty_category_id='0'):
+               empty_category_id='0',
+               overwrite_behavior='skip',
+               verbose=True,
+               image_filename_to_size=None):
     """
     "Converts" MegaDetector output files to COCO format.  "Converts" is in quotes because
-    this is an opinionated transformation that requires a confidence threshold.
+    this is an opinionated transformation that typically requires a confidence threshold.
     The default confidence threshold is not 0; the assumption is that by default, you are
     going to treat the resulting COCO file as a set of labels.  If you are using the resulting COCO
-    file to evaluate a detector, you likely want a default confidence threshold of 0.  Confidence
-    values will be written to the semi-standard "score" field for each image
+    file to *evaluate* a detector, rather than as a set of labels, you likely want a
+    confidence threshold of 0.  Confidence values will be written to the semi-standard "score"
+    field for each image (regardless of the threshold) if preserve_nonstandard_metadata is True.
     A folder of images is required if width and height information are not available
     in the MD results file.
     Args:
-        md_results_file (str): MD results .json file to convert to COCO format
+        md_results_file (str): MD results .json file to convert to COCO
+            format
         coco_output_file (str, optional): COCO .json file to write; if this is None, we'll return
-            a COCO-formatted dict, but won't write it to disk
+            a COCO-formatted dict, but won't write it to disk.  If this is 'auto', we'll write to
+            [md_results_file_without_extension].coco.json.
         image_folder (str, optional): folder of images, required if 'width' and 'height' are not
             present in the MD results file (they are not required by the format)
         confidence_threshold (float, optional): boxes below this confidence threshold will not be
@@ -59,8 +67,8 @@ def md_to_coco(md_results_file,
         info (dict, optional): arbitrary metadata to include in an "info" field in the COCO-formatted
             output
         preserve_nonstandard_metadata (bool, optional): if this is True, confidence will be preserved in a
-            non-standard "conf" field in each annotation, and any random fields present in each image's data
-            (e.g. EXIF metadata) will be propagated to COCO output
+            non-standard "score" field in each annotation, and any random fields present in each image's
+            data (e.g. EXIF metadata) will be propagated to COCO output
         include_failed_images (bool, optional): if this is True, failed images will be propagated to COCO output
             with a non-empty "failure" field and no other fields, otherwise failed images will be skipped.
         include_annotations_without_bounding_boxes (bool, optional): if this is True, annotations with
@@ -68,22 +76,62 @@ def md_to_coco(md_results_file,
             images will be represented with no annotations.
         empty_category_id (str, optional): category ID reserved for the 'empty' class, should not be
             attached to any bounding boxes
+        overwrite_behavior (str, optional): determines behavior if the output file exists ('skip' to skip conversion,
+            'overwrite' to overwrite the existing file, 'error' to raise an error, 'skip_if_valid' to skip conversion
+            if the .json file appears to be intact (does not verify COCO formatting, just intact-.json-ness))
+        verbose (bool, optional): enable debug output, including the progress bar,
+        image_filename_to_size (dict, optional): dictionary mapping relative image paths to (w,h) tuples.  Reading
+            image sizes is the slowest step, so if you need to convert many results files at once for the same
+            set of images, things will be gobs faster if you read the image sizes in advance and pass them in
+            via this argument.  The format used here is the same format output by parallel_get_image_sizes().
     Returns:
         dict: the COCO data dict, identical to what's written to [coco_output_file] if [coco_output_file]
         is not None.
     """
+    assert isinstance(md_results_file,str)
+    assert os.path.isfile(md_results_file), \
+        'MD results file {} does not exist'.format(md_results_file)
+    if coco_output_file == 'auto':
+        coco_output_file = insert_before_extension(md_results_file,'coco')
+    if coco_output_file is not None:
+        if os.path.isfile(coco_output_file):
+            if overwrite_behavior == 'skip':
+                print('Skipping conversion of {}, output file {} exists'.format(
+                    md_results_file,coco_output_file))
+                return None
+            elif overwrite_behavior == 'skip_if_valid':
+                output_file_is_valid = True
+                try:
+                    with open(coco_output_file,'r') as f:
+                        _ = json.load(f)
+                except Exception:
+                    print('COCO file {} is invalid, proceeding with conversion'.format(
+                        coco_output_file))
+                    output_file_is_valid = False
+                if output_file_is_valid:
+                    print('Skipping conversion of {}, output file {} exists and is valid'.format(
+                        md_results_file,coco_output_file))
+                    return None
+            elif overwrite_behavior == 'overwrite':
+                pass
+            elif overwrite_behavior == 'error':
+                raise ValueError('Output file {} exists'.format(coco_output_file))
     with open(md_results_file,'r') as f:
         md_results = json.load(f)
     coco_images = []
     coco_annotations = []
-    print('Converting MD results to COCO...')
+    print('Converting MD results file {} to COCO file {}...'.format(
+        md_results_file, coco_output_file))
     # im = md_results['images'][0]
-    for im in tqdm(md_results['images']):
+    for im in tqdm(md_results['images'],disable=(not verbose)):
         coco_im = {}
         coco_im['id'] = im['file']
@@ -101,18 +149,36 @@ def md_to_coco(md_results_file,
         h = None
         if ('width' not in im) or ('height' not in im) or validate_image_sizes:
-            if image_folder is None:
-                raise ValueError('Must provide an image folder when height/width need to be read from images')
-            image_file_abs = os.path.join(image_folder,im['file'])
-            pil_im = vis_utils.open_image(image_file_abs)
-            w = pil_im.width
-            h = pil_im.height
+            if (image_folder is None) and (image_filename_to_size is None):
+                raise ValueError('Must provide an image folder or a size mapping when height/width need to be read from images')
+            w = None; h = None
+            if image_filename_to_size is not None:
+                if im['file'] not in image_filename_to_size:
+                    print('Warning: file {} not in image size mapping dict, reading from file'.format(im['file']))
+                else:
+                    image_size = image_filename_to_size[im['file']]
+                    if image_size is not None:
+                        assert len(image_size) == 2
+                        w = image_size[0]
+                        h = image_size[1]
+            if w is None:
+                image_file_abs = os.path.join(image_folder,im['file'])
+                pil_im = vis_utils.open_image(image_file_abs)
+                w = pil_im.width
+                h = pil_im.height
             if validate_image_sizes:
                 if 'width' in im:
                     assert im['width'] == w, 'Width mismatch for image {}'.format(im['file'])
                 if 'height' in im:
                     assert im['height'] == h, 'Height mismatch for image {}'.format(im['file'])
         else:
             w = im['width']
             h = im['height']
@@ -202,9 +268,9 @@ def md_to_coco(md_results_file,
         with open(coco_output_file,'w') as f:
             json.dump(output_dict,f,indent=1)
-    return output_dict
+    return output_dict
-# def md_to_coco(...)
+# ...def md_to_coco(...)
 #%% Interactive driver

megadetector/postprocessing/postprocess_batch_results.py CHANGED Viewed

@@ -92,16 +92,18 @@ class PostProcessingOptions:
         #: Optional .json file containing ground truth information
         self.ground_truth_json_file = ''
-        #: Classes we'll treat as negative
+        #: List of classes we'll treat as negative (defaults to "empty", typically includes
+        #: classes like "blank", "misfire", etc.).
         #:
         #: Include the token "#NO_LABELS#" to indicate that an image with no annotations
         #: should be considered empty.
         self.negative_classes = DEFAULT_NEGATIVE_CLASSES
-        #: Classes we'll treat as neither positive nor negative
+        #: List of classes we'll treat as neither positive nor negative (defaults to
+        #: "unknown", typically includes classes like "unidentifiable").
         self.unlabeled_classes = DEFAULT_UNKNOWN_CLASSES
-        #: A list of output sets that we should count, but not render images for.
+        #: List of output sets that we should count, but not render images for.
         #:
         #: Typically used to preview sets with lots of empties, where you don't want to
         #: subset but also don't want to render 100,000 empty images.
@@ -198,11 +200,16 @@ class PostProcessingOptions:
         #: When classification results are present, should be sort alphabetically by class name (False)
         #: or in descending order by frequency (True)?
-        self.sort_classification_results_by_count = False
+        self.sort_classification_results_by_count = False
         #: Should we split individual pages up into smaller pages if there are more than
         #: N images?
         self.max_figures_per_html_file = None
+        #: Footer text for the index page
+        # self.footer_text = '<br/><p style="font-size:80%;">Preview page created with the <a href="{}">MegaDetector Python package</a>.</p>'.\
+        #    format('https://megadetector.readthedocs.io')
+        self.footer_text = ''
     # ...__init__()
@@ -590,6 +597,7 @@ def _prepare_html_subpages(images_html, output_dir, options=None):
         html_image_list_options = {}
         html_image_list_options['maxFiguresPerHtmlFile'] = options.max_figures_per_html_file
         html_image_list_options['headerHtml'] = '<h1>{}</h1>'.format(res.upper())
+        html_image_list_options['pageTitle'] = '{}'.format(res.lower())
         # Don't write empty pages
         if len(array) == 0:
@@ -762,7 +770,7 @@ def _render_image_no_gt(file_info,detection_categories_to_results_name,
     if len(rendered_image_html_info) > 0:
         image_result = [[res, rendered_image_html_info]]
+        classes_rendered_this_image = set()
         max_conf = 0
         for det in detections:
@@ -782,11 +790,14 @@ def _render_image_no_gt(file_info,detection_categories_to_results_name,
                 # confidence threshold
                 if (options.classification_confidence_threshold < 0) or \
                     (top1_class_score >= options.classification_confidence_threshold):
-                    image_result.append(['class_{}'.format(top1_class_name),
-                                         rendered_image_html_info])
+                    class_string = 'class_{}'.format(top1_class_name)
                 else:
-                    image_result.append(['class_unreliable',
+                    class_string = 'class_unreliable'
+                if class_string not in classes_rendered_this_image:
+                    image_result.append([class_string,
                                          rendered_image_html_info])
+                    classes_rendered_this_image.add(class_string)
             # ...if this detection has classification info
@@ -887,7 +898,6 @@ def _render_image_with_gt(file_info,ground_truth_indexed_db,
 #%% Main function
 def process_batch_results(options):
     """
     Given a .json or .csv file containing MD results, do one or more of the following:
@@ -1083,7 +1093,8 @@ def process_batch_results(options):
     output_html_file = ''
-    style_header = """<head>
+    style_header = """<head>
+        <title>Detection results preview</title>
         <style type="text/css">
         a { text-decoration: none; }
         body { font-family: segoe ui, calibri, "trebuchet ms", verdana, arial, sans-serif; }
@@ -1424,7 +1435,7 @@ def process_batch_results(options):
         else:
             confidence_threshold_string = str(options.confidence_threshold)
-        index_page = """<html>
+        index_page = """<html>
         {}
         <body>
         <h2>Evaluation</h2>
@@ -1509,7 +1520,7 @@ def process_batch_results(options):
             index_page += '</div>'
         # Close body and html tags
-        index_page += '</body></html>'
+        index_page += '{}</body></html>'.format(options.footer_text)
         output_html_file = os.path.join(output_dir, 'index.html')
         with open(output_html_file, 'w') as f:
             f.write(index_page)
@@ -1529,7 +1540,6 @@ def process_batch_results(options):
         # for each category
         images_html = collections.defaultdict(list)
         # Add default entries by accessing them for the first time
         # Maps sorted tuples of detection category IDs (string ints) - e.g. ("1"), ("1", "4", "7") - to
@@ -1637,14 +1647,15 @@ def process_batch_results(options):
                         files_to_render), total=len(files_to_render)))
         else:
             for file_info in tqdm(files_to_render):
-                rendering_results.append(_render_image_no_gt(file_info,
-                                                            detection_categories_to_results_name,
-                                                            detection_categories,
-                                                            classification_categories,
-                                                            options=options))
+                rendering_result = _render_image_no_gt(file_info,
+                                                       detection_categories_to_results_name,
+                                                       detection_categories,
+                                                       classification_categories,
+                                                       options=options)
+                rendering_results.append(rendering_result)
-        elapsed = time.time() - start_time
+        elapsed = time.time() - start_time
         # Do we have classification results in addition to detection results?
         has_classification_info = False
@@ -1793,7 +1804,7 @@ def process_batch_results(options):
                         cname, cname.lower(), ccount)
             index_page += '</div>\n'
-        index_page += '</body></html>'
+        index_page += '{}</body></html>'.format(options.footer_text)
         output_html_file = os.path.join(output_dir, 'index.html')
         with open(output_html_file, 'w') as f:
             f.write(index_page)

megadetector/postprocessing/validate_batch_results.py CHANGED Viewed

@@ -15,8 +15,10 @@ import sys
 import json
 import argparse
+from tqdm import tqdm
 from megadetector.detection.video_utils import is_video_file
-from megadetector.utils.ct_utils import args_to_object
+from megadetector.utils.ct_utils import args_to_object, is_list_sorted # noqa
 typical_info_fields = ['detector','detection_completion_time',
                        'classifier','classification_completion_time',
@@ -42,11 +44,16 @@ class ValidateBatchResultsOptions:
         #:
         #: If None, assumes absolute paths.
         self.relative_path_base = None
+        #: Should we return the loaded data, or just the validation results?
+        self.return_data = False
+        #: Enable additional debug output
+        self.verbose = False
 # ...class ValidateBatchResultsOptions
 #%% Main function
 def validate_batch_results(json_filename,options=None):
@@ -55,88 +62,181 @@ def validate_batch_results(json_filename,options=None):
     Args:
         json_filename (str): the filename to validate
-        options (ValidateBatchResultsOptions, optionsl): all the parameters used to control this
+        options (ValidateBatchResultsOptions, optional): all the parameters used to control this
             process, see ValidateBatchResultsOptions for details
     Returns:
-        bool: reserved; currently always errors or returns True.
+        dict: a dict with a field called "validation_results", which is itself a dict.  The reason
+        it's a dict inside a dict is that if return_data is True, the outer dict also contains all
+        the loaded data.  The "validation_results" dict contains fields called "errors", "warnings",
+        and "filename".  "errors" and "warnings" are lists of strings, although "errors" will never
+        be longer than N=1, since validation fails at the first error.
     """
     if options is None:
         options = ValidateBatchResultsOptions()
+    if options.verbose:
+        print('Loading results from {}'.format(json_filename))
     with open(json_filename,'r') as f:
         d = json.load(f)
-    ## Info validation
+    validation_results = {}
+    validation_results['filename'] = json_filename
+    validation_results['warnings'] = []
+    validation_results['errors'] = []
-    assert 'info' in d
-    info = d['info']
+    if not isinstance(d,dict):
+        validation_results['errors'].append('Input data is not a dict')
+        to_return = {}
+        to_return['validation_results'] = validation_results
+        return to_return
-    assert isinstance(info,dict)
-    assert 'format_version' in info
-    format_version = float(info['format_version'])
-    assert format_version >= 1.3, 'This validator can only be used with format version 1.3 or later'
+    try:
+        ## Info validation
+        if not 'info' in d:
+            raise ValueError('Input does not contain info field')
-    print('Validating a .json results file with format version {}'.format(format_version))
-    ## Category validation
-    assert 'detection_categories' in d
-    for k in d['detection_categories'].keys():
-        # Categories should be string-formatted ints
-        assert isinstance(k,str)
-        _ = int(k)
-        assert isinstance(d['detection_categories'][k],str)
+        info = d['info']
+        if not isinstance(info,dict):
+            raise ValueError('Input contains invalid info field')
+        if 'format_version' not in info :
+            raise ValueError('Input does not specify format version')
+        format_version = float(info['format_version'])
+        if format_version < 1.3:
+            raise ValueError('This validator can only be used with format version 1.3 or later')
-    if 'classification_categories' in d:
-        for k in d['classification_categories'].keys():
-            # Categories should be string-formatted ints
-            assert isinstance(k,str)
+        ## Category validation
+        if 'detection_categories' not in d:
+            raise ValueError('Input does not contain detection_categories field')
+        for k in d['detection_categories'].keys():
+            # Category ID should be string-formatted ints
+            if not isinstance(k,str):
+                raise ValueError('Invalid detection category ID: {}'.format(k))
             _ = int(k)
-            assert isinstance(d['classification_categories'][k],str)
-    ## Image validation
-    assert 'images' in d
-    assert isinstance(d['images'],list)
-    # im = d['images'][0]
-    for im in d['images']:
+            if not isinstance(d['detection_categories'][k],str):
+                raise ValueError('Invalid detection category name: {}'.format(
+                    d['detection_categories'][k]))
+        if 'classification_categories' in d:
+            for k in d['classification_categories'].keys():
+                # Categories should be string-formatted ints
+                if not isinstance(k,str):
+                    raise ValueError('Invalid classification category ID: {}'.format(k))
+                _ = int(k)
+                if not isinstance(d['classification_categories'][k],str):
+                    raise ValueError('Invalid classification category name: {}'.format(
+                        d['classification_categories'][k]))
-        assert isinstance(im,dict)
-        assert 'file' in im
-        file = im['file']
+        ## Image validation
-        if options.check_image_existence:
-            if options.relative_path_base is None:
-                file_abs = file
-            else:
-                file_abs = os.path.join(options.relative_path_base,file)
-            assert os.path.isfile(file_abs), 'Cannot find file {}'.format(file_abs)
+        if 'images' not in d:
+            raise ValueError('images field not present')
+        if not isinstance(d['images'],list):
+            raise ValueError('Invalid images field')
+        if options.verbose:
+            print('Validating images')
+        # im = d['images'][0]
+        for i_im,im in tqdm(enumerate(d['images']),total=len(d['images']),disable=(not options.verbose)):
-        if 'detections' not in im or im['detections'] is None:
-            assert 'failure' in im and isinstance(im['failure'],str)
-        else:
-            assert isinstance(im['detections'],list)
+            if not isinstance(im,dict):
+                raise ValueError('Invalid image at index {}'.format(i_im))
+            if 'file' not in im:
+                raise ValueError('Image without filename at index {}'.format(i_im))
+            file = im['file']
-        if is_video_file(im['file']) and (format_version >= 1.4):
-            assert 'frame_rate' in im
             if 'detections' in im and im['detections'] is not None:
                 for det in im['detections']:
-                    assert 'frame_number' in det
+                    assert 'category' in det, 'Image {} has a detection with no category'.format(file)
+                    assert 'conf' in det, 'Image {} has a detection with no confidence'.format(file)
+                    assert isinstance(det['conf'],float), \
+                        'Image {} has an illegal confidence value'.format(file)
+                    assert 'bbox' in det, 'Image {} has a detection with no box'.format(file)
+                    assert det['category'] in d['detection_categories'], \
+                        'Image {} has a detection with an unmapped category {}'.format(
+                            file,det['category'])
+            if options.check_image_existence:
+                if options.relative_path_base is None:
+                    file_abs = file
+                else:
+                    file_abs = os.path.join(options.relative_path_base,file)
+                if not os.path.isfile(file_abs):
+                    raise ValueError('Cannot find file {}'.format(file_abs))
+            if 'failure' in im:
+                if im['failure'] is not None:
+                    if not isinstance(im['failure'],str):
+                        raise ValueError('Image {} has an illegal [failure] value: {}'.format(
+                            im['file'],str(im['failure'])))
+                    if 'detections' not in im:
+                        s = 'Image {} has a failure value, should also have a null detections array'.format(
+                            im['file'])
+                        validation_results['warnings'].append(s)
+                    elif im['detections'] is not None:
+                        raise ValueError('Image {} has a failure value but a non-null detections array'.format(
+                            im['file']))
+            else:
+                if not isinstance(im['detections'],list):
+                    raise ValueError('Invalid detections list for image {}'.format(im['file']))
+            if is_video_file(im['file']) and (format_version >= 1.4):
+                if 'frame_rate' not in im:
+                    raise ValueError('Video without frame rate: {}'.format(im['file']))
+                if im['frame_rate'] < 0:
+                    raise ValueError('Video with illegal frame rate {}: {}'.format(
+                        str(im['frame_rate']),im['file']))
+                if 'detections' in im and im['detections'] is not None:
+                    for det in im['detections']:
+                        if 'frame_number' not in det:
+                            raise ValueError('Frame without frame number in video {}'.format(
+                                im['file']))
+                    frame_numbers = [det['frame_number'] for det in im['detections']] # noqa
+                    # assert is_list_sorted(frame_numbers)
+        # ...for each image
-    # ...for each image
+        ## Validation of other keys
+        for k in d.keys():
+            if (k not in typical_keys) and (k not in required_keys):
+                validation_results['warnings'].append(
+                    'Warning: non-standard key {} present at file level'.format(k))
+    except Exception as e:
+        validation_results['errors'].append(str(e))
+    # ...try/except
-    ## Checking on other keys
+    if options.return_data:
+        to_return = d
+    else:
+        to_return = {}
-    for k in d.keys():
-        if k not in typical_keys and k not in required_keys:
-            print('Warning: non-standard key {} present at file level'.format(k))
+    to_return['validation_results'] = validation_results
+    return to_return
 # ...def validate_batch_results(...)
@@ -144,15 +244,25 @@ def validate_batch_results(json_filename,options=None):
 if False:
-    #%%
+    #%% Validate all .json files in the MD test suite
+    from megadetector.utils.path_utils import recursive_file_list
+    filenames = recursive_file_list(os.path.expanduser('~/AppData/Local/Temp/md-tests'))
+    filenames = [fn for fn in filenames if fn.endswith('.json')]
+    filenames = [fn for fn in filenames if 'detectionIndex' not in fn]
     options = ValidateBatchResultsOptions()
-    # json_filename = r'g:\temp\format.json'
-    # json_filename = r'g:\temp\test-videos\video_results.json'
-    json_filename = r'g:\temp\test-videos\image_results.json'
-    options.check_image_existence = True
-    options.relative_path_base = r'g:\temp\test-videos'
-    validate_batch_results(json_filename,options)
+    options.check_image_existence = False
+    options.relative_path_base = None # r'g:\temp\test-videos'
+    for json_filename in filenames:
+        results = validate_batch_results(json_filename,options)
+        if len(results['validation_results']['warnings']) > 0:
+            print('Warnings in file {}:'.format(json_filename))
+            for s in results['validation_results']['warnings']:
+                print(s)
+            print('')
+        assert len(results['validation_results']['errors']) == 0
 #%% Command-line driver

megadetector 5.0.20__py3-none-any.whl → 5.0.22__py3-none-any.whl

Potentially problematic release.

megadetector 5.0.20__py3-none-any.whl → 5.0.22__py3-none-any.whl