megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (191) hide show
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +93 -79
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
  20. api/batch_processing/postprocessing/compare_batch_results.py +114 -44
  21. api/batch_processing/postprocessing/convert_output_format.py +62 -19
  22. api/batch_processing/postprocessing/load_api_results.py +17 -20
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +165 -68
  25. api/batch_processing/postprocessing/merge_detections.py +40 -15
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
  27. api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +107 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -0
  71. data_management/coco_to_yolo.py +86 -62
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +130 -83
  76. data_management/databases/subset_json_db.py +25 -16
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -144
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -160
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +8 -8
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +309 -159
  120. data_management/labelme_to_yolo.py +103 -60
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +114 -31
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +92 -90
  128. data_management/lila/generate_lila_per_image_labels.py +56 -43
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +103 -70
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +161 -99
  135. data_management/remap_coco_categories.py +84 -0
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +32 -44
  138. data_management/wi_download_csv_to_coco.py +246 -0
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +535 -95
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +189 -114
  147. detection/run_inference_with_yolov5_val.py +118 -51
  148. detection/run_tiled_inference.py +113 -42
  149. detection/tf_detector.py +51 -28
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +249 -70
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -862
  157. md_utils/path_utils.py +655 -155
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +208 -27
  163. md_utils/write_html_image_list.py +51 -35
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +908 -311
  168. md_visualization/visualize_db.py +109 -58
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
  173. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  174. taxonomy_mapping/__init__.py +0 -0
  175. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  176. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  177. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  178. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  179. taxonomy_mapping/retrieve_sample_image.py +12 -12
  180. taxonomy_mapping/simple_image_download.py +11 -11
  181. taxonomy_mapping/species_lookup.py +10 -10
  182. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  183. taxonomy_mapping/taxonomy_graph.py +47 -47
  184. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  185. data_management/cct_json_to_filename_json.py +0 -89
  186. data_management/cct_to_csv.py +0 -140
  187. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  188. detection/detector_training/copy_checkpoints.py +0 -43
  189. md_visualization/visualize_megadb.py +0 -183
  190. megadetector-5.0.7.dist-info/RECORD +0 -202
  191. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
@@ -1,15 +1,15 @@
1
- ########
2
- #
3
- # read_exif.py
4
- #
5
- # Given a folder of images, read relevant metadata (EXIF/IPTC/XMP) fields from all images,
6
- # and write them to a .json or .csv file.
7
- #
8
- # This module can use either PIL (which can only reliably read EXIF data) or exiftool (which
9
- # can read everything). The latter approach expects that exiftool is available on the system
10
- # path. No attempt is made to be consistent in format across the two approaches.
11
- #
12
- ########
1
+ """
2
+
3
+ read_exif.py
4
+
5
+ Given a folder of images, reads relevant metadata (EXIF/IPTC/XMP) fields from all images,
6
+ and writes them to a .json or .csv file.
7
+
8
+ This module can use either PIL (which can only reliably read EXIF data) or exiftool (which
9
+ can read everything). The latter approach expects that exiftool is available on the system
10
+ path. No attempt is made to be consistent in format across the two approaches.
11
+
12
+ """
13
13
 
14
14
  #%% Imports and constants
15
15
 
@@ -24,7 +24,7 @@ from multiprocessing.pool import Pool as Pool
24
24
  from tqdm import tqdm
25
25
  from PIL import Image, ExifTags
26
26
 
27
- from md_utils.path_utils import find_images
27
+ from md_utils.path_utils import find_images, is_executable
28
28
  from md_utils.ct_utils import args_to_object
29
29
 
30
30
  debug_max_images = None
@@ -33,54 +33,61 @@ debug_max_images = None
33
33
  #%% Options
34
34
 
35
35
  class ReadExifOptions:
36
+ """
37
+ Parameters controlling metadata extraction.
38
+ """
36
39
 
40
+ #: Enable additional debug console output
37
41
  verbose = False
38
42
 
39
- # If this is True and an output file is specified for read_exif_from_folder,
40
- # and we encounter a serialization issue, we'll return the results but won't
41
- # error.
43
+ #: If this is True and an output file is specified for read_exif_from_folder,
44
+ #: and we encounter a serialization issue, we'll return the results but won't
45
+ #: error.
42
46
  allow_write_error = False
43
47
 
44
- # Number of concurrent workers
48
+ #: Number of concurrent workers, set to <= 1 to disable parallelization
45
49
  n_workers = 1
46
50
 
47
- # Should we use threads (vs. processes) for parallelization?
48
- #
49
- # Not relevant if n_workers is 1.
51
+ #: Should we use threads (vs. processes) for parallelization?
52
+ #:
53
+ #: Not relevant if n_workers is <= 1.
50
54
  use_threads = True
51
-
55
+
56
+ #: "File" and "ExifTool" are tag types used by ExifTool to report data that
57
+ #: doesn't come from EXIF, rather from the file (e.g. file size).
52
58
  tag_types_to_ignore = set(['File','ExifTool'])
53
59
 
60
+ #: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
61
+ #:
62
+ #: A useful set of tags one might want to limit queries for:
63
+ #:
64
+ #: options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight','DateTime',
65
+ #: 'DateTimeOriginal','Orientation']
66
+ tags_to_include = None
67
+
68
+ #: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
69
+ tags_to_exclude = None
70
+
71
+ #: The command line to invoke if using exiftool, can be an absolute path to exiftool.exe, or
72
+ #: can be just "exiftool", in which case it should be on your system path.
54
73
  exiftool_command_name = 'exiftool'
55
74
 
56
- # How should we handle byte-formatted EXIF tags?
57
- #
58
- # 'convert_to_string': convert to a Python string
59
- # 'delete': don't include at all
60
- # 'raw': include as a byte string
75
+ #: How should we handle byte-formatted EXIF tags?
76
+ #:
77
+ #: 'convert_to_string': convert to a Python string
78
+ #: 'delete': don't include at all
79
+ #: 'raw': include as a byte string
61
80
  byte_handling = 'convert_to_string' # 'convert_to_string','delete','raw'
62
81
 
63
- # Should we use exiftool or pil?
82
+ #: Should we use exiftool or PIL?
64
83
  processing_library = 'pil' # 'exiftool','pil'
65
-
84
+
66
85
 
67
86
  #%% Functions
68
87
 
69
- def enumerate_files(input_folder):
88
+ def _get_exif_ifd(exif):
70
89
  """
71
- Enumerates all image files in input_folder, returning relative paths
72
- """
73
-
74
- image_files = find_images(input_folder,recursive=True)
75
- image_files = [os.path.relpath(s,input_folder) for s in image_files]
76
- image_files = [s.replace('\\','/') for s in image_files]
77
- print('Enumerated {} files'.format(len(image_files)))
78
- return image_files
79
-
80
-
81
- def get_exif_ifd(exif):
82
- """
83
- Read EXIF data by finding the EXIF offset and reading tags directly
90
+ Read EXIF data from by finding the EXIF offset and reading tags directly
84
91
 
85
92
  https://github.com/python-pillow/Pillow/issues/5863
86
93
  """
@@ -98,8 +105,16 @@ def get_exif_ifd(exif):
98
105
 
99
106
  def read_pil_exif(im,options=None):
100
107
  """
101
- Read all the EXIF data we know how to read from [im] (path or PIL Image), whether it's
102
- in the PIL default EXIF data or not.
108
+ Read all the EXIF data we know how to read from an image, using PIL. This is primarily
109
+ an internal function; the main entry point for single-image EXIF information is
110
+ read_exif_tags_for_image().
111
+
112
+ Args:
113
+ im (str or PIL.Image.Image): image (as a filename or an Image object) from which
114
+ we should read EXIF data.
115
+
116
+ Returns:
117
+ dict: a dictionary mapping EXIF tag names to their values
103
118
  """
104
119
 
105
120
  if options is None:
@@ -128,10 +143,10 @@ def read_pil_exif(im,options=None):
128
143
  # print('Warning: unrecognized EXIF tag: {}'.format(k))
129
144
  exif_tags[k] = str(v)
130
145
 
131
- exif_idf_tags = get_exif_ifd(exif_info)
146
+ exif_ifd_tags = _get_exif_ifd(exif_info)
132
147
 
133
- for k in exif_idf_tags.keys():
134
- v = exif_idf_tags[k]
148
+ for k in exif_ifd_tags.keys():
149
+ v = exif_ifd_tags[k]
135
150
  if k in exif_tags:
136
151
  if options.verbose:
137
152
  print('Warning: redundant EXIF values for {} in {}:\n{}\n{}'.format(
@@ -167,8 +182,8 @@ def read_pil_exif(im,options=None):
167
182
 
168
183
  def format_datetime_as_exif_datetime_string(dt):
169
184
  """
170
- Returns a Python datetime object rendered using the standard Exif datetime
171
- string format
185
+ Returns a Python datetime object rendered using the standard EXIF datetime
186
+ string format ('%Y:%m:%d %H:%M:%S')
172
187
  """
173
188
 
174
189
  return datetime.strftime(dt, '%Y:%m:%d %H:%M:%S')
@@ -180,7 +195,14 @@ def parse_exif_datetime_string(s,verbose=False):
180
195
 
181
196
  %Y:%m:%d %H:%M:%S
182
197
 
183
- Parse one of those strings into a Python datetime object.
198
+ Parses one of those strings into a Python datetime object.
199
+
200
+ Args:
201
+ s (str): datetime string to parse, should be in standard EXIF datetime format
202
+ verbose (bool, optional): enable additional debug output
203
+
204
+ Returns:
205
+ datetime: the datetime object created from [s]
184
206
  """
185
207
 
186
208
  dt = None
@@ -192,17 +214,43 @@ def parse_exif_datetime_string(s,verbose=False):
192
214
  return dt
193
215
 
194
216
 
217
+ def _filter_tags(tags,options):
218
+ """
219
+ Internal function used to include/exclude specific tags from the exif_tags
220
+ dict.
221
+ """
222
+
223
+ if options is None:
224
+ return tags
225
+ if options.tags_to_include is None and options.tags_to_exclude is None:
226
+ return tags
227
+ if options.tags_to_include is not None:
228
+ assert options.tags_to_exclude is None, "tags_to_include and tags_to_exclude are incompatible"
229
+ tags_to_return = {}
230
+ for tag_name in tags.keys():
231
+ if tag_name in options.tags_to_include:
232
+ tags_to_return[tag_name] = tags[tag_name]
233
+ return tags_to_return
234
+ if options.tags_to_exclude is not None:
235
+ assert options.tags_to_include is None, "tags_to_include and tags_to_exclude are incompatible"
236
+ tags_to_return = {}
237
+ for tag_name in tags.keys():
238
+ if tag_name not in options.tags_to_exclude:
239
+ tags_to_return[tag_name] = tags[tag_name]
240
+ return tags_to_return
241
+
242
+
195
243
  def read_exif_tags_for_image(file_path,options=None):
196
244
  """
197
245
  Get relevant fields from EXIF data for an image
198
246
 
199
- Returns a dict with fields 'status' (str) and 'tags'
200
-
201
- The exact format of 'tags' depends on options.processing_library
202
-
203
- For exiftool, 'tags' is a list of lists, where each element is (type/tag/value)
204
-
205
- For pil, 'tags' is a dict (str:str)
247
+ Returns:
248
+ dict: a dict with fields 'status' (str) and 'tags'. The exact format of 'tags' depends on
249
+ options (ReadExifOptions, optional): parameters controlling metadata extraction
250
+ options.processing_library:
251
+
252
+ - For exiftool, 'tags' is a list of lists, where each element is (type/tag/value)
253
+ - For PIL, 'tags' is a dict (str:str)
206
254
  """
207
255
 
208
256
  if options is None:
@@ -227,8 +275,8 @@ def read_exif_tags_for_image(file_path,options=None):
227
275
  result['status'] = 'empty_read'
228
276
  else:
229
277
  result['status'] = 'success'
230
- result['tags'] = exif_tags
231
-
278
+ result['tags'] = _filter_tags(exif_tags,options)
279
+
232
280
  return result
233
281
 
234
282
  elif options.processing_library == 'exiftool':
@@ -283,9 +331,12 @@ def read_exif_tags_for_image(file_path,options=None):
283
331
  print('Ignoring tag with type {}'.format(field_type))
284
332
  continue
285
333
 
286
- field_tag = field_name_type_tokens[1].strip()
287
-
288
- tag = [field_type,field_tag,field_value]
334
+ field_name = field_name_type_tokens[1].strip()
335
+ if options.tags_to_exclude is not None and field_name in options.tags_to_exclude:
336
+ continue
337
+ if options.tags_to_include is not None and field_name not in options.tags_to_include:
338
+ continue
339
+ tag = [field_type,field_name,field_value]
289
340
 
290
341
  exif_tags.append(tag)
291
342
 
@@ -305,7 +356,7 @@ def read_exif_tags_for_image(file_path,options=None):
305
356
  # ...read_exif_tags_for_image()
306
357
 
307
358
 
308
- def populate_exif_data(im, image_base, options=None):
359
+ def _populate_exif_data(im, image_base, options=None):
309
360
  """
310
361
  Populate EXIF data into the 'exif_tags' field in the image object [im].
311
362
 
@@ -347,23 +398,28 @@ def populate_exif_data(im, image_base, options=None):
347
398
 
348
399
  return im
349
400
 
350
- # ...populate_exif_data()
401
+ # ..._populate_exif_data()
351
402
 
352
403
 
353
- def create_image_objects(image_files):
404
+ def _create_image_objects(image_files,recursive=True):
354
405
  """
355
406
  Create empty image objects for every image in [image_files], which can be a
356
407
  list of relative paths (which will get stored without processing, so the base
357
408
  path doesn't matter here), or a folder name.
358
409
 
359
410
  Returns a list of dicts with field 'file_name' (a relative path).
411
+
412
+ "recursive" is ignored if "image_files" is a list.
360
413
  """
361
414
 
362
415
  # Enumerate *relative* paths
363
416
  if isinstance(image_files,str):
364
417
  print('Enumerating image files in {}'.format(image_files))
365
418
  assert os.path.isdir(image_files), 'Invalid image folder {}'.format(image_files)
366
- image_files = enumerate_files(image_files)
419
+ image_files = find_images(image_files,
420
+ recursive=recursive,
421
+ return_relative_paths=True,
422
+ convert_slashes=True)
367
423
 
368
424
  images = []
369
425
  for fn in image_files:
@@ -378,7 +434,7 @@ def create_image_objects(image_files):
378
434
  return images
379
435
 
380
436
 
381
- def populate_exif_for_images(image_base,images,options=None):
437
+ def _populate_exif_for_images(image_base,images,options=None):
382
438
  """
383
439
  Main worker loop: read EXIF data for each image object in [images] and
384
440
  populate the image objects.
@@ -394,7 +450,7 @@ def populate_exif_for_images(image_base,images,options=None):
394
450
 
395
451
  results = []
396
452
  for im in tqdm(images):
397
- results.append(populate_exif_data(im,image_base,options))
453
+ results.append(_populate_exif_data(im,image_base,options))
398
454
 
399
455
  else:
400
456
 
@@ -406,13 +462,13 @@ def populate_exif_for_images(image_base,images,options=None):
406
462
  print('Starting parallel process pool with {} workers'.format(options.n_workers))
407
463
  pool = Pool(options.n_workers)
408
464
 
409
- results = list(tqdm(pool.imap(partial(populate_exif_data,image_base=image_base,
465
+ results = list(tqdm(pool.imap(partial(_populate_exif_data,image_base=image_base,
410
466
  options=options),images),total=len(images)))
411
467
 
412
468
  return results
413
469
 
414
470
 
415
- def write_exif_results(results,output_file):
471
+ def _write_exif_results(results,output_file):
416
472
  """
417
473
  Write EXIF information to [output_file].
418
474
 
@@ -489,33 +545,35 @@ def write_exif_results(results,output_file):
489
545
  print('Wrote results to {}'.format(output_file))
490
546
 
491
547
 
492
- def is_executable(name):
493
-
494
- """Check whether `name` is on PATH and marked as executable."""
495
-
496
- # https://stackoverflow.com/questions/11210104/check-if-a-program-exists-from-a-python-script
497
-
498
- from shutil import which
499
- return which(name) is not None
500
-
501
-
502
- def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None):
548
+ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None,recursive=True):
503
549
  """
504
- Read EXIF data for all images in input_folder.
505
-
506
- If filenames is not None, it should be a list of relative filenames; only those files will
507
- be processed.
508
-
509
- input_folder can be None or '', in which case filenames should be a list of absolute paths.
510
-
511
- if output_file is not None, results will be written to the specified .json file.
512
-
513
- returns a dictionary mapping relative filenames to EXIF data.
550
+ Read EXIF data for a folder of images.
551
+
552
+ Args:
553
+ input_folder (str): folder to process; if this is None, [filenames] should be a list of absolute
554
+ paths
555
+ output_file (str, optional): .json file to which we should write results; if this is None, results
556
+ are returned but not written to disk
557
+ options (ReadExifOptions, optional): parameters controlling metadata extraction
558
+ filenames (list, optional): allowlist of relative filenames (if [input_folder] is not None) or
559
+ a list of absolute filenames (if [input_folder] is None)
560
+ recursive (bool, optional): whether to recurse into [input_folder], not relevant if [input_folder]
561
+ is None.
562
+
563
+ Returns:
564
+ dict: a dictionary mapping relative filenames to EXIF data, whose format depends on whether
565
+ we're using PIL or exiftool.
514
566
  """
515
567
 
516
568
  if options is None:
517
569
  options = ReadExifOptions()
518
570
 
571
+ # Validate options
572
+ if options.tags_to_include is not None:
573
+ assert options.tags_to_exclude is None, "tags_to_include and tags_to_exclude are incompatible"
574
+ if options.tags_to_exclude is not None:
575
+ assert options.tags_to_include is None, "tags_to_include and tags_to_exclude are incompatible"
576
+
519
577
  if input_folder is None:
520
578
  input_folder = ''
521
579
  if len(input_folder) > 0:
@@ -542,16 +600,16 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
542
600
  assert is_executable(options.exiftool_command_name), 'exiftool not available'
543
601
 
544
602
  if filenames is None:
545
- images = create_image_objects(input_folder)
603
+ images = _create_image_objects(input_folder,recursive=recursive)
546
604
  else:
547
605
  assert isinstance(filenames,list)
548
- images = create_image_objects(filenames)
606
+ images = _create_image_objects(filenames)
549
607
 
550
- results = populate_exif_for_images(input_folder,images,options)
608
+ results = _populate_exif_for_images(input_folder,images,options)
551
609
 
552
610
  if output_file is not None:
553
611
  try:
554
- write_exif_results(results,output_file)
612
+ _write_exif_results(results,output_file)
555
613
  except Exception as e:
556
614
  if not options.allow_write_error:
557
615
  raise
@@ -567,14 +625,16 @@ if False:
567
625
 
568
626
  #%%
569
627
 
570
- input_folder = os.path.expanduser('~/data/KRU-test')
571
- output_file = os.path.expanduser('~/data/test-exif.json')
628
+ input_folder = r'C:\temp\md-name-testing'
629
+ output_file = None # r'C:\temp\md-name-testing\exif.json'
572
630
  options = ReadExifOptions()
573
631
  options.verbose = False
574
632
  options.n_workers = 10
575
633
  options.use_threads = False
576
634
  options.processing_library = 'pil'
577
635
  # options.processing_library = 'exiftool'
636
+ options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight','DateTime','DateTimeOriginal','Orientation']
637
+ # options.tags_to_exclude = ['MakerNote']
578
638
 
579
639
  results = read_exif_from_folder(input_folder,output_file,options)
580
640
 
@@ -596,8 +656,10 @@ def main():
596
656
  parser = argparse.ArgumentParser(description=('Read EXIF information from all images in' + \
597
657
  ' a folder, and write the results to .csv or .json'))
598
658
 
599
- parser.add_argument('input_folder', type=str)
600
- parser.add_argument('output_file', type=str)
659
+ parser.add_argument('input_folder', type=str,
660
+ help='Folder of images from which we should read EXIF information')
661
+ parser.add_argument('output_file', type=str,
662
+ help='Output file (.json) to which we should write EXIF information')
601
663
  parser.add_argument('--n_workers', type=int, default=1,
602
664
  help='Number of concurrent workers to use (defaults to 1)')
603
665
  parser.add_argument('--use_threads', action='store_true',
@@ -0,0 +1,84 @@
1
+ """
2
+
3
+ remap_coco_categories.py
4
+
5
+ Given a COCO-formatted dataset, remap the categories to a new mapping.
6
+
7
+ """
8
+
9
+ #%% Imports and constants
10
+
11
+ import os
12
+ import json
13
+
14
+ from copy import deepcopy
15
+
16
+
17
+ #%% Main function
18
+
19
+ def remap_coco_categories(input_data,
20
+ output_category_name_to_id,
21
+ input_category_name_to_output_category_name,
22
+ output_file=None):
23
+ """
24
+ Given a COCO-formatted dataset, remap the categories to a new categories mapping, optionally
25
+ writing the results to a new file.
26
+
27
+ output_category_name_to_id is a dict mapping strings to ints.
28
+
29
+ input_category_name_to_output_category_name is a dict mapping strings to strings.
30
+
31
+ [input_data] can be a COCO-formatted dict or a filename. If it's a dict, it will be copied,
32
+ not modified in place.
33
+ """
34
+
35
+ if isinstance(input_data,str):
36
+ assert os.path.isfile(input_data), "Can't find file {}".format(input_data)
37
+ with open(input_data,'r') as f:
38
+ input_data = json.load(f)
39
+ assert isinstance(input_data,dict), 'Illegal COCO input data'
40
+ else:
41
+ assert isinstance(input_data,dict), 'Illegal COCO input data'
42
+ input_data = deepcopy(input_data)
43
+
44
+ # It's safe to modify in-place now
45
+ output_data = input_data
46
+
47
+ # Read input name --> ID mapping
48
+ input_category_name_to_input_category_id = {}
49
+ for c in input_data['categories']:
50
+ input_category_name_to_input_category_id[c['name']] = c['id']
51
+
52
+ # Map input IDs --> output IDs
53
+ input_category_id_to_output_category_id = {}
54
+ for input_name in input_category_name_to_output_category_name.keys():
55
+ output_name = input_category_name_to_output_category_name[input_name]
56
+ assert output_name in output_category_name_to_id, \
57
+ 'No output ID for {} --> {}'.format(input_name,output_name)
58
+ input_id = input_category_name_to_input_category_id[input_name]
59
+ output_id = output_category_name_to_id[output_name]
60
+ input_category_id_to_output_category_id[input_id] = output_id
61
+
62
+ # Map annotations
63
+ for ann in output_data['annotations']:
64
+ assert ann['category_id'] in input_category_id_to_output_category_id, \
65
+ 'Unrecognized category ID {}'.format(ann['category_id'])
66
+ ann['category_id'] = input_category_id_to_output_category_id[ann['category_id']]
67
+
68
+ # Update the category list
69
+ output_categories = []
70
+ for output_name in output_category_name_to_id:
71
+ category = {'name':output_name,'id':output_category_name_to_id[output_name]}
72
+ output_categories.append(category)
73
+ output_data['categories'] = output_categories
74
+
75
+ if output_file is not None:
76
+ with open(output_file,'w') as f:
77
+ json.dump(output_data,f,indent=1)
78
+
79
+ return input_data
80
+
81
+
82
+ #%% Command-line driver
83
+
84
+ # TODO
@@ -1,70 +1,66 @@
1
- ########
2
- #
3
- # remove_exif.py
4
- #
5
- # Removes all EXIF/IPTC/XMP metadata from a folder of images, without making
6
- # backup copies, using pyexiv2.
7
- #
8
- ########
1
+ """
2
+
3
+ remove_exif.py
4
+
5
+ Removes all EXIF/IPTC/XMP metadata from a folder of images, without making
6
+ backup copies, using pyexiv2.
7
+
8
+ TODO: This is a one-off script waiting to be cleaned up for more general use.
9
+
10
+ """
11
+
12
+ input_base = r'f:\images'
13
+
9
14
 
10
15
  #%% Imports and constants
11
16
 
12
17
  import os
13
18
  import glob
14
19
 
15
- input_base = r'f:\images'
16
- assert os.path.isdir(input_base)
17
-
18
-
19
- #%% List files
20
-
21
- all_files = [f for f in glob.glob(input_base + "*/**", recursive=True)]
22
- image_files = [s for s in all_files if (s.lower().endswith('.jpg'))]
23
-
24
-
25
- #%% Remove EXIF data (support)
26
-
27
- import pyexiv2
28
-
29
- # PYEXIV2 IS NOT THREAD SAFE; DO NOT CALL THIS IN PARALLEL FROM A SINGLE PROCESS
30
- def remove_exif(fn):
31
-
32
- try:
33
- img = pyexiv2.Image(fn)
34
- # data = img.read_exif(); print(data)
35
- img.clear_exif()
36
- img.clear_iptc()
37
- img.clear_xmp()
38
- img.close()
39
- except Exception as e:
40
- print('EXIF error on {}: {}'.format(fn,str(e)))
41
-
42
-
43
- #%% Debug
44
-
45
- if False:
46
- #%%
47
- fn = image_files[-10001]
48
- os.startfile(fn)
49
- #%%
50
- remove_exif(fn)
51
- os.startfile(fn)
52
-
53
-
54
- #%% Remove EXIF data (execution)
55
-
56
- from joblib import Parallel, delayed
57
-
58
- n_exif_threads = 50
59
-
60
- if n_exif_threads == 1:
61
-
62
- # fn = image_files[0]
63
- for fn in image_files:
64
- remove_exif(fn)
def main():
    """
    Remove all EXIF/IPTC/XMP metadata, in place (no backup copies), from every
    .jpg file found recursively under the folder [input_base].
    """

    assert os.path.isdir(input_base)

    ##%% Enumerate candidate images

    # NOTE(review): the pattern is input_base + "*/**" with no path separator, so
    # it can also match sibling folders whose names share the same prefix as
    # input_base; preserved as-is pending confirmation of intent.
    candidates = glob.glob(input_base + "*/**", recursive=True)
    image_files = []
    for candidate in candidates:
        if candidate.lower().endswith('.jpg'):
            image_files.append(candidate)

    ##%% Metadata-removal worker

    import pyexiv2

    # PYEXIV2 IS NOT THREAD SAFE; DO NOT CALL THIS IN PARALLEL FROM A SINGLE PROCESS
    def remove_exif(fn):
        """Clear the EXIF, IPTC, and XMP blocks of the image [fn]; errors are printed, not raised."""

        try:
            img = pyexiv2.Image(fn)
            # data = img.read_exif(); print(data)
            img.clear_exif()
            img.clear_iptc()
            img.clear_xmp()
            img.close()
        except Exception as e:
            print('EXIF error on {}: {}'.format(fn,str(e)))

    ##%% Process images, either serially or on a process pool

    from joblib import Parallel, delayed

    n_exif_threads = 50

    if n_exif_threads == 1:

        # fn = image_files[0]
        for fn in image_files:
            remove_exif(fn)

    else:

        # joblib.Parallel defaults to a process-based backend, but let's be sure:
        # pyexiv2 is not thread-safe, so we want processes rather than threads.
        _ = Parallel(n_jobs=n_exif_threads,verbose=2,prefer='processes')(
            delayed(remove_exif)(fn) for fn in image_files)


if __name__ == '__main__':
    main()