PyPI - megadetector - Versions diffs - 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl - Mend

megadetector 5.0.28-py3-none-any.whl → 10.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of megadetector might be problematic. Click here for more details.

Files changed (197) hide show

megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
megadetector/classification/aggregate_classifier_probs.py +3 -3
megadetector/classification/analyze_failed_images.py +5 -5
megadetector/classification/cache_batchapi_outputs.py +5 -5
megadetector/classification/create_classification_dataset.py +11 -12
megadetector/classification/crop_detections.py +10 -10
megadetector/classification/csv_to_json.py +8 -8
megadetector/classification/detect_and_crop.py +13 -15
megadetector/classification/efficientnet/model.py +8 -8
megadetector/classification/efficientnet/utils.py +6 -5
megadetector/classification/evaluate_model.py +7 -7
megadetector/classification/identify_mislabeled_candidates.py +6 -6
megadetector/classification/json_to_azcopy_list.py +1 -1
megadetector/classification/json_validator.py +29 -32
megadetector/classification/map_classification_categories.py +9 -9
megadetector/classification/merge_classification_detection_output.py +12 -9
megadetector/classification/prepare_classification_script.py +19 -19
megadetector/classification/prepare_classification_script_mc.py +26 -26
megadetector/classification/run_classifier.py +4 -4
megadetector/classification/save_mislabeled.py +6 -6
megadetector/classification/train_classifier.py +1 -1
megadetector/classification/train_classifier_tf.py +9 -9
megadetector/classification/train_utils.py +10 -10
megadetector/data_management/annotations/annotation_constants.py +1 -2
megadetector/data_management/camtrap_dp_to_coco.py +79 -46
megadetector/data_management/cct_json_utils.py +103 -103
megadetector/data_management/cct_to_md.py +49 -49
megadetector/data_management/cct_to_wi.py +33 -33
megadetector/data_management/coco_to_labelme.py +75 -75
megadetector/data_management/coco_to_yolo.py +210 -193
megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
megadetector/data_management/databases/integrity_check_json_db.py +228 -200
megadetector/data_management/databases/subset_json_db.py +33 -33
megadetector/data_management/generate_crops_from_cct.py +88 -39
megadetector/data_management/get_image_sizes.py +54 -49
megadetector/data_management/labelme_to_coco.py +133 -125
megadetector/data_management/labelme_to_yolo.py +159 -73
megadetector/data_management/lila/create_lila_blank_set.py +81 -83
megadetector/data_management/lila/create_lila_test_set.py +32 -31
megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
megadetector/data_management/lila/download_lila_subset.py +21 -24
megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
megadetector/data_management/lila/get_lila_image_counts.py +22 -22
megadetector/data_management/lila/lila_common.py +73 -70
megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
megadetector/data_management/mewc_to_md.py +344 -340
megadetector/data_management/ocr_tools.py +262 -255
megadetector/data_management/read_exif.py +249 -227
megadetector/data_management/remap_coco_categories.py +90 -28
megadetector/data_management/remove_exif.py +81 -21
megadetector/data_management/rename_images.py +187 -187
megadetector/data_management/resize_coco_dataset.py +588 -120
megadetector/data_management/speciesnet_to_md.py +41 -41
megadetector/data_management/wi_download_csv_to_coco.py +55 -55
megadetector/data_management/yolo_output_to_md_output.py +248 -122
megadetector/data_management/yolo_to_coco.py +333 -191
megadetector/detection/change_detection.py +832 -0
megadetector/detection/process_video.py +340 -337
megadetector/detection/pytorch_detector.py +358 -278
megadetector/detection/run_detector.py +399 -186
megadetector/detection/run_detector_batch.py +404 -377
megadetector/detection/run_inference_with_yolov5_val.py +340 -327
megadetector/detection/run_tiled_inference.py +257 -249
megadetector/detection/tf_detector.py +24 -24
megadetector/detection/video_utils.py +332 -295
megadetector/postprocessing/add_max_conf.py +19 -11
megadetector/postprocessing/categorize_detections_by_size.py +45 -45
megadetector/postprocessing/classification_postprocessing.py +468 -433
megadetector/postprocessing/combine_batch_outputs.py +23 -23
megadetector/postprocessing/compare_batch_results.py +590 -525
megadetector/postprocessing/convert_output_format.py +106 -102
megadetector/postprocessing/create_crop_folder.py +347 -147
megadetector/postprocessing/detector_calibration.py +173 -168
megadetector/postprocessing/generate_csv_report.py +508 -499
megadetector/postprocessing/load_api_results.py +48 -27
megadetector/postprocessing/md_to_coco.py +133 -102
megadetector/postprocessing/md_to_labelme.py +107 -90
megadetector/postprocessing/md_to_wi.py +40 -40
megadetector/postprocessing/merge_detections.py +92 -114
megadetector/postprocessing/postprocess_batch_results.py +319 -301
megadetector/postprocessing/remap_detection_categories.py +91 -38
megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
megadetector/postprocessing/separate_detections_into_folders.py +226 -211
megadetector/postprocessing/subset_json_detector_output.py +265 -262
megadetector/postprocessing/top_folders_to_bottom.py +45 -45
megadetector/postprocessing/validate_batch_results.py +70 -70
megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
megadetector/taxonomy_mapping/simple_image_download.py +8 -8
megadetector/taxonomy_mapping/species_lookup.py +156 -74
megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
megadetector/utils/ct_utils.py +1049 -211
megadetector/utils/directory_listing.py +21 -77
megadetector/utils/gpu_test.py +22 -22
megadetector/utils/md_tests.py +632 -529
megadetector/utils/path_utils.py +1520 -431
megadetector/utils/process_utils.py +41 -41
megadetector/utils/split_locations_into_train_val.py +62 -62
megadetector/utils/string_utils.py +148 -27
megadetector/utils/url_utils.py +489 -176
megadetector/utils/wi_utils.py +2658 -2526
megadetector/utils/write_html_image_list.py +137 -137
megadetector/visualization/plot_utils.py +34 -30
megadetector/visualization/render_images_with_thumbnails.py +39 -74
megadetector/visualization/visualization_utils.py +487 -435
megadetector/visualization/visualize_db.py +232 -198
megadetector/visualization/visualize_detector_output.py +82 -76
{megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
megadetector-10.0.0.dist-info/RECORD +139 -0
{megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
megadetector/api/batch_processing/api_core/__init__.py +0 -0
megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
megadetector/api/batch_processing/api_core/server.py +0 -294
megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
megadetector/api/batch_processing/api_core/server_utils.py +0 -88
megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
megadetector/api/batch_processing/api_support/__init__.py +0 -0
megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
megadetector/api/synchronous/__init__.py +0 -0
megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
megadetector/data_management/importers/add_nacti_sizes.py +0 -52
megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
megadetector/data_management/importers/awc_to_json.py +0 -191
megadetector/data_management/importers/bellevue_to_json.py +0 -272
megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
megadetector/data_management/importers/cct_field_adjustments.py +0 -58
megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
megadetector/data_management/importers/ena24_to_json.py +0 -276
megadetector/data_management/importers/filenames_to_json.py +0 -386
megadetector/data_management/importers/helena_to_cct.py +0 -283
megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
megadetector/data_management/importers/jb_csv_to_json.py +0 -150
megadetector/data_management/importers/mcgill_to_json.py +0 -250
megadetector/data_management/importers/missouri_to_json.py +0 -490
megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
megadetector/data_management/importers/noaa_seals_2019.py +0 -181
megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
megadetector/data_management/importers/pc_to_json.py +0 -365
megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
megadetector/data_management/importers/rspb_to_json.py +0 -356
megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
megadetector/data_management/importers/sulross_get_exif.py +0 -65
megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
megadetector/data_management/importers/ubc_to_json.py +0 -399
megadetector/data_management/importers/umn_to_json.py +0 -507
megadetector/data_management/importers/wellington_to_json.py +0 -263
megadetector/data_management/importers/wi_to_json.py +0 -442
megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
megadetector/utils/azure_utils.py +0 -178
megadetector/utils/sas_blob_utils.py +0 -509
megadetector-5.0.28.dist-info/RECORD +0 -209
/megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
{megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
{megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0

megadetector/data_management/read_exif.py CHANGED Viewed

@@ -2,8 +2,8 @@
 read_exif.py
-Given a folder of images, reads relevant metadata (EXIF/IPTC/XMP) fields from all images,
-and writes them to  a .json or .csv file.
+Given a folder of images, reads relevant metadata (EXIF/IPTC/XMP) fields from all images,
+and writes them to  a .json or .csv file.
 This module can use either PIL (which can only reliably read EXIF data) or exiftool (which
 can read everything).  The latter approach expects that exiftool is available on the system
@@ -16,6 +16,9 @@ path.  No attempt is made to be consistent in format across the two approaches.
 import os
 import subprocess
 import json
+import argparse
+import sys
 from datetime import datetime
 from multiprocessing.pool import ThreadPool as ThreadPool
@@ -23,6 +26,7 @@ from multiprocessing.pool import Pool as Pool
 from tqdm import tqdm
 from PIL import Image, ExifTags
+from functools import partial
 from megadetector.utils.path_utils import find_images, is_executable
 from megadetector.utils.ct_utils import args_to_object
@@ -31,7 +35,9 @@ from megadetector.data_management.cct_json_utils import write_object_with_serial
 debug_max_images = None
-minimal_exif_tags = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight','DateTimeOriginal','Orientation']
+minimal_exif_tags = \
+    ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight',
+     'DateTimeOriginal','Orientation', 'GPSInfo']
 #%% Options
@@ -40,85 +46,84 @@ class ReadExifOptions:
     """
     Parameters controlling metadata extraction.
     """
     def __init__(self):
         #: Enable additional debug console output
         self.verbose = False
         #: If this is True and an output file is specified for read_exif_from_folder,
         #: and we encounter a serialization issue, we'll return the results but won't
-        #: error.
+        #: error.
         self.allow_write_error = False
         #: Number of concurrent workers, set to <= 1 to disable parallelization
         self.n_workers = 1
         #: Should we use threads (vs. processes) for parallelization?
         #:
         #: Not relevant if n_workers is <= 1.
         self.use_threads = True
-        #: "File" and "ExifTool" are tag types used by ExifTool to report data that
+        #: "File" and "ExifTool" are tag types used by ExifTool to report data that
         #: doesn't come from EXIF, rather from the file (e.g. file size).
         self.tag_types_to_ignore = set(['File','ExifTool'])
         #: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
         #:
         #: A useful set of tags one might want to limit queries for:
         #:
-        #: options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight',
-        #: 'DateTimeOriginal','Orientation']
+        #: options.tags_to_include = minimal_exif_tags
         self.tags_to_include = None
         #: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
         self.tags_to_exclude = None
         #: The command line to invoke if using exiftool, can be an absolute path to exiftool.exe, or
         #: can be just "exiftool", in which case it should be on your system path.
         self.exiftool_command_name = 'exiftool'
         #: How should we handle byte-formatted EXIF tags?
         #:
         #: 'convert_to_string': convert to a Python string
         #: 'delete': don't include at all
         #: 'raw': include as a byte string
         self.byte_handling = 'convert_to_string' # 'convert_to_string','delete','raw'
         #: Should we use exiftool or PIL?
         self.processing_library = 'pil' # 'exiftool','pil'
 class ExifResultsToCCTOptions:
     """
-    Options controlling the behavior of exif_results_to_cct() (which reformats the datetime
+    Options controlling the behavior of exif_results_to_cct() (which reformats the datetime
     information) extracted by read_exif_from_folder().
     """
     def __init__(self):
         #: Timestamps older than this are assumed to be junk; lots of cameras use a
         #: default time in 2000.
         self.min_valid_timestamp_year = 2001
         #: The EXIF tag from which to pull datetime information
         self.exif_datetime_tag = 'DateTimeOriginal'
         #: Function for extracting location information, should take a string
         #: and return a string.  Defaults to ct_utils.image_file_to_camera_folder.  If
         #: this is None, location is written as "unknown".
         self.filename_to_location_function = image_file_to_camera_folder
 #%% Functions
 def _get_exif_ifd(exif):
     """
     Read EXIF data by finding the EXIF offset and reading tags directly
     https://github.com/python-pillow/Pillow/issues/5863
     """
     # Find the offset for all the EXIF information
     for key, value in ExifTags.TAGS.items():
         if value == "ExifOffset":
@@ -132,19 +137,19 @@ def _get_exif_ifd(exif):
 def has_gps_info(im):
     """
-    Given a filename, PIL image, dict of EXIF tags, or dict containing an 'exif_tags' field,
+    Given a filename, PIL image, dict of EXIF tags, or dict containing an 'exif_tags' field,
     determine whether GPS location information is present in this image.  Does not retrieve
     location info, currently only used to determine whether it's present.
     Args:
         im (str, PIL.Image.Image, dict): image for which we should determine GPS metadata
         presence
     Returns:
         bool: whether GPS metadata is present, or None if we failed to read EXIF data from
         a file.
     """
     if isinstance(im,str) or isinstance(im,Image.Image):
         exif_tags = read_pil_exif(im)
         if exif_tags is None:
@@ -153,57 +158,58 @@ def has_gps_info(im):
     else:
         assert isinstance(im,dict)
         exif_tags = im
     if 'exif_tags' in exif_tags:
         exif_tags = exif_tags['exif_tags']
         if exif_tags is None:
             return None
     if 'GPSInfo' in exif_tags and \
         exif_tags['GPSInfo'] is not None and \
         isinstance(exif_tags['GPSInfo'],dict):
             # Don't indicate that GPS data is present if only GPS version info is present
             if ('GPSLongitude' in exif_tags['GPSInfo']) or ('GPSLatitude' in exif_tags['GPSInfo']):
                 return True
             return False
     return False
-# ...def has_gps_info(...)
+# ...def has_gps_info(...)
 def read_pil_exif(im,options=None):
     """
     Read all the EXIF data we know how to read from an image, using PIL.  This is primarily
-    an internal function; the main entry point for single-image EXIF information is
+    an internal function; the main entry point for single-image EXIF information is
     read_exif_tags_for_image().
     Args:
-        im (str or PIL.Image.Image): image (as a filename or an Image object) from which
+        im (str or PIL.Image.Image): image (as a filename or an Image object) from which
             we should read EXIF data.
+        options (ReadExifOptions, optional): see ReadExifOptions
     Returns:
         dict: a dictionary mapping EXIF tag names to their values
     """
     if options is None:
         options = ReadExifOptions()
     image_name = '[image]'
     if isinstance(im,str):
         image_name = im
         im = Image.open(im)
     exif_tags = {}
     try:
         exif_info = im.getexif()
     except Exception:
         exif_info = None
     if exif_info is None:
         return exif_tags
     for k, v in exif_info.items():
         assert isinstance(k,str) or isinstance(k,int), \
             'Invalid EXIF key {}'.format(str(k))
@@ -212,9 +218,9 @@ def read_pil_exif(im,options=None):
         else:
             # print('Warning: unrecognized EXIF tag: {}'.format(k))
             exif_tags[k] = str(v)
     exif_ifd_tags = _get_exif_ifd(exif_info)
     for k in exif_ifd_tags.keys():
         v = exif_ifd_tags[k]
         if k in exif_tags:
@@ -223,16 +229,16 @@ def read_pil_exif(im,options=None):
                     k,image_name,exif_tags[k],v))
         else:
             exif_tags[k] = v
     exif_tag_names = list(exif_tags.keys())
     # Type conversion and cleanup
-    #
+    #
     # Most quirky types will get serialized to string when we write to .json.
     for k in exif_tag_names:
         if isinstance(exif_tags[k],bytes):
             if options.byte_handling == 'delete':
                 del exif_tags[k]
             elif options.byte_handling == 'raw':
@@ -240,24 +246,24 @@ def read_pil_exif(im,options=None):
             else:
                 assert options.byte_handling == 'convert_to_string'
                 exif_tags[k] = str(exif_tags[k])
         elif isinstance(exif_tags[k],str):
             exif_tags[k] = exif_tags[k].strip()
-    # Special case for GPS info... I could decode other encoded tags, but GPS info is
+    # Special case for GPS info... I could decode other encoded tags, but GPS info is
     # particularly important, so I'm only doing that for now.
     if 'GPSInfo' in exif_tags:
         try:
             # Find the tag number for GPS info, in practice should always be 34853
-            GPSINFO_TAG = next(tag for tag, name in ExifTags.TAGS.items() if name == "GPSInfo")
-            assert GPSINFO_TAG == 34853
+            gpsinfo_tag = next(tag for tag, name in ExifTags.TAGS.items() if name == "GPSInfo")
+            assert gpsinfo_tag == 34853
             # These are integer keys, e.g. {7: (14.0, 27.0, 7.24)}
-            gps_info_raw = exif_info.get_ifd(GPSINFO_TAG)
+            gps_info_raw = exif_info.get_ifd(gpsinfo_tag)
             # Convert to strings, e.g. 'GPSTimeStamp'
             gps_info = {}
             for int_tag,v in enumerate(gps_info_raw.keys()):
@@ -266,15 +272,15 @@ def read_pil_exif(im,options=None):
                     gps_info[ExifTags.GPSTAGS[int_tag]] = v
                 else:
                     gps_info[int_tag] = v
             exif_tags['GPSInfo'] = gps_info
         except Exception as e:
             if options.verbose:
                 print('Warning: error reading GPS info: {}'.format(str(e)))
     # ...if we think there might be GPS tags in this image
     return exif_tags
 # ...read_pil_exif()
@@ -284,27 +290,33 @@ def format_datetime_as_exif_datetime_string(dt):
     """
     Returns a string rendering of a Python datetime object, using the standard EXIF
     datetime string format ('%Y:%m:%d %H:%M:%S')
+    Args:
+        dt (datetime): datetime object to format
+    Returns:
+        str: [dt] as a string in standard EXIF format
     """
     return datetime.strftime(dt, '%Y:%m:%d %H:%M:%S')
 def parse_exif_datetime_string(s,verbose=False):
     """"
-    Exif datetimes are strings, but in a standard format:
+    Exif datetimes are strings, but in a standard format:
     %Y:%m:%d %H:%M:%S
     Parses one of those strings into a Python datetime object.
     Args:
         s (str): datetime string to parse, should be in standard EXIF datetime format
         verbose (bool, optional): enable additional debug output
     Returns:
         datetime: the datetime object created from [s]
     """
     dt = None
     try:
         dt = datetime.strptime(s, '%Y:%m:%d %H:%M:%S')
@@ -319,7 +331,7 @@ def _filter_tags(tags,options):
     Internal function used to include/exclude specific tags from the exif_tags
     dict.
     """
     if options is None:
         return tags
     if options.tags_to_include is None and options.tags_to_exclude is None:
@@ -343,23 +355,27 @@ def _filter_tags(tags,options):
 def read_exif_tags_for_image(file_path,options=None):
     """
     Get relevant fields from EXIF data for an image
+    Args:
+        file_path (str): image from which we should read EXIF data
+        options (ReadExifOptions, optional): see ReadExifOptions
     Returns:
-        dict: a dict with fields 'status' (str) and 'tags'. The exact format of 'tags' depends on
+        dict: a dict with fields 'status' (str) and 'tags'. The exact format of 'tags' depends on
         options (ReadExifOptions, optional): parameters controlling metadata extraction
         options.processing_library:
             - For exiftool, 'tags' is a list of lists, where each element is (type/tag/value)
             - For PIL, 'tags' is a dict (str:str)
     """
     if options is None:
         options = ReadExifOptions()
     result = {'status':'unknown','tags':[]}
     if options.processing_library == 'pil':
         try:
             exif_tags = read_pil_exif(file_path,options)
@@ -369,18 +385,18 @@ def read_exif_tags_for_image(file_path,options=None):
                     file_path,str(e)))
             result['status'] = 'read_failure'
             result['error'] = str(e)
         if result['status'] == 'unknown':
-            if exif_tags is None:
+            if exif_tags is None:
                 result['status'] = 'empty_read'
             else:
                 result['status'] = 'success'
                 result['tags'] = _filter_tags(exif_tags,options)
         return result
     elif options.processing_library == 'exiftool':
         # -G means "Print group name for each tag", e.g. print:
         #
         # [File]          Bits Per Sample                 : 8
@@ -390,95 +406,95 @@ def read_exif_tags_for_image(file_path,options=None):
         # Bits Per Sample                 : 8
         proc = subprocess.Popen([options.exiftool_command_name, '-G', file_path],
                                 stdout=subprocess.PIPE, encoding='utf8')
-        exif_lines = proc.stdout.readlines()
+        exif_lines = proc.stdout.readlines()
         exif_lines = [s.strip() for s in exif_lines]
         if ( (exif_lines is None) or (len(exif_lines) == 0) or not \
             any([s.lower().startswith('[exif]') for s in exif_lines])):
             result['status'] = 'failure'
             return result
         # A list of three-element lists (type/tag/value)
         exif_tags = []
         # line_raw = exif_lines[0]
         for line_raw in exif_lines:
             # A typical line:
             #
             # [ExifTool]      ExifTool Version Number         : 12.13
             line = line_raw.strip()
             # Split on the first occurrence of ":"
             tokens = line.split(':',1)
             assert(len(tokens) == 2), 'EXIF tokenization failure ({} tokens, expected 2)'.format(
                 len(tokens))
-            field_value = tokens[1].strip()
-            field_name_type = tokens[0].strip()
+            field_value = tokens[1].strip()
+            field_name_type = tokens[0].strip()
             field_name_type_tokens = field_name_type.split(None,1)
             assert len(field_name_type_tokens) == 2, 'EXIF tokenization failure'
             field_type = field_name_type_tokens[0].strip()
             assert field_type.startswith('[') and field_type.endswith(']'), \
                 'Invalid EXIF field {}'.format(field_type)
             field_type = field_type[1:-1]
             if field_type in options.tag_types_to_ignore:
                 if options.verbose:
                     print('Ignoring tag with type {}'.format(field_type))
-                continue
+                continue
             field_name = field_name_type_tokens[1].strip()
             if options.tags_to_exclude is not None and field_name in options.tags_to_exclude:
                 continue
             if options.tags_to_include is not None and field_name not in options.tags_to_include:
                 continue
             tag = [field_type,field_name,field_value]
             exif_tags.append(tag)
         # ...for each output line
         result['status'] = 'success'
         result['tags'] = exif_tags
         return result
     else:
         raise ValueError('Unknown processing library {}'.format(
             options.processing_library))
     # ...which processing library are we using?
 # ...read_exif_tags_for_image()
 def _populate_exif_data(im, image_base, options=None):
     """
     Populate EXIF data into the 'exif_tags' field in the image object [im].
     im['file_name'] should be prepopulated, relative to image_base.
     Returns a modified version of [im], also modifies [im] in place.
     """
     if options is None:
         options = ReadExifOptions()
     fn = im['file_name']
     if options.verbose:
         print('Processing {}'.format(fn))
     try:
         file_path = os.path.join(image_base,fn)
         assert os.path.isfile(file_path), 'Could not find file {}'.format(file_path)
         result = read_exif_tags_for_image(file_path,options)
         if result['status'] == 'success':
-            exif_tags = result['tags']
+            exif_tags = result['tags']
             im['exif_tags'] = exif_tags
         else:
             im['exif_tags'] = None
@@ -487,15 +503,15 @@ def _populate_exif_data(im, image_base, options=None):
                 im['error'] = result['error']
             if options.verbose:
                 print('Error reading EXIF data for {}'.format(file_path))
     except Exception as e:
         s = 'Error on {}: {}'.format(fn,str(e))
         print(s)
         im['error'] = s
         im['status'] = 'read failure'
         im['exif_tags'] = None
     return im
 # ..._populate_exif_data()
@@ -503,67 +519,72 @@ def _populate_exif_data(im, image_base, options=None):
 def _create_image_objects(image_files,recursive=True):
     """
     Build one bare image record per input file.

     [image_files] is either a list of relative paths (stored as-is, with no
     path processing, so the base folder is irrelevant here) or the name of a
     folder to enumerate.  "recursive" is only consulted in the folder case.

     Returns a list of dicts, each with the single field 'file_name'.
     """
     # A string is treated as a folder; ask find_images for *relative* paths
     if isinstance(image_files,str):
         folder = image_files
         print('Enumerating image files in {}'.format(folder))
         assert os.path.isdir(folder), 'Invalid image folder {}'.format(folder)
         image_files = find_images(folder,
                                   recursive=recursive,
                                   return_relative_paths=True,
                                   convert_slashes=True)
     image_objects = [{'file_name': relative_path} for relative_path in image_files]
     # Nothing to trim unless the module-level debug limit is set
     if debug_max_images is None:
         return image_objects
     print('Trimming input list to {} images'.format(debug_max_images))
     return image_objects[:debug_max_images]
 def _populate_exif_for_images(image_base,images,options=None):
     """
     Main worker loop: read EXIF data for each image object in [images] by
     calling _populate_exif_data on it, either serially or via a worker pool.

     Args:
         image_base (str): base folder that each 'file_name' is relative to
         images (list): list of dicts with the field 'file_name' containing
             a relative path (relative to 'image_base')
         options (ReadExifOptions, optional): processing options; a default
             ReadExifOptions() is used if this is None.  This function reads
             'n_workers' and 'use_threads'.

     Returns:
         list: the values returned by _populate_exif_data, one per input image,
         in input order.  Callers should use this list rather than relying on
         [images] having been modified.
         NOTE(review): with a process pool the workers presumably operate on
         pickled copies, so in-place updates would not reach [images] - confirm.
     """
     if options is None:
         options = ReadExifOptions()
     if options.n_workers == 1:
         results = []
         for im in tqdm(images):
             results.append(_populate_exif_data(im,image_base,options))
     else:
         # Imported locally so this function does not depend on a file-level import
         from functools import partial
         pool = None
         try:
             if options.use_threads:
                 print('Starting parallel thread pool with {} workers'.format(options.n_workers))
                 pool = ThreadPool(options.n_workers)
             else:
                 print('Starting parallel process pool with {} workers'.format(options.n_workers))
                 pool = Pool(options.n_workers)
             results = list(tqdm(pool.imap(partial(_populate_exif_data,image_base=image_base,
                                             options=options),images),total=len(images)))
         finally:
             # If pool construction itself failed, [pool] is still None; skip cleanup
             # so the original exception propagates instead of an AttributeError.
             if pool is not None:
                 pool.close()
                 pool.join()
                 print("Pool closed and joined for EXIF extraction")
     return results
@@ -571,23 +592,23 @@ def _populate_exif_for_images(image_base,images,options=None):
 def _write_exif_results(results,output_file):
     """
     Write EXIF information to [output_file].
     'results' is a list of dicts with fields 'exif_tags' and 'file_name'.
-    Writes to .csv or .json depending on the extension of 'output_file'.
+    Writes to .csv or .json depending on the extension of 'output_file'.
     """
     if output_file.endswith('.json'):
         with open(output_file,'w') as f:
             json.dump(results,f,indent=1,default=str)
     elif output_file.endswith('.csv'):
         # Find all EXIF tags that exist in any image
         all_keys = set()
         for im in results:
             keys_this_image = set()
             exif_tags = im['exif_tags']
             file_name = im['file_name']
@@ -597,60 +618,64 @@ def _write_exif_results(results,output_file):
                     'Error: tag {} appears twice in image {}'.format(
                         tag_name,file_name)
                 all_keys.add(tag_name)
             # ...for each tag in this image
         # ...for each image
         all_keys = sorted(list(all_keys))
         header = ['File Name']
         header.extend(all_keys)
         import csv
         with open(output_file,'w') as csvfile:
             writer = csv.writer(csvfile)
             # Write header
             writer.writerow(header)
             for im in results:
                 row = [im['file_name']]
                 kvp_this_image = {tag[1]:tag[2] for tag in im['exif_tags']}
                 for i_key,key in enumerate(all_keys):
                     value = ''
                     if key in kvp_this_image:
                         value = kvp_this_image[key]
-                    row.append(value)
+                    row.append(value)
                 # ...for each key that *might* be present in this image
                 assert len(row) == len(header)
                 writer.writerow(row)
             # ...for each image
         # ...with open()
     else:
         raise ValueError('Could not determine output type from file {}'.format(
             output_file))
     # ...if we're writing to .json/.csv
     print('Wrote results to {}'.format(output_file))
 # ..._write_exif_results(...)
-def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None,recursive=True):
+def read_exif_from_folder(input_folder,
+                          output_file=None,
+                          options=None,
+                          filenames=None,
+                          recursive=True):
     """
     Read EXIF data for a folder of images.
     Args:
         input_folder (str): folder to process; if this is None, [filenames] should be a list of absolute
             paths
@@ -661,23 +686,22 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
             a list of absolute filenames (if [input_folder] is None)
         recursive (bool, optional): whether to recurse into [input_folder], not relevant if [input_folder]
             is None.
-        verbose (bool, optional): enable additional debug output
     Returns:
         list: list of dicts, each of which contains EXIF information for one images.  Fields include at least:
             * 'file_name': the relative path to the image
             * 'exif_tags': a dict of EXIF tags whose exact format depends on [options.processing_library].
     """
     if options is None:
         options = ReadExifOptions()
     # Validate options
     if options.tags_to_include is not None:
         assert options.tags_to_exclude is None, "tags_to_include and tags_to_exclude are incompatible"
     if options.tags_to_exclude is not None:
-        assert options.tags_to_include is None, "tags_to_include and tags_to_exclude are incompatible"
+        assert options.tags_to_include is None, "tags_to_include and tags_to_exclude are incompatible"
     if input_folder is None:
         input_folder = ''
     if len(input_folder) > 0:
@@ -686,20 +710,20 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
     assert (len(input_folder) > 0) or (filenames is not None), \
         'Must specify either a folder or a list of files'
-    if output_file is not None:
+    if output_file is not None:
         assert output_file.lower().endswith('.json') or output_file.lower().endswith('.csv'), \
             'I only know how to write results to .json or .csv'
         try:
             with open(output_file, 'a') as f:
                 if not f.writable():
-                    raise IOError('File not writable')
+                    raise OSError('File not writable')
         except Exception:
             print('Could not write to file {}'.format(output_file))
             raise
     if options.processing_library == 'exif':
         assert is_executable(options.exiftool_command_name), 'exiftool not available'
@@ -708,9 +732,9 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
     else:
         assert isinstance(filenames,list)
         images = _create_image_objects(filenames)
     results = _populate_exif_for_images(input_folder,images,options)
     if output_file is not None:
         try:
             _write_exif_results(results,output_file)
@@ -718,8 +742,8 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
             if not options.allow_write_error:
                 raise
             else:
-                print('Warning: error serializing EXIF data: {}'.format(str(e)))
+                print('Warning: error serializing EXIF data: {}'.format(str(e)))
     return results
 # ...read_exif_from_folder(...)
@@ -728,54 +752,54 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
 def exif_results_to_cct(exif_results,cct_output_file=None,options=None):
     """
     Given the EXIF results for a folder of images read via read_exif_from_folder,
-    create a COCO Camera Traps .json file that has no annotations, but
+    create a COCO Camera Traps .json file that has no annotations, but
     attaches image filenames to locations and datetimes.
     Args:
         exif_results (str or list): the filename (or loaded list) containing the results
             from read_exif_from_folder
-        cct_output_file (str,optional): the filename to which we should write
+        cct_output_file (str, optional): the filename to which we should write
             COCO-Camera-Traps-formatted data
         options (ExifResultsToCCTOptions, optional): options guiding the generation
             of the CCT file, particularly location mapping
     Returns:
         dict: a COCO Camera Traps dict (with no annotations).
     """
     if options is None:
         options = ExifResultsToCCTOptions()
     if isinstance(exif_results,str):
         print('Reading EXIF results from {}'.format(exif_results))
         with open(exif_results,'r') as f:
             exif_results = json.load(f)
     else:
         assert isinstance(exif_results,list)
     now = datetime.now()
     image_info = []
     images_without_datetime = []
     images_with_invalid_datetime = []
     # exif_result = exif_results[0]
     for exif_result in tqdm(exif_results):
         im = {}
         # By default we assume that each leaf-node folder is a location
         if options.filename_to_location_function is None:
             im['location'] = 'unknown'
         else:
-            im['location'] = options.filename_to_location_function(exif_result['file_name'])
+            im['location'] = options.filename_to_location_function(exif_result['file_name'])
         im['file_name'] = exif_result['file_name']
         im['id'] = im['file_name']
         if ('exif_tags' not in exif_result) or (exif_result['exif_tags'] is None) or \
-            (options.exif_datetime_tag not in exif_result['exif_tags']):
+            (options.exif_datetime_tag not in exif_result['exif_tags']):
             exif_dt = None
         else:
             exif_dt = exif_result['exif_tags'][options.exif_datetime_tag]
@@ -785,26 +809,26 @@ def exif_results_to_cct(exif_results,cct_output_file=None,options=None):
             images_without_datetime.append(im['file_name'])
         else:
             dt = exif_dt
             # An image from the future (or within the last 24 hours) is invalid
             if (now - dt).total_seconds() <= 1*24*60*60:
                 print('Warning: datetime for {} is {}'.format(
                     im['file_name'],dt))
-                im['datetime'] = None
+                im['datetime'] = None
                 images_with_invalid_datetime.append(im['file_name'])
             # An image from before the dawn of time is also invalid
             elif dt.year < options.min_valid_timestamp_year:
                 print('Warning: datetime for {} is {}'.format(
                     im['file_name'],dt))
                 im['datetime'] = None
                 images_with_invalid_datetime.append(im['file_name'])
             else:
                 im['datetime'] = dt
         image_info.append(im)
     # ...for each exif image result
     print('Parsed EXIF datetime information, unable to parse EXIF date from {} of {} images'.format(
@@ -815,21 +839,21 @@ def exif_results_to_cct(exif_results,cct_output_file=None,options=None):
     d['images'] = image_info
     d['annotations'] = []
     d['categories'] = []
     if cct_output_file is not None:
         write_object_with_serialized_datetimes(d,cct_output_file)
     return d
 # ...exif_results_to_cct(...)
 #%% Interactive driver
 if False:
     #%%
     input_folder = r'C:\temp\md-name-testing'
     output_file = None # r'C:\temp\md-name-testing\exif.json'
     options = ReadExifOptions()
@@ -838,30 +862,28 @@ if False:
     options.use_threads = False
     options.processing_library = 'pil'
     # options.processing_library = 'exiftool'
-    options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight','DateTime','DateTimeOriginal','Orientation']
+    options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth',
+                               'ExifImageHeight','DateTime','DateTimeOriginal','Orientation']
     # options.tags_to_exclude = ['MakerNote']
     results = read_exif_from_folder(input_folder,output_file,options)
     #%%
     with open(output_file,'r') as f:
         d = json.load(f)
-#%% Command-line driver
-import argparse
-import sys
+#%% Command-line driver
-def main():
+def main(): # noqa
     options = ReadExifOptions()
     parser = argparse.ArgumentParser(description=('Read EXIF information from all images in' + \
                                                   ' a folder, and write the results to .csv or .json'))
-    parser.add_argument('input_folder', type=str,
+    parser.add_argument('input_folder', type=str,
                         help='Folder of images from which we should read EXIF information')
     parser.add_argument('output_file', type=str,
                         help='Output file (.json) to which we should write EXIF information')
@@ -871,16 +893,16 @@ def main():
                         help='Use threads (instead of processes) for multitasking')
     parser.add_argument('--processing_library', type=str, default=options.processing_library,
                         help='Processing library (exif or pil)')
     if len(sys.argv[1:]) == 0:
         parser.print_help()
         parser.exit()
-    args = parser.parse_args()
+    args = parser.parse_args()
     args_to_object(args, options)
     options.processing_library = options.processing_library.lower()
     read_exif_from_folder(args.input_folder,args.output_file,options)
 if __name__ == '__main__':
     main()

megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl

Potentially problematic release.

megadetector 5.0.28py3-none-any.whl → 10.0.0py3-none-any.whl