megadetector 5.0.8__py3-none-any.whl → 5.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (190)
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +65 -65
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
  20. api/batch_processing/postprocessing/compare_batch_results.py +113 -43
  21. api/batch_processing/postprocessing/convert_output_format.py +41 -16
  22. api/batch_processing/postprocessing/load_api_results.py +16 -17
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +52 -22
  25. api/batch_processing/postprocessing/merge_detections.py +14 -14
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
  27. api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +102 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -263
  71. data_management/coco_to_yolo.py +79 -58
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +62 -24
  76. data_management/databases/subset_json_db.py +24 -15
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -162
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -158
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +7 -7
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +65 -24
  120. data_management/labelme_to_yolo.py +8 -8
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +13 -13
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +44 -110
  128. data_management/lila/generate_lila_per_image_labels.py +55 -42
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +96 -33
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +110 -97
  135. data_management/remap_coco_categories.py +83 -83
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +30 -23
  138. data_management/wi_download_csv_to_coco.py +246 -239
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +300 -60
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +179 -113
  147. detection/run_inference_with_yolov5_val.py +108 -48
  148. detection/run_tiled_inference.py +111 -40
  149. detection/tf_detector.py +51 -29
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +228 -68
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -871
  157. md_utils/path_utils.py +460 -134
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +176 -60
  163. md_utils/write_html_image_list.py +40 -33
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +597 -291
  168. md_visualization/visualize_db.py +76 -48
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/METADATA +13 -7
  171. megadetector-5.0.10.dist-info/RECORD +224 -0
  172. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/top_level.txt +1 -0
  173. taxonomy_mapping/__init__.py +0 -0
  174. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  175. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  176. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  177. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  178. taxonomy_mapping/retrieve_sample_image.py +12 -12
  179. taxonomy_mapping/simple_image_download.py +11 -11
  180. taxonomy_mapping/species_lookup.py +10 -10
  181. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  182. taxonomy_mapping/taxonomy_graph.py +47 -47
  183. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  184. data_management/cct_json_to_filename_json.py +0 -89
  185. data_management/cct_to_csv.py +0 -140
  186. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  187. detection/detector_training/copy_checkpoints.py +0 -43
  188. megadetector-5.0.8.dist-info/RECORD +0 -205
  189. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/LICENSE +0 -0
  190. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/WHEEL +0 -0
@@ -1,15 +1,15 @@
1
- ########
2
- #
3
- # read_exif.py
4
- #
5
- # Given a folder of images, read relevant metadata (EXIF/IPTC/XMP) fields from all images,
6
- # and write them to a .json or .csv file.
7
- #
8
- # This module can use either PIL (which can only reliably read EXIF data) or exiftool (which
9
- # can read everything). The latter approach expects that exiftool is available on the system
10
- # path. No attempt is made to be consistent in format across the two approaches.
11
- #
12
- ########
1
+ """
2
+
3
+ read_exif.py
4
+
5
+ Given a folder of images, reads relevant metadata (EXIF/IPTC/XMP) fields from all images,
6
+ and writes them to a .json or .csv file.
7
+
8
+ This module can use either PIL (which can only reliably read EXIF data) or exiftool (which
9
+ can read everything). The latter approach expects that exiftool is available on the system
10
+ path. No attempt is made to be consistent in format across the two approaches.
11
+
12
+ """
13
13
 
14
14
  #%% Imports and constants
15
15
 
@@ -24,7 +24,7 @@ from multiprocessing.pool import Pool as Pool
24
24
  from tqdm import tqdm
25
25
  from PIL import Image, ExifTags
26
26
 
27
- from md_utils.path_utils import find_images
27
+ from md_utils.path_utils import find_images, is_executable
28
28
  from md_utils.ct_utils import args_to_object
29
29
 
30
30
  debug_max_images = None
@@ -33,64 +33,61 @@ debug_max_images = None
33
33
  #%% Options
34
34
 
35
35
  class ReadExifOptions:
36
+ """
37
+ Parameters controlling metadata extraction.
38
+ """
36
39
 
40
+ #: Enable additional debug console output
37
41
  verbose = False
38
42
 
39
- # If this is True and an output file is specified for read_exif_from_folder,
40
- # and we encounter a serialization issue, we'll return the results but won't
41
- # error.
43
+ #: If this is True and an output file is specified for read_exif_from_folder,
44
+ #: and we encounter a serialization issue, we'll return the results but won't
45
+ #: error.
42
46
  allow_write_error = False
43
47
 
44
- # Number of concurrent workers
48
+ #: Number of concurrent workers, set to <= 1 to disable parallelization
45
49
  n_workers = 1
46
50
 
47
- # Should we use threads (vs. processes) for parallelization?
48
- #
49
- # Not relevant if n_workers is 1.
51
+ #: Should we use threads (vs. processes) for parallelization?
52
+ #:
53
+ #: Not relevant if n_workers is <= 1.
50
54
  use_threads = True
51
55
 
52
- # "File" and "ExifTool" are tag types used by ExifTool to report data that
53
- # doesn't come from EXIF, rather from the file (e.g. file size).
56
+ #: "File" and "ExifTool" are tag types used by ExifTool to report data that
57
+ #: doesn't come from EXIF, rather from the file (e.g. file size).
54
58
  tag_types_to_ignore = set(['File','ExifTool'])
55
59
 
56
- # Include/exclude specific tags (mutually incompatible)
60
+ #: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
61
+ #:
62
+ #: A useful set of tags one might want to limit queries for:
63
+ #:
64
+ #: options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight','DateTime',
65
+ #: 'DateTimeOriginal','Orientation']
57
66
  tags_to_include = None
58
- tags_to_exclude = None
59
67
 
60
- # A useful set of tags one might want to limit queries for
61
- # options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight','DateTime','DateTimeOriginal','Orientation']
68
+ #: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
69
+ tags_to_exclude = None
62
70
 
71
+ #: The command line to invoke if using exiftool, can be an absolute path to exiftool.exe, or
72
+ #: can be just "exiftool", in which case it should be on your system path.
63
73
  exiftool_command_name = 'exiftool'
64
74
 
65
- # How should we handle byte-formatted EXIF tags?
66
- #
67
- # 'convert_to_string': convert to a Python string
68
- # 'delete': don't include at all
69
- # 'raw': include as a byte string
75
+ #: How should we handle byte-formatted EXIF tags?
76
+ #:
77
+ #: 'convert_to_string': convert to a Python string
78
+ #: 'delete': don't include at all
79
+ #: 'raw': include as a byte string
70
80
  byte_handling = 'convert_to_string' # 'convert_to_string','delete','raw'
71
81
 
72
- # Should we use exiftool or pil?
82
+ #: Should we use exiftool or PIL?
73
83
  processing_library = 'pil' # 'exiftool','pil'
74
-
75
-
84
+
76
85
 
77
86
  #%% Functions
78
87
 
79
- def enumerate_files(input_folder,recursive=True):
80
- """
81
- Enumerates all image files in input_folder, returning relative paths
82
- """
83
-
84
- image_files = find_images(input_folder,recursive=recursive)
85
- image_files = [os.path.relpath(s,input_folder) for s in image_files]
86
- image_files = [s.replace('\\','/') for s in image_files]
87
- print('Enumerated {} files'.format(len(image_files)))
88
- return image_files
89
-
90
-
91
- def get_exif_ifd(exif):
88
+ def _get_exif_ifd(exif):
92
89
  """
93
- Read EXIF data by finding the EXIF offset and reading tags directly
90
+ Read EXIF data by finding the EXIF offset and reading tags directly
94
91
 
95
92
  https://github.com/python-pillow/Pillow/issues/5863
96
93
  """
@@ -108,8 +105,16 @@ def get_exif_ifd(exif):
108
105
 
109
106
  def read_pil_exif(im,options=None):
110
107
  """
111
- Read all the EXIF data we know how to read from [im] (path or PIL Image), whether it's
112
- in the PIL default EXIF data or not. Returns a dict.
108
+ Read all the EXIF data we know how to read from an image, using PIL. This is primarily
109
+ an internal function; the main entry point for single-image EXIF information is
110
+ read_exif_tags_for_image().
111
+
112
+ Args:
113
+ im (str or PIL.Image.Image): image (as a filename or an Image object) from which
114
+ we should read EXIF data.
115
+
116
+ Returns:
117
+ dict: a dictionary mapping EXIF tag names to their values
113
118
  """
114
119
 
115
120
  if options is None:
@@ -138,10 +143,10 @@ def read_pil_exif(im,options=None):
138
143
  # print('Warning: unrecognized EXIF tag: {}'.format(k))
139
144
  exif_tags[k] = str(v)
140
145
 
141
- exif_idf_tags = get_exif_ifd(exif_info)
146
+ exif_ifd_tags = _get_exif_ifd(exif_info)
142
147
 
143
- for k in exif_idf_tags.keys():
144
- v = exif_idf_tags[k]
148
+ for k in exif_ifd_tags.keys():
149
+ v = exif_ifd_tags[k]
145
150
  if k in exif_tags:
146
151
  if options.verbose:
147
152
  print('Warning: redundant EXIF values for {} in {}:\n{}\n{}'.format(
@@ -177,8 +182,8 @@ def read_pil_exif(im,options=None):
177
182
 
178
183
  def format_datetime_as_exif_datetime_string(dt):
179
184
  """
180
- Returns a Python datetime object rendered using the standard Exif datetime
181
- string format
185
+ Returns a Python datetime object rendered using the standard EXIF datetime
186
+ string format ('%Y:%m:%d %H:%M:%S')
182
187
  """
183
188
 
184
189
  return datetime.strftime(dt, '%Y:%m:%d %H:%M:%S')
@@ -190,7 +195,14 @@ def parse_exif_datetime_string(s,verbose=False):
190
195
 
191
196
  %Y:%m:%d %H:%M:%S
192
197
 
193
- Parse one of those strings into a Python datetime object.
198
+ Parses one of those strings into a Python datetime object.
199
+
200
+ Args:
201
+ s (str): datetime string to parse, should be in standard EXIF datetime format
202
+ verbose (bool, optional): enable additional debug output
203
+
204
+ Returns:
205
+ datetime: the datetime object created from [s]
194
206
  """
195
207
 
196
208
  dt = None
@@ -232,13 +244,13 @@ def read_exif_tags_for_image(file_path,options=None):
232
244
  """
233
245
  Get relevant fields from EXIF data for an image
234
246
 
235
- Returns a dict with fields 'status' (str) and 'tags'
236
-
237
- The exact format of 'tags' depends on options.processing_library
238
-
239
- For exiftool, 'tags' is a list of lists, where each element is (type/tag/value)
240
-
241
- For pil, 'tags' is a dict (str:str)
247
+ Returns:
248
+ dict: a dict with fields 'status' (str) and 'tags'. The exact format of 'tags' depends on
249
+ options.processing_library, where options is a ReadExifOptions object (optional) holding the
250
+ parameters controlling metadata extraction:
251
+
252
+ - For exiftool, 'tags' is a list of lists, where each element is (type/tag/value)
253
+ - For PIL, 'tags' is a dict (str:str)
242
254
  """
243
255
 
244
256
  if options is None:
@@ -344,7 +356,7 @@ def read_exif_tags_for_image(file_path,options=None):
344
356
  # ...read_exif_tags_for_image()
345
357
 
346
358
 
347
- def populate_exif_data(im, image_base, options=None):
359
+ def _populate_exif_data(im, image_base, options=None):
348
360
  """
349
361
  Populate EXIF data into the 'exif_tags' field in the image object [im].
350
362
 
@@ -386,10 +398,10 @@ def populate_exif_data(im, image_base, options=None):
386
398
 
387
399
  return im
388
400
 
389
- # ...populate_exif_data()
401
+ # ..._populate_exif_data()
390
402
 
391
403
 
392
- def create_image_objects(image_files,recursive=True):
404
+ def _create_image_objects(image_files,recursive=True):
393
405
  """
394
406
  Create empty image objects for every image in [image_files], which can be a
395
407
  list of relative paths (which will get stored without processing, so the base
@@ -404,7 +416,10 @@ def create_image_objects(image_files,recursive=True):
404
416
  if isinstance(image_files,str):
405
417
  print('Enumerating image files in {}'.format(image_files))
406
418
  assert os.path.isdir(image_files), 'Invalid image folder {}'.format(image_files)
407
- image_files = enumerate_files(image_files,recursive=recursive)
419
+ image_files = find_images(image_files,
420
+ recursive=recursive,
421
+ return_relative_paths=True,
422
+ convert_slashes=True)
408
423
 
409
424
  images = []
410
425
  for fn in image_files:
@@ -419,7 +434,7 @@ def create_image_objects(image_files,recursive=True):
419
434
  return images
420
435
 
421
436
 
422
- def populate_exif_for_images(image_base,images,options=None):
437
+ def _populate_exif_for_images(image_base,images,options=None):
423
438
  """
424
439
  Main worker loop: read EXIF data for each image object in [images] and
425
440
  populate the image objects.
@@ -435,7 +450,7 @@ def populate_exif_for_images(image_base,images,options=None):
435
450
 
436
451
  results = []
437
452
  for im in tqdm(images):
438
- results.append(populate_exif_data(im,image_base,options))
453
+ results.append(_populate_exif_data(im,image_base,options))
439
454
 
440
455
  else:
441
456
 
@@ -447,13 +462,13 @@ def populate_exif_for_images(image_base,images,options=None):
447
462
  print('Starting parallel process pool with {} workers'.format(options.n_workers))
448
463
  pool = Pool(options.n_workers)
449
464
 
450
- results = list(tqdm(pool.imap(partial(populate_exif_data,image_base=image_base,
465
+ results = list(tqdm(pool.imap(partial(_populate_exif_data,image_base=image_base,
451
466
  options=options),images),total=len(images)))
452
467
 
453
468
  return results
454
469
 
455
470
 
456
- def write_exif_results(results,output_file):
471
+ def _write_exif_results(results,output_file):
457
472
  """
458
473
  Write EXIF information to [output_file].
459
474
 
@@ -530,28 +545,24 @@ def write_exif_results(results,output_file):
530
545
  print('Wrote results to {}'.format(output_file))
531
546
 
532
547
 
533
- def is_executable(name):
534
-
535
- """Check whether `name` is on PATH and marked as executable."""
536
-
537
- # https://stackoverflow.com/questions/11210104/check-if-a-program-exists-from-a-python-script
538
-
539
- from shutil import which
540
- return which(name) is not None
541
-
542
-
543
548
  def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None,recursive=True):
544
549
  """
545
- Read EXIF data for all images in input_folder.
546
-
547
- If filenames is not None, it should be a list of relative filenames; only those files will
548
- be processed.
549
-
550
- input_folder can be None or '', in which case filenames should be a list of absolute paths.
551
-
552
- if output_file is not None, results will be written to the specified .json file.
553
-
554
- returns a dictionary mapping relative filenames to EXIF data.
550
+ Read EXIF data for a folder of images.
551
+
552
+ Args:
553
+ input_folder (str): folder to process; if this is None, [filenames] should be a list of absolute
554
+ paths
555
+ output_file (str, optional): .json file to which we should write results; if this is None, results
556
+ are returned but not written to disk
557
+ options (ReadExifOptions, optional): parameters controlling metadata extraction
558
+ filenames (list, optional): allowlist of relative filenames (if [input_folder] is not None) or
559
+ a list of absolute filenames (if [input_folder] is None)
560
+ recursive (bool, optional): whether to recurse into [input_folder], not relevant if [input_folder]
561
+ is None.
562
+
563
+ Returns:
564
+ dict: a dictionary mapping relative filenames to EXIF data, whose format depends on whether
565
+ we're using PIL or exiftool.
555
566
  """
556
567
 
557
568
  if options is None:
@@ -589,16 +600,16 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
589
600
  assert is_executable(options.exiftool_command_name), 'exiftool not available'
590
601
 
591
602
  if filenames is None:
592
- images = create_image_objects(input_folder,recursive=recursive)
603
+ images = _create_image_objects(input_folder,recursive=recursive)
593
604
  else:
594
605
  assert isinstance(filenames,list)
595
- images = create_image_objects(filenames)
606
+ images = _create_image_objects(filenames)
596
607
 
597
- results = populate_exif_for_images(input_folder,images,options)
608
+ results = _populate_exif_for_images(input_folder,images,options)
598
609
 
599
610
  if output_file is not None:
600
611
  try:
601
- write_exif_results(results,output_file)
612
+ _write_exif_results(results,output_file)
602
613
  except Exception as e:
603
614
  if not options.allow_write_error:
604
615
  raise
@@ -645,8 +656,10 @@ def main():
645
656
  parser = argparse.ArgumentParser(description=('Read EXIF information from all images in' + \
646
657
  ' a folder, and write the results to .csv or .json'))
647
658
 
648
- parser.add_argument('input_folder', type=str)
649
- parser.add_argument('output_file', type=str)
659
+ parser.add_argument('input_folder', type=str,
660
+ help='Folder of images from which we should read EXIF information')
661
+ parser.add_argument('output_file', type=str,
662
+ help='Output file (.json) to which we should write EXIF information')
650
663
  parser.add_argument('--n_workers', type=int, default=1,
651
664
  help='Number of concurrent workers to use (defaults to 1)')
652
665
  parser.add_argument('--use_threads', action='store_true',
@@ -1,84 +1,84 @@
1
- ########
2
- #
3
- # remap_coco_categories.py
4
- #
5
- # Given a COCO-formatted dataset, remap the categories to a new mapping.
6
- #
7
- ########
8
-
9
- #%% Imports and constants
10
-
11
- import os
12
- import json
13
-
14
- from copy import deepcopy
15
-
16
-
17
- #%% Main function
18
-
19
- def remap_coco_categories(input_data,
20
- output_category_name_to_id,
21
- input_category_name_to_output_category_name,
22
- output_file=None):
23
- """
24
- Given a COCO-formatted dataset, remap the categories to a new categories mapping, optionally
25
- writing the results to a new file.
26
-
27
- output_category_name_to_id is a dict mapping strings to ints.
28
-
29
- input_category_name_to_output_category_name is a dict mapping strings to strings.
30
-
31
- [input_data] can be a COCO-formatted dict or a filename. If it's a dict, it will be copied,
32
- not modified in place.
33
- """
34
-
35
- if isinstance(input_data,str):
36
- assert os.path.isfile(input_data), "Can't find file {}".format(input_data)
37
- with open(input_data,'r') as f:
38
- input_data = json.load(f)
39
- assert isinstance(input_data,dict), 'Illegal COCO input data'
40
- else:
41
- assert isinstance(input_data,dict), 'Illegal COCO input data'
42
- input_data = deepcopy(input_data)
43
-
44
- # It's safe to modify in-place now
45
- output_data = input_data
46
-
47
- # Read input name --> ID mapping
48
- input_category_name_to_input_category_id = {}
49
- for c in input_data['categories']:
50
- input_category_name_to_input_category_id[c['name']] = c['id']
51
-
52
- # Map input IDs --> output IDs
53
- input_category_id_to_output_category_id = {}
54
- for input_name in input_category_name_to_output_category_name.keys():
55
- output_name = input_category_name_to_output_category_name[input_name]
56
- assert output_name in output_category_name_to_id, \
57
- 'No output ID for {} --> {}'.format(input_name,output_name)
58
- input_id = input_category_name_to_input_category_id[input_name]
59
- output_id = output_category_name_to_id[output_name]
60
- input_category_id_to_output_category_id[input_id] = output_id
61
-
62
- # Map annotations
63
- for ann in output_data['annotations']:
64
- assert ann['category_id'] in input_category_id_to_output_category_id, \
65
- 'Unrecognized category ID {}'.format(ann['category_id'])
66
- ann['category_id'] = input_category_id_to_output_category_id[ann['category_id']]
67
-
68
- # Update the category list
69
- output_categories = []
70
- for output_name in output_category_name_to_id:
71
- category = {'name':output_name,'id':output_category_name_to_id[output_name]}
72
- output_categories.append(category)
73
- output_data['categories'] = output_categories
74
-
75
- if output_file is not None:
76
- with open(output_file,'w') as f:
77
- json.dump(output_data,f,indent=1)
78
-
79
- return input_data
80
-
81
-
82
- #%% Command-line driver
83
-
1
+ """
2
+
3
+ remap_coco_categories.py
4
+
5
+ Given a COCO-formatted dataset, remap the categories to a new mapping.
6
+
7
+ """
8
+
9
+ #%% Imports and constants
10
+
11
+ import os
12
+ import json
13
+
14
+ from copy import deepcopy
15
+
16
+
17
+ #%% Main function
18
+
19
+ def remap_coco_categories(input_data,
20
+ output_category_name_to_id,
21
+ input_category_name_to_output_category_name,
22
+ output_file=None):
23
+ """
24
+ Given a COCO-formatted dataset, remap the categories to a new categories mapping, optionally
25
+ writing the results to a new file.
26
+
27
+ output_category_name_to_id is a dict mapping strings to ints.
28
+
29
+ input_category_name_to_output_category_name is a dict mapping strings to strings.
30
+
31
+ [input_data] can be a COCO-formatted dict or a filename. If it's a dict, it will be copied,
32
+ not modified in place.
33
+ """
34
+
35
+ if isinstance(input_data,str):
36
+ assert os.path.isfile(input_data), "Can't find file {}".format(input_data)
37
+ with open(input_data,'r') as f:
38
+ input_data = json.load(f)
39
+ assert isinstance(input_data,dict), 'Illegal COCO input data'
40
+ else:
41
+ assert isinstance(input_data,dict), 'Illegal COCO input data'
42
+ input_data = deepcopy(input_data)
43
+
44
+ # It's safe to modify in-place now
45
+ output_data = input_data
46
+
47
+ # Read input name --> ID mapping
48
+ input_category_name_to_input_category_id = {}
49
+ for c in input_data['categories']:
50
+ input_category_name_to_input_category_id[c['name']] = c['id']
51
+
52
+ # Map input IDs --> output IDs
53
+ input_category_id_to_output_category_id = {}
54
+ for input_name in input_category_name_to_output_category_name.keys():
55
+ output_name = input_category_name_to_output_category_name[input_name]
56
+ assert output_name in output_category_name_to_id, \
57
+ 'No output ID for {} --> {}'.format(input_name,output_name)
58
+ input_id = input_category_name_to_input_category_id[input_name]
59
+ output_id = output_category_name_to_id[output_name]
60
+ input_category_id_to_output_category_id[input_id] = output_id
61
+
62
+ # Map annotations
63
+ for ann in output_data['annotations']:
64
+ assert ann['category_id'] in input_category_id_to_output_category_id, \
65
+ 'Unrecognized category ID {}'.format(ann['category_id'])
66
+ ann['category_id'] = input_category_id_to_output_category_id[ann['category_id']]
67
+
68
+ # Update the category list
69
+ output_categories = []
70
+ for output_name in output_category_name_to_id:
71
+ category = {'name':output_name,'id':output_category_name_to_id[output_name]}
72
+ output_categories.append(category)
73
+ output_data['categories'] = output_categories
74
+
75
+ if output_file is not None:
76
+ with open(output_file,'w') as f:
77
+ json.dump(output_data,f,indent=1)
78
+
79
+ return input_data
80
+
81
+
82
+ #%% Command-line driver
83
+
84
84
  # TODO
@@ -1,70 +1,66 @@
1
- ########
2
- #
3
- # remove_exif.py
4
- #
5
- # Removes all EXIF/IPTC/XMP metadata from a folder of images, without making
6
- # backup copies, using pyexiv2.
7
- #
8
- ########
1
+ """
2
+
3
+ remove_exif.py
4
+
5
+ Removes all EXIF/IPTC/XMP metadata from a folder of images, without making
6
+ backup copies, using pyexiv2.
7
+
8
+ TODO: This is a one-off script waiting to be cleaned up for more general use.
9
+
10
+ """
11
+
12
+ input_base = r'f:\images'
13
+
9
14
 
10
15
  #%% Imports and constants
11
16
 
12
17
  import os
13
18
  import glob
14
19
 
15
- input_base = r'f:\images'
16
- assert os.path.isdir(input_base)
17
-
18
-
19
- #%% List files
20
-
21
- all_files = [f for f in glob.glob(input_base + "*/**", recursive=True)]
22
- image_files = [s for s in all_files if (s.lower().endswith('.jpg'))]
23
-
24
-
25
- #%% Remove EXIF data (support)
26
-
27
- import pyexiv2
28
-
29
- # PYEXIV2 IS NOT THREAD SAFE; DO NOT CALL THIS IN PARALLEL FROM A SINGLE PROCESS
30
- def remove_exif(fn):
31
-
32
- try:
33
- img = pyexiv2.Image(fn)
34
- # data = img.read_exif(); print(data)
35
- img.clear_exif()
36
- img.clear_iptc()
37
- img.clear_xmp()
38
- img.close()
39
- except Exception as e:
40
- print('EXIF error on {}: {}'.format(fn,str(e)))
41
-
42
-
43
- #%% Debug
44
-
45
- if False:
46
- #%%
47
- fn = image_files[-10001]
48
- os.startfile(fn)
49
- #%%
50
- remove_exif(fn)
51
- os.startfile(fn)
52
-
53
-
54
- #%% Remove EXIF data (execution)
55
-
56
- from joblib import Parallel, delayed
57
-
58
- n_exif_threads = 50
59
-
60
- if n_exif_threads == 1:
61
-
62
- # fn = image_files[0]
63
- for fn in image_files:
64
- remove_exif(fn)
20
+ def main():
21
+
22
+ assert os.path.isdir(input_base)
23
+
24
+ ##%% List files
25
+
26
+ all_files = [f for f in glob.glob(input_base + "*/**", recursive=True)]
27
+ image_files = [s for s in all_files if (s.lower().endswith('.jpg'))]
28
+
29
+
30
+ ##%% Remove EXIF data (support)
31
+
32
+ import pyexiv2
33
+
34
+ # PYEXIV2 IS NOT THREAD SAFE; DO NOT CALL THIS IN PARALLEL FROM A SINGLE PROCESS
35
+ def remove_exif(fn):
65
36
 
66
- else:
67
- # joblib.Parallel defaults to a process-based backend, but let's be sure
68
- # results = Parallel(n_jobs=n_exif_threads,verbose=2,prefer='processes')(delayed(remove_exif)(fn) for fn in image_files[0:10])
69
- results = Parallel(n_jobs=n_exif_threads,verbose=2,prefer='processes')(delayed(remove_exif)(fn) for fn in image_files)
37
+ try:
38
+ img = pyexiv2.Image(fn)
39
+ # data = img.read_exif(); print(data)
40
+ img.clear_exif()
41
+ img.clear_iptc()
42
+ img.clear_xmp()
43
+ img.close()
44
+ except Exception as e:
45
+ print('EXIF error on {}: {}'.format(fn,str(e)))
46
+
47
+
48
+ ##%% Remove EXIF data (execution)
49
+
50
+ from joblib import Parallel, delayed
70
51
 
52
+ n_exif_threads = 50
53
+
54
+ if n_exif_threads == 1:
55
+
56
+ # fn = image_files[0]
57
+ for fn in image_files:
58
+ remove_exif(fn)
59
+
60
+ else:
61
+ # joblib.Parallel defaults to a process-based backend, but let's be sure
62
+ # results = Parallel(n_jobs=n_exif_threads,verbose=2,prefer='processes')(delayed(remove_exif)(fn) for fn in image_files[0:10])
63
+ _ = Parallel(n_jobs=n_exif_threads,verbose=2,prefer='processes')(delayed(remove_exif)(fn) for fn in image_files)
64
+
65
+ if __name__ == '__main__':
66
+ main()