megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (191) hide show
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +93 -79
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
  20. api/batch_processing/postprocessing/compare_batch_results.py +114 -44
  21. api/batch_processing/postprocessing/convert_output_format.py +62 -19
  22. api/batch_processing/postprocessing/load_api_results.py +17 -20
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +165 -68
  25. api/batch_processing/postprocessing/merge_detections.py +40 -15
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
  27. api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +107 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -0
  71. data_management/coco_to_yolo.py +86 -62
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +130 -83
  76. data_management/databases/subset_json_db.py +25 -16
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -144
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -160
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +8 -8
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +309 -159
  120. data_management/labelme_to_yolo.py +103 -60
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +114 -31
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +92 -90
  128. data_management/lila/generate_lila_per_image_labels.py +56 -43
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +103 -70
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +161 -99
  135. data_management/remap_coco_categories.py +84 -0
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +32 -44
  138. data_management/wi_download_csv_to_coco.py +246 -0
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +535 -95
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +189 -114
  147. detection/run_inference_with_yolov5_val.py +118 -51
  148. detection/run_tiled_inference.py +113 -42
  149. detection/tf_detector.py +51 -28
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +249 -70
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -862
  157. md_utils/path_utils.py +655 -155
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +208 -27
  163. md_utils/write_html_image_list.py +51 -35
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +908 -311
  168. md_visualization/visualize_db.py +109 -58
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
  173. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  174. taxonomy_mapping/__init__.py +0 -0
  175. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  176. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  177. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  178. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  179. taxonomy_mapping/retrieve_sample_image.py +12 -12
  180. taxonomy_mapping/simple_image_download.py +11 -11
  181. taxonomy_mapping/species_lookup.py +10 -10
  182. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  183. taxonomy_mapping/taxonomy_graph.py +47 -47
  184. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  185. data_management/cct_json_to_filename_json.py +0 -89
  186. data_management/cct_to_csv.py +0 -140
  187. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  188. detection/detector_training/copy_checkpoints.py +0 -43
  189. md_visualization/visualize_megadb.py +0 -183
  190. megadetector-5.0.7.dist-info/RECORD +0 -202
  191. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
@@ -1,15 +1,15 @@
1
- ########
2
- #
3
- # read_exif.py
4
- #
5
- # Given a folder of images, read relevant metadata (EXIF/IPTC/XMP) fields from all images,
6
- # and write them to a .json or .csv file.
7
- #
8
- # This module can use either PIL (which can only reliably read EXIF data) or exiftool (which
9
- # can read everything). The latter approach expects that exiftool is available on the system
10
- # path. No attempt is made to be consistent in format across the two approaches.
11
- #
12
- ########
1
+ """
2
+
3
+ read_exif.py
4
+
5
+ Given a folder of images, reads relevant metadata (EXIF/IPTC/XMP) fields from all images,
6
+ and writes them to a .json or .csv file.
7
+
8
+ This module can use either PIL (which can only reliably read EXIF data) or exiftool (which
9
+ can read everything). The latter approach expects that exiftool is available on the system
10
+ path. No attempt is made to be consistent in format across the two approaches.
11
+
12
+ """
13
13
 
14
14
  #%% Imports and constants
15
15
 
@@ -24,7 +24,7 @@ from multiprocessing.pool import Pool as Pool
24
24
  from tqdm import tqdm
25
25
  from PIL import Image, ExifTags
26
26
 
27
- from md_utils.path_utils import find_images
27
+ from md_utils.path_utils import find_images, is_executable
28
28
  from md_utils.ct_utils import args_to_object
29
29
 
30
30
  debug_max_images = None
@@ -33,54 +33,61 @@ debug_max_images = None
33
33
  #%% Options
34
34
 
35
35
  class ReadExifOptions:
36
+ """
37
+ Parameters controlling metadata extraction.
38
+ """
36
39
 
40
+ #: Enable additional debug console output
37
41
  verbose = False
38
42
 
39
- # If this is True and an output file is specified for read_exif_from_folder,
40
- # and we encounter a serialization issue, we'll return the results but won't
41
- # error.
43
+ #: If this is True and an output file is specified for read_exif_from_folder,
44
+ #: and we encounter a serialization issue, we'll return the results but won't
45
+ #: error.
42
46
  allow_write_error = False
43
47
 
44
- # Number of concurrent workers
48
+ #: Number of concurrent workers, set to <= 1 to disable parallelization
45
49
  n_workers = 1
46
50
 
47
- # Should we use threads (vs. processes) for parallelization?
48
- #
49
- # Not relevant if n_workers is 1.
51
+ #: Should we use threads (vs. processes) for parallelization?
52
+ #:
53
+ #: Not relevant if n_workers is <= 1.
50
54
  use_threads = True
51
-
55
+
56
+ #: "File" and "ExifTool" are tag types used by ExifTool to report data that
57
+ #: doesn't come from EXIF, rather from the file (e.g. file size).
52
58
  tag_types_to_ignore = set(['File','ExifTool'])
53
59
 
60
+ #: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
61
+ #:
62
+ #: A useful set of tags one might want to limit queries for:
63
+ #:
64
+ #: options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight','DateTime',
65
+ #: 'DateTimeOriginal','Orientation']
66
+ tags_to_include = None
67
+
68
+ #: Include/exclude specific tags (tags_to_include and tags_to_exclude are mutually incompatible)
69
+ tags_to_exclude = None
70
+
71
+ #: The command line to invoke if using exiftool, can be an absolute path to exiftool.exe, or
72
+ #: can be just "exiftool", in which case it should be on your system path.
54
73
  exiftool_command_name = 'exiftool'
55
74
 
56
- # How should we handle byte-formatted EXIF tags?
57
- #
58
- # 'convert_to_string': convert to a Python string
59
- # 'delete': don't include at all
60
- # 'raw': include as a byte string
75
+ #: How should we handle byte-formatted EXIF tags?
76
+ #:
77
+ #: 'convert_to_string': convert to a Python string
78
+ #: 'delete': don't include at all
79
+ #: 'raw': include as a byte string
61
80
  byte_handling = 'convert_to_string' # 'convert_to_string','delete','raw'
62
81
 
63
- # Should we use exiftool or pil?
82
+ #: Should we use exiftool or PIL?
64
83
  processing_library = 'pil' # 'exiftool','pil'
65
-
84
+
66
85
 
67
86
  #%% Functions
68
87
 
69
- def enumerate_files(input_folder):
88
+ def _get_exif_ifd(exif):
70
89
  """
71
- Enumerates all image files in input_folder, returning relative paths
72
- """
73
-
74
- image_files = find_images(input_folder,recursive=True)
75
- image_files = [os.path.relpath(s,input_folder) for s in image_files]
76
- image_files = [s.replace('\\','/') for s in image_files]
77
- print('Enumerated {} files'.format(len(image_files)))
78
- return image_files
79
-
80
-
81
- def get_exif_ifd(exif):
82
- """
83
- Read EXIF data by finding the EXIF offset and reading tags directly
90
+ Read EXIF data from by finding the EXIF offset and reading tags directly
84
91
 
85
92
  https://github.com/python-pillow/Pillow/issues/5863
86
93
  """
@@ -98,8 +105,16 @@ def get_exif_ifd(exif):
98
105
 
99
106
  def read_pil_exif(im,options=None):
100
107
  """
101
- Read all the EXIF data we know how to read from [im] (path or PIL Image), whether it's
102
- in the PIL default EXIF data or not.
108
+ Read all the EXIF data we know how to read from an image, using PIL. This is primarily
109
+ an internal function; the main entry point for single-image EXIF information is
110
+ read_exif_tags_for_image().
111
+
112
+ Args:
113
+ im (str or PIL.Image.Image): image (as a filename or an Image object) from which
114
+ we should read EXIF data.
115
+
116
+ Returns:
117
+ dict: a dictionary mapping EXIF tag names to their values
103
118
  """
104
119
 
105
120
  if options is None:
@@ -128,10 +143,10 @@ def read_pil_exif(im,options=None):
128
143
  # print('Warning: unrecognized EXIF tag: {}'.format(k))
129
144
  exif_tags[k] = str(v)
130
145
 
131
- exif_idf_tags = get_exif_ifd(exif_info)
146
+ exif_ifd_tags = _get_exif_ifd(exif_info)
132
147
 
133
- for k in exif_idf_tags.keys():
134
- v = exif_idf_tags[k]
148
+ for k in exif_ifd_tags.keys():
149
+ v = exif_ifd_tags[k]
135
150
  if k in exif_tags:
136
151
  if options.verbose:
137
152
  print('Warning: redundant EXIF values for {} in {}:\n{}\n{}'.format(
@@ -167,8 +182,8 @@ def read_pil_exif(im,options=None):
167
182
 
168
183
  def format_datetime_as_exif_datetime_string(dt):
169
184
  """
170
- Returns a Python datetime object rendered using the standard Exif datetime
171
- string format
185
+ Returns a Python datetime object rendered using the standard EXIF datetime
186
+ string format ('%Y:%m:%d %H:%M:%S')
172
187
  """
173
188
 
174
189
  return datetime.strftime(dt, '%Y:%m:%d %H:%M:%S')
@@ -180,7 +195,14 @@ def parse_exif_datetime_string(s,verbose=False):
180
195
 
181
196
  %Y:%m:%d %H:%M:%S
182
197
 
183
- Parse one of those strings into a Python datetime object.
198
+ Parses one of those strings into a Python datetime object.
199
+
200
+ Args:
201
+ s (str): datetime string to parse, should be in standard EXIF datetime format
202
+ verbose (bool, optional): enable additional debug output
203
+
204
+ Returns:
205
+ datetime: the datetime object created from [s]
184
206
  """
185
207
 
186
208
  dt = None
@@ -192,17 +214,43 @@ def parse_exif_datetime_string(s,verbose=False):
192
214
  return dt
193
215
 
194
216
 
217
+ def _filter_tags(tags,options):
218
+ """
219
+ Internal function used to include/exclude specific tags from the exif_tags
220
+ dict.
221
+ """
222
+
223
+ if options is None:
224
+ return tags
225
+ if options.tags_to_include is None and options.tags_to_exclude is None:
226
+ return tags
227
+ if options.tags_to_include is not None:
228
+ assert options.tags_to_exclude is None, "tags_to_include and tags_to_exclude are incompatible"
229
+ tags_to_return = {}
230
+ for tag_name in tags.keys():
231
+ if tag_name in options.tags_to_include:
232
+ tags_to_return[tag_name] = tags[tag_name]
233
+ return tags_to_return
234
+ if options.tags_to_exclude is not None:
235
+ assert options.tags_to_include is None, "tags_to_include and tags_to_exclude are incompatible"
236
+ tags_to_return = {}
237
+ for tag_name in tags.keys():
238
+ if tag_name not in options.tags_to_exclude:
239
+ tags_to_return[tag_name] = tags[tag_name]
240
+ return tags_to_return
241
+
242
+
195
243
  def read_exif_tags_for_image(file_path,options=None):
196
244
  """
197
245
  Get relevant fields from EXIF data for an image
198
246
 
199
- Returns a dict with fields 'status' (str) and 'tags'
200
-
201
- The exact format of 'tags' depends on options.processing_library
202
-
203
- For exiftool, 'tags' is a list of lists, where each element is (type/tag/value)
204
-
205
- For pil, 'tags' is a dict (str:str)
247
+ Returns:
248
+ dict: a dict with fields 'status' (str) and 'tags'. The exact format of 'tags' depends on
249
+ options (ReadExifOptions, optional): parameters controlling metadata extraction
250
+ options.processing_library:
251
+
252
+ - For exiftool, 'tags' is a list of lists, where each element is (type/tag/value)
253
+ - For PIL, 'tags' is a dict (str:str)
206
254
  """
207
255
 
208
256
  if options is None:
@@ -227,8 +275,8 @@ def read_exif_tags_for_image(file_path,options=None):
227
275
  result['status'] = 'empty_read'
228
276
  else:
229
277
  result['status'] = 'success'
230
- result['tags'] = exif_tags
231
-
278
+ result['tags'] = _filter_tags(exif_tags,options)
279
+
232
280
  return result
233
281
 
234
282
  elif options.processing_library == 'exiftool':
@@ -283,9 +331,12 @@ def read_exif_tags_for_image(file_path,options=None):
283
331
  print('Ignoring tag with type {}'.format(field_type))
284
332
  continue
285
333
 
286
- field_tag = field_name_type_tokens[1].strip()
287
-
288
- tag = [field_type,field_tag,field_value]
334
+ field_name = field_name_type_tokens[1].strip()
335
+ if options.tags_to_exclude is not None and field_name in options.tags_to_exclude:
336
+ continue
337
+ if options.tags_to_include is not None and field_name not in options.tags_to_include:
338
+ continue
339
+ tag = [field_type,field_name,field_value]
289
340
 
290
341
  exif_tags.append(tag)
291
342
 
@@ -305,7 +356,7 @@ def read_exif_tags_for_image(file_path,options=None):
305
356
  # ...read_exif_tags_for_image()
306
357
 
307
358
 
308
- def populate_exif_data(im, image_base, options=None):
359
+ def _populate_exif_data(im, image_base, options=None):
309
360
  """
310
361
  Populate EXIF data into the 'exif_tags' field in the image object [im].
311
362
 
@@ -347,23 +398,28 @@ def populate_exif_data(im, image_base, options=None):
347
398
 
348
399
  return im
349
400
 
350
- # ...populate_exif_data()
401
+ # ..._populate_exif_data()
351
402
 
352
403
 
353
- def create_image_objects(image_files):
404
+ def _create_image_objects(image_files,recursive=True):
354
405
  """
355
406
  Create empty image objects for every image in [image_files], which can be a
356
407
  list of relative paths (which will get stored without processing, so the base
357
408
  path doesn't matter here), or a folder name.
358
409
 
359
410
  Returns a list of dicts with field 'file_name' (a relative path).
411
+
412
+ "recursive" is ignored if "image_files" is a list.
360
413
  """
361
414
 
362
415
  # Enumerate *relative* paths
363
416
  if isinstance(image_files,str):
364
417
  print('Enumerating image files in {}'.format(image_files))
365
418
  assert os.path.isdir(image_files), 'Invalid image folder {}'.format(image_files)
366
- image_files = enumerate_files(image_files)
419
+ image_files = find_images(image_files,
420
+ recursive=recursive,
421
+ return_relative_paths=True,
422
+ convert_slashes=True)
367
423
 
368
424
  images = []
369
425
  for fn in image_files:
@@ -378,7 +434,7 @@ def create_image_objects(image_files):
378
434
  return images
379
435
 
380
436
 
381
- def populate_exif_for_images(image_base,images,options=None):
437
+ def _populate_exif_for_images(image_base,images,options=None):
382
438
  """
383
439
  Main worker loop: read EXIF data for each image object in [images] and
384
440
  populate the image objects.
@@ -394,7 +450,7 @@ def populate_exif_for_images(image_base,images,options=None):
394
450
 
395
451
  results = []
396
452
  for im in tqdm(images):
397
- results.append(populate_exif_data(im,image_base,options))
453
+ results.append(_populate_exif_data(im,image_base,options))
398
454
 
399
455
  else:
400
456
 
@@ -406,13 +462,13 @@ def populate_exif_for_images(image_base,images,options=None):
406
462
  print('Starting parallel process pool with {} workers'.format(options.n_workers))
407
463
  pool = Pool(options.n_workers)
408
464
 
409
- results = list(tqdm(pool.imap(partial(populate_exif_data,image_base=image_base,
465
+ results = list(tqdm(pool.imap(partial(_populate_exif_data,image_base=image_base,
410
466
  options=options),images),total=len(images)))
411
467
 
412
468
  return results
413
469
 
414
470
 
415
- def write_exif_results(results,output_file):
471
+ def _write_exif_results(results,output_file):
416
472
  """
417
473
  Write EXIF information to [output_file].
418
474
 
@@ -489,33 +545,35 @@ def write_exif_results(results,output_file):
489
545
  print('Wrote results to {}'.format(output_file))
490
546
 
491
547
 
492
- def is_executable(name):
493
-
494
- """Check whether `name` is on PATH and marked as executable."""
495
-
496
- # https://stackoverflow.com/questions/11210104/check-if-a-program-exists-from-a-python-script
497
-
498
- from shutil import which
499
- return which(name) is not None
500
-
501
-
502
- def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None):
548
+ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None,recursive=True):
503
549
  """
504
- Read EXIF data for all images in input_folder.
505
-
506
- If filenames is not None, it should be a list of relative filenames; only those files will
507
- be processed.
508
-
509
- input_folder can be None or '', in which case filenames should be a list of absolute paths.
510
-
511
- if output_file is not None, results will be written to the specified .json file.
512
-
513
- returns a dictionary mapping relative filenames to EXIF data.
550
+ Read EXIF data for a folder of images.
551
+
552
+ Args:
553
+ input_folder (str): folder to process; if this is None, [filenames] should be a list of absolute
554
+ paths
555
+ output_file (str, optional): .json file to which we should write results; if this is None, results
556
+ are returned but not written to disk
557
+ options (ReadExifOptions, optional): parameters controlling metadata extraction
558
+ filenames (list, optional): allowlist of relative filenames (if [input_folder] is not None) or
559
+ a list of absolute filenames (if [input_folder] is None)
560
+ recursive (bool, optional): whether to recurse into [input_folder], not relevant if [input_folder]
561
+ is None.
562
+
563
+ Returns:
564
+ dict: a dictionary mapping relative filenames to EXIF data, whose format depends on whether
565
+ we're using PIL or exiftool.
514
566
  """
515
567
 
516
568
  if options is None:
517
569
  options = ReadExifOptions()
518
570
 
571
+ # Validate options
572
+ if options.tags_to_include is not None:
573
+ assert options.tags_to_exclude is None, "tags_to_include and tags_to_exclude are incompatible"
574
+ if options.tags_to_exclude is not None:
575
+ assert options.tags_to_include is None, "tags_to_include and tags_to_exclude are incompatible"
576
+
519
577
  if input_folder is None:
520
578
  input_folder = ''
521
579
  if len(input_folder) > 0:
@@ -542,16 +600,16 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=N
542
600
  assert is_executable(options.exiftool_command_name), 'exiftool not available'
543
601
 
544
602
  if filenames is None:
545
- images = create_image_objects(input_folder)
603
+ images = _create_image_objects(input_folder,recursive=recursive)
546
604
  else:
547
605
  assert isinstance(filenames,list)
548
- images = create_image_objects(filenames)
606
+ images = _create_image_objects(filenames)
549
607
 
550
- results = populate_exif_for_images(input_folder,images,options)
608
+ results = _populate_exif_for_images(input_folder,images,options)
551
609
 
552
610
  if output_file is not None:
553
611
  try:
554
- write_exif_results(results,output_file)
612
+ _write_exif_results(results,output_file)
555
613
  except Exception as e:
556
614
  if not options.allow_write_error:
557
615
  raise
@@ -567,14 +625,16 @@ if False:
567
625
 
568
626
  #%%
569
627
 
570
- input_folder = os.path.expanduser('~/data/KRU-test')
571
- output_file = os.path.expanduser('~/data/test-exif.json')
628
+ input_folder = r'C:\temp\md-name-testing'
629
+ output_file = None # r'C:\temp\md-name-testing\exif.json'
572
630
  options = ReadExifOptions()
573
631
  options.verbose = False
574
632
  options.n_workers = 10
575
633
  options.use_threads = False
576
634
  options.processing_library = 'pil'
577
635
  # options.processing_library = 'exiftool'
636
+ options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight','DateTime','DateTimeOriginal','Orientation']
637
+ # options.tags_to_exclude = ['MakerNote']
578
638
 
579
639
  results = read_exif_from_folder(input_folder,output_file,options)
580
640
 
@@ -596,8 +656,10 @@ def main():
596
656
  parser = argparse.ArgumentParser(description=('Read EXIF information from all images in' + \
597
657
  ' a folder, and write the results to .csv or .json'))
598
658
 
599
- parser.add_argument('input_folder', type=str)
600
- parser.add_argument('output_file', type=str)
659
+ parser.add_argument('input_folder', type=str,
660
+ help='Folder of images from which we should read EXIF information')
661
+ parser.add_argument('output_file', type=str,
662
+ help='Output file (.json) to which we should write EXIF information')
601
663
  parser.add_argument('--n_workers', type=int, default=1,
602
664
  help='Number of concurrent workers to use (defaults to 1)')
603
665
  parser.add_argument('--use_threads', action='store_true',
@@ -0,0 +1,84 @@
1
+ """
2
+
3
+ remap_coco_categories.py
4
+
5
+ Given a COCO-formatted dataset, remap the categories to a new mapping.
6
+
7
+ """
8
+
9
+ #%% Imports and constants
10
+
11
+ import os
12
+ import json
13
+
14
+ from copy import deepcopy
15
+
16
+
17
+ #%% Main function
18
+
19
+ def remap_coco_categories(input_data,
20
+ output_category_name_to_id,
21
+ input_category_name_to_output_category_name,
22
+ output_file=None):
23
+ """
24
+ Given a COCO-formatted dataset, remap the categories to a new categories mapping, optionally
25
+ writing the results to a new file.
26
+
27
+ output_category_name_to_id is a dict mapping strings to ints.
28
+
29
+ input_category_name_to_output_category_name is a dict mapping strings to strings.
30
+
31
+ [input_data] can be a COCO-formatted dict or a filename. If it's a dict, it will be copied,
32
+ not modified in place.
33
+ """
34
+
35
+ if isinstance(input_data,str):
36
+ assert os.path.isfile(input_data), "Can't find file {}".format(input_data)
37
+ with open(input_data,'r') as f:
38
+ input_data = json.load(f)
39
+ assert isinstance(input_data,dict), 'Illegal COCO input data'
40
+ else:
41
+ assert isinstance(input_data,dict), 'Illegal COCO input data'
42
+ input_data = deepcopy(input_data)
43
+
44
+ # It's safe to modify in-place now
45
+ output_data = input_data
46
+
47
+ # Read input name --> ID mapping
48
+ input_category_name_to_input_category_id = {}
49
+ for c in input_data['categories']:
50
+ input_category_name_to_input_category_id[c['name']] = c['id']
51
+
52
+ # Map input IDs --> output IDs
53
+ input_category_id_to_output_category_id = {}
54
+ for input_name in input_category_name_to_output_category_name.keys():
55
+ output_name = input_category_name_to_output_category_name[input_name]
56
+ assert output_name in output_category_name_to_id, \
57
+ 'No output ID for {} --> {}'.format(input_name,output_name)
58
+ input_id = input_category_name_to_input_category_id[input_name]
59
+ output_id = output_category_name_to_id[output_name]
60
+ input_category_id_to_output_category_id[input_id] = output_id
61
+
62
+ # Map annotations
63
+ for ann in output_data['annotations']:
64
+ assert ann['category_id'] in input_category_id_to_output_category_id, \
65
+ 'Unrecognized category ID {}'.format(ann['category_id'])
66
+ ann['category_id'] = input_category_id_to_output_category_id[ann['category_id']]
67
+
68
+ # Update the category list
69
+ output_categories = []
70
+ for output_name in output_category_name_to_id:
71
+ category = {'name':output_name,'id':output_category_name_to_id[output_name]}
72
+ output_categories.append(category)
73
+ output_data['categories'] = output_categories
74
+
75
+ if output_file is not None:
76
+ with open(output_file,'w') as f:
77
+ json.dump(output_data,f,indent=1)
78
+
79
+ return input_data
80
+
81
+
82
+ #%% Command-line driver
83
+
84
+ # TODO
@@ -1,70 +1,66 @@
1
- ########
2
- #
3
- # remove_exif.py
4
- #
5
- # Removes all EXIF/IPTC/XMP metadata from a folder of images, without making
6
- # backup copies, using pyexiv2.
7
- #
8
- ########
1
+ """
2
+
3
+ remove_exif.py
4
+
5
+ Removes all EXIF/IPTC/XMP metadata from a folder of images, without making
6
+ backup copies, using pyexiv2.
7
+
8
+ TODO: This is a one-off script waiting to be cleaned up for more general use.
9
+
10
+ """
11
+
12
+ input_base = r'f:\images'
13
+
9
14
 
10
15
  #%% Imports and constants
11
16
 
12
17
  import os
13
18
  import glob
14
19
 
15
- input_base = r'f:\images'
16
- assert os.path.isdir(input_base)
17
-
18
-
19
- #%% List files
20
-
21
- all_files = [f for f in glob.glob(input_base + "*/**", recursive=True)]
22
- image_files = [s for s in all_files if (s.lower().endswith('.jpg'))]
23
-
24
-
25
- #%% Remove EXIF data (support)
26
-
27
- import pyexiv2
28
-
29
- # PYEXIV2 IS NOT THREAD SAFE; DO NOT CALL THIS IN PARALLEL FROM A SINGLE PROCESS
30
- def remove_exif(fn):
31
-
32
- try:
33
- img = pyexiv2.Image(fn)
34
- # data = img.read_exif(); print(data)
35
- img.clear_exif()
36
- img.clear_iptc()
37
- img.clear_xmp()
38
- img.close()
39
- except Exception as e:
40
- print('EXIF error on {}: {}'.format(fn,str(e)))
41
-
42
-
43
- #%% Debug
44
-
45
- if False:
46
- #%%
47
- fn = image_files[-10001]
48
- os.startfile(fn)
49
- #%%
50
- remove_exif(fn)
51
- os.startfile(fn)
52
-
53
-
54
- #%% Remove EXIF data (execution)
55
-
56
- from joblib import Parallel, delayed
57
-
58
- n_exif_threads = 50
59
-
60
- if n_exif_threads == 1:
61
-
62
- # fn = image_files[0]
63
- for fn in image_files:
64
- remove_exif(fn)
def main():
    """
    Remove all EXIF/IPTC/XMP metadata, in place (no backup copies), from every
    .jpg file found recursively under the folder [input_base].
    """

    assert os.path.isdir(input_base)

    ##%% Enumerate candidate images

    # NOTE(review): the pattern is input_base + "*/**" with no path separator, so
    # it can also match sibling folders whose names share the same prefix as
    # input_base; preserved as-is pending confirmation of intent.
    candidates = glob.glob(input_base + "*/**", recursive=True)
    image_files = []
    for candidate in candidates:
        if candidate.lower().endswith('.jpg'):
            image_files.append(candidate)

    ##%% Metadata-removal worker

    import pyexiv2

    # PYEXIV2 IS NOT THREAD SAFE; DO NOT CALL THIS IN PARALLEL FROM A SINGLE PROCESS
    def remove_exif(fn):
        """Clear the EXIF, IPTC, and XMP blocks of the image [fn]; errors are printed, not raised."""

        try:
            img = pyexiv2.Image(fn)
            # data = img.read_exif(); print(data)
            img.clear_exif()
            img.clear_iptc()
            img.clear_xmp()
            img.close()
        except Exception as e:
            print('EXIF error on {}: {}'.format(fn,str(e)))

    ##%% Process images, either serially or on a process pool

    from joblib import Parallel, delayed

    n_exif_threads = 50

    if n_exif_threads == 1:

        # fn = image_files[0]
        for fn in image_files:
            remove_exif(fn)

    else:

        # joblib.Parallel defaults to a process-based backend, but let's be sure:
        # pyexiv2 is not thread-safe, so we want processes rather than threads.
        _ = Parallel(n_jobs=n_exif_threads,verbose=2,prefer='processes')(
            delayed(remove_exif)(fn) for fn in image_files)


if __name__ == '__main__':
    main()