megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (197)
  1. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  2. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  3. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  4. megadetector/classification/aggregate_classifier_probs.py +3 -3
  5. megadetector/classification/analyze_failed_images.py +5 -5
  6. megadetector/classification/cache_batchapi_outputs.py +5 -5
  7. megadetector/classification/create_classification_dataset.py +11 -12
  8. megadetector/classification/crop_detections.py +10 -10
  9. megadetector/classification/csv_to_json.py +8 -8
  10. megadetector/classification/detect_and_crop.py +13 -15
  11. megadetector/classification/efficientnet/model.py +8 -8
  12. megadetector/classification/efficientnet/utils.py +6 -5
  13. megadetector/classification/evaluate_model.py +7 -7
  14. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  15. megadetector/classification/json_to_azcopy_list.py +1 -1
  16. megadetector/classification/json_validator.py +29 -32
  17. megadetector/classification/map_classification_categories.py +9 -9
  18. megadetector/classification/merge_classification_detection_output.py +12 -9
  19. megadetector/classification/prepare_classification_script.py +19 -19
  20. megadetector/classification/prepare_classification_script_mc.py +26 -26
  21. megadetector/classification/run_classifier.py +4 -4
  22. megadetector/classification/save_mislabeled.py +6 -6
  23. megadetector/classification/train_classifier.py +1 -1
  24. megadetector/classification/train_classifier_tf.py +9 -9
  25. megadetector/classification/train_utils.py +10 -10
  26. megadetector/data_management/annotations/annotation_constants.py +1 -2
  27. megadetector/data_management/camtrap_dp_to_coco.py +79 -46
  28. megadetector/data_management/cct_json_utils.py +103 -103
  29. megadetector/data_management/cct_to_md.py +49 -49
  30. megadetector/data_management/cct_to_wi.py +33 -33
  31. megadetector/data_management/coco_to_labelme.py +75 -75
  32. megadetector/data_management/coco_to_yolo.py +210 -193
  33. megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
  34. megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
  35. megadetector/data_management/databases/integrity_check_json_db.py +228 -200
  36. megadetector/data_management/databases/subset_json_db.py +33 -33
  37. megadetector/data_management/generate_crops_from_cct.py +88 -39
  38. megadetector/data_management/get_image_sizes.py +54 -49
  39. megadetector/data_management/labelme_to_coco.py +133 -125
  40. megadetector/data_management/labelme_to_yolo.py +159 -73
  41. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  42. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  43. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  44. megadetector/data_management/lila/download_lila_subset.py +21 -24
  45. megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
  46. megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
  47. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  48. megadetector/data_management/lila/lila_common.py +73 -70
  49. megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
  50. megadetector/data_management/mewc_to_md.py +344 -340
  51. megadetector/data_management/ocr_tools.py +262 -255
  52. megadetector/data_management/read_exif.py +249 -227
  53. megadetector/data_management/remap_coco_categories.py +90 -28
  54. megadetector/data_management/remove_exif.py +81 -21
  55. megadetector/data_management/rename_images.py +187 -187
  56. megadetector/data_management/resize_coco_dataset.py +588 -120
  57. megadetector/data_management/speciesnet_to_md.py +41 -41
  58. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  59. megadetector/data_management/yolo_output_to_md_output.py +248 -122
  60. megadetector/data_management/yolo_to_coco.py +333 -191
  61. megadetector/detection/change_detection.py +832 -0
  62. megadetector/detection/process_video.py +340 -337
  63. megadetector/detection/pytorch_detector.py +358 -278
  64. megadetector/detection/run_detector.py +399 -186
  65. megadetector/detection/run_detector_batch.py +404 -377
  66. megadetector/detection/run_inference_with_yolov5_val.py +340 -327
  67. megadetector/detection/run_tiled_inference.py +257 -249
  68. megadetector/detection/tf_detector.py +24 -24
  69. megadetector/detection/video_utils.py +332 -295
  70. megadetector/postprocessing/add_max_conf.py +19 -11
  71. megadetector/postprocessing/categorize_detections_by_size.py +45 -45
  72. megadetector/postprocessing/classification_postprocessing.py +468 -433
  73. megadetector/postprocessing/combine_batch_outputs.py +23 -23
  74. megadetector/postprocessing/compare_batch_results.py +590 -525
  75. megadetector/postprocessing/convert_output_format.py +106 -102
  76. megadetector/postprocessing/create_crop_folder.py +347 -147
  77. megadetector/postprocessing/detector_calibration.py +173 -168
  78. megadetector/postprocessing/generate_csv_report.py +508 -499
  79. megadetector/postprocessing/load_api_results.py +48 -27
  80. megadetector/postprocessing/md_to_coco.py +133 -102
  81. megadetector/postprocessing/md_to_labelme.py +107 -90
  82. megadetector/postprocessing/md_to_wi.py +40 -40
  83. megadetector/postprocessing/merge_detections.py +92 -114
  84. megadetector/postprocessing/postprocess_batch_results.py +319 -301
  85. megadetector/postprocessing/remap_detection_categories.py +91 -38
  86. megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
  87. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  88. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  89. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
  90. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  91. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  92. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  93. megadetector/postprocessing/validate_batch_results.py +70 -70
  94. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  95. megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
  96. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
  97. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
  98. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  99. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  100. megadetector/taxonomy_mapping/species_lookup.py +156 -74
  101. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  102. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  103. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  104. megadetector/utils/ct_utils.py +1049 -211
  105. megadetector/utils/directory_listing.py +21 -77
  106. megadetector/utils/gpu_test.py +22 -22
  107. megadetector/utils/md_tests.py +632 -529
  108. megadetector/utils/path_utils.py +1520 -431
  109. megadetector/utils/process_utils.py +41 -41
  110. megadetector/utils/split_locations_into_train_val.py +62 -62
  111. megadetector/utils/string_utils.py +148 -27
  112. megadetector/utils/url_utils.py +489 -176
  113. megadetector/utils/wi_utils.py +2658 -2526
  114. megadetector/utils/write_html_image_list.py +137 -137
  115. megadetector/visualization/plot_utils.py +34 -30
  116. megadetector/visualization/render_images_with_thumbnails.py +39 -74
  117. megadetector/visualization/visualization_utils.py +487 -435
  118. megadetector/visualization/visualize_db.py +232 -198
  119. megadetector/visualization/visualize_detector_output.py +82 -76
  120. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
  121. megadetector-10.0.0.dist-info/RECORD +139 -0
  122. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
  123. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  124. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  125. megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
  126. megadetector/api/batch_processing/api_core/server.py +0 -294
  127. megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
  128. megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
  129. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  130. megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
  131. megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
  132. megadetector/api/batch_processing/api_core/server_utils.py +0 -88
  133. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  134. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  135. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  136. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  137. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  138. megadetector/api/synchronous/__init__.py +0 -0
  139. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  140. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
  141. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
  142. megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
  143. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  144. megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
  145. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  146. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  147. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  148. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  149. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  150. megadetector/data_management/importers/awc_to_json.py +0 -191
  151. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  152. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  153. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  154. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  155. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  156. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  157. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  158. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  159. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  160. megadetector/data_management/importers/ena24_to_json.py +0 -276
  161. megadetector/data_management/importers/filenames_to_json.py +0 -386
  162. megadetector/data_management/importers/helena_to_cct.py +0 -283
  163. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  164. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  165. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  166. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  167. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  168. megadetector/data_management/importers/missouri_to_json.py +0 -490
  169. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  170. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  171. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  172. megadetector/data_management/importers/pc_to_json.py +0 -365
  173. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  174. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  175. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  176. megadetector/data_management/importers/rspb_to_json.py +0 -356
  177. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  178. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  179. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  180. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  181. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  182. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  183. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  184. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  185. megadetector/data_management/importers/ubc_to_json.py +0 -399
  186. megadetector/data_management/importers/umn_to_json.py +0 -507
  187. megadetector/data_management/importers/wellington_to_json.py +0 -263
  188. megadetector/data_management/importers/wi_to_json.py +0 -442
  189. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  190. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  191. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  192. megadetector/utils/azure_utils.py +0 -178
  193. megadetector/utils/sas_blob_utils.py +0 -509
  194. megadetector-5.0.28.dist-info/RECORD +0 -209
  195. megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
  196. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
  197. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
@@ -2,7 +2,9 @@
2
2
 
3
3
  remap_coco_categories.py
4
4
 
5
- Given a COCO-formatted dataset, remap the categories to a new mapping.
5
+ Given a COCO-formatted dataset, remap the categories to a new mapping. A common use
6
+ case is to take a fine-grained dataset (e.g. with species-level categories) and
7
+ map them to coarse categories (typically MD categories).
6
8
 
7
9
  """
8
10
 
@@ -10,6 +12,7 @@ Given a COCO-formatted dataset, remap the categories to a new mapping.
10
12
 
11
13
  import os
12
14
  import json
15
+ import argparse
13
16
 
14
17
  from copy import deepcopy
15
18
  from megadetector.utils.ct_utils import invert_dictionary
@@ -25,24 +28,24 @@ def remap_coco_categories(input_data,
25
28
  """
26
29
  Given a COCO-formatted dataset, remap the categories to a new categories mapping, optionally
27
30
  writing the results to a new file.
28
-
31
+
29
32
  Args:
30
- input_data (str or dict): a COCO-formatted dict or a filename. If it's a dict, it will
33
+ input_data (str or dict): a COCO-formatted dict or a filename. If it's a dict, it will
31
34
  be copied, not modified in place.
32
- output_category_name_to_id (dict) a dict mapping strings to ints. Categories not in
35
+ output_category_name_to_id (dict): a dict mapping strings to ints. Categories not in
33
36
  this dict will be ignored or will result in errors, depending on allow_unused_categories.
34
- input_category_name_to_output_category_name: a dict mapping strings to strings.
35
- Annotations using categories not in this dict will be omitted or will result in
37
+ input_category_name_to_output_category_name (dict): a dict mapping strings to strings.
38
+ Annotations using categories not in this dict will be omitted or will result in
36
39
  errors, depending on allow_unused_categories.
37
40
  output_file (str, optional): output file to which we should write remapped COCO data
38
41
  allow_unused_categories (bool, optional): should we ignore categories not present in the
39
42
  input/output mappings? If this is False and we encounter an unmapped category, we'll
40
43
  error.
41
-
44
+
42
45
  Returns:
43
46
  dict: COCO-formatted dict
44
47
  """
45
-
48
+
46
49
  if isinstance(input_data,str):
47
50
  assert os.path.isfile(input_data), "Can't find file {}".format(input_data)
48
51
  with open(input_data,'r') as f:
@@ -51,45 +54,45 @@ def remap_coco_categories(input_data,
51
54
  else:
52
55
  assert isinstance(input_data,dict), 'Illegal COCO input data'
53
56
  input_data = deepcopy(input_data)
54
-
57
+
55
58
  # It's safe to modify in-place now
56
59
  output_data = input_data
57
-
60
+
58
61
  # Read input name --> ID mapping
59
62
  input_category_name_to_input_category_id = {}
60
63
  for c in input_data['categories']:
61
64
  input_category_name_to_input_category_id[c['name']] = c['id']
62
65
  input_category_id_to_input_category_name = \
63
66
  invert_dictionary(input_category_name_to_input_category_id)
64
-
67
+
65
68
  # Map input IDs --> output IDs
66
69
  input_category_id_to_output_category_id = {}
67
70
  input_category_names = list(input_category_name_to_output_category_name.keys())
68
-
71
+
69
72
  # input_name = input_category_names[0]
70
73
  for input_name in input_category_names:
71
-
74
+
72
75
  output_name = input_category_name_to_output_category_name[input_name]
73
76
  assert output_name in output_category_name_to_id, \
74
77
  'No output ID for {} --> {}'.format(input_name,output_name)
75
78
  input_id = input_category_name_to_input_category_id[input_name]
76
79
  output_id = output_category_name_to_id[output_name]
77
80
  input_category_id_to_output_category_id[input_id] = output_id
78
-
81
+
79
82
  # ...for each category we want to keep
80
-
83
+
81
84
  printed_unused_category_warnings = set()
82
-
85
+
83
86
  valid_annotations = []
84
-
87
+
85
88
  # Map annotations
86
89
  for ann in output_data['annotations']:
87
-
90
+
88
91
  input_category_id = ann['category_id']
89
92
  if input_category_id not in input_category_id_to_output_category_id:
90
93
  if allow_unused_categories:
91
94
  if input_category_id not in printed_unused_category_warnings:
92
- printed_unused_category_warnings.add(input_category_id)
95
+ printed_unused_category_warnings.add(input_category_id)
93
96
  input_category_name = \
94
97
  input_category_id_to_input_category_name[input_category_id]
95
98
  s = 'Skipping unmapped category ID {} ({})'.format(
@@ -98,31 +101,31 @@ def remap_coco_categories(input_data,
98
101
  continue
99
102
  else:
100
103
  s = 'Unmapped category ID {}'.format(input_category_id)
101
- raise ValueError(s)
104
+ raise ValueError(s)
102
105
  output_category_id = input_category_id_to_output_category_id[input_category_id]
103
106
  ann['category_id'] = output_category_id
104
- valid_annotations.append(ann)
105
-
107
+ valid_annotations.append(ann)
108
+
106
109
  # ...for each annotation
107
-
110
+
108
111
  # The only reason annotations should get excluded is the case where we allow
109
112
  # unused categories
110
113
  if not allow_unused_categories:
111
114
  assert len(valid_annotations) == len(output_data['annotations'])
112
-
115
+
113
116
  output_data['annotations'] = valid_annotations
114
-
117
+
115
118
  # Update the category list
116
119
  output_categories = []
117
120
  for output_name in output_category_name_to_id:
118
121
  category = {'name':output_name,'id':output_category_name_to_id[output_name]}
119
122
  output_categories.append(category)
120
123
  output_data['categories'] = output_categories
121
-
124
+
122
125
  if output_file is not None:
123
126
  with open(output_file,'w') as f:
124
127
  json.dump(output_data,f,indent=1)
125
-
128
+
126
129
  return input_data
127
130
 
128
131
  # ...def remap_coco_categories(...)
@@ -130,4 +133,63 @@ def remap_coco_categories(input_data,
130
133
 
131
134
  #%% Command-line driver
132
135
 
133
- # TODO
136
+ def main():
137
+ """
138
+ Command-line interface to remap COCO categories.
139
+ """
140
+
141
+ parser = argparse.ArgumentParser(
142
+ description='Remap categories in a COCO-formatted dataset'
143
+ )
144
+ parser.add_argument(
145
+ 'input_coco_file',
146
+ type=str,
147
+ help='Path to the input COCO .json file'
148
+ )
149
+ parser.add_argument(
150
+ 'output_category_map_file',
151
+ type=str,
152
+ help="Path to a .json file mapping output category names to integer IDs (e.g., {'cat':0, 'dog':1})"
153
+ )
154
+ parser.add_argument(
155
+ 'input_to_output_category_map_file',
156
+ type=str,
157
+ help="Path to a .json file mapping input category names to output category names" + \
158
+ " (e.g., {'old_cat_name':'cat', 'old_dog_name':'dog'})"
159
+ )
160
+ parser.add_argument(
161
+ 'output_coco_file',
162
+ type=str,
163
+ help='Path to save the remapped COCO .json file'
164
+ )
165
+ parser.add_argument(
166
+ '--allow_unused_categories',
167
+ action='store_true',
168
+ help='Allow unmapped categories (by default, errors on unmapped categories)'
169
+ )
170
+
171
+ args = parser.parse_args()
172
+
173
+ # Load category mappings
174
+ with open(args.output_category_map_file, 'r') as f:
175
+ output_category_name_to_id = json.load(f)
176
+
177
+ with open(args.input_to_output_category_map_file, 'r') as f:
178
+ input_category_name_to_output_category_name = json.load(f)
179
+
180
+ # Load COCO data
181
+ with open(args.input_coco_file, 'r') as f:
182
+ input_data = json.load(f)
183
+
184
+ remap_coco_categories(
185
+ input_data=input_data,
186
+ output_category_name_to_id=output_category_name_to_id,
187
+ input_category_name_to_output_category_name=input_category_name_to_output_category_name,
188
+ output_file=args.output_coco_file,
189
+ allow_unused_categories=args.allow_unused_categories
190
+ )
191
+
192
+ print(f'Successfully remapped categories and saved to {args.output_coco_file}')
193
+
194
+ if __name__ == '__main__':
195
+ main()
@@ -2,7 +2,7 @@
2
2
 
3
3
  remove_exif.py
4
4
 
5
- Removes all EXIF/IPTC/XMP metadata from a folder of images, without making
5
+ Removes all EXIF/IPTC/XMP metadata from a folder of images, without making
6
6
  backup copies, using pyexiv2. Ignores non-jpeg images.
7
7
 
8
8
  This module is rarely used, and pyexiv2 is not thread-safe, so pyexiv2 is not
@@ -14,6 +14,7 @@ included in package-level dependency lists. YMMV.
14
14
 
15
15
  import os
16
16
  import glob
17
+ import argparse
17
18
 
18
19
  from multiprocessing.pool import Pool as Pool
19
20
  from tqdm import tqdm
@@ -21,22 +22,33 @@ from tqdm import tqdm
21
22
 
22
23
  #%% Support functions
23
24
 
24
- # Pyexif2 is not thread safe, do not call this function in parallel within a process
25
- #
26
- # Parallelizing across processes is fine.
27
25
  def remove_exif_from_image(fn):
26
+ """
27
+ Remove EXIF information from a single image
28
+
29
+ pyexiv2 is not thread safe, do not call this function in parallel within a process.
30
+
31
+ Parallelizing across processes is fine.
32
+
33
+ Args:
34
+ fn (str): image file from which we should remove EXIF information
35
+
36
+ Returns:
37
+ bool: whether EXIF removal succeeded
38
+ """
39
+
40
+ import pyexiv2 # type: ignore
28
41
 
29
- import pyexiv2
30
-
31
42
  try:
32
43
  img = pyexiv2.Image(fn)
33
44
  img.clear_exif()
34
45
  img.clear_iptc()
35
46
  img.clear_xmp()
36
- img.close()
47
+ img.close()
37
48
  except Exception as e:
38
49
  print('EXIF error on {}: {}'.format(fn,str(e)))
39
-
50
+ return False
51
+
40
52
  return True
41
53
 
42
54
 
@@ -44,22 +56,23 @@ def remove_exif_from_image(fn):
44
56
 
45
57
  def remove_exif(image_base_folder,recursive=True,n_processes=1):
46
58
  """
47
- Removes all EXIF/IPTC/XMP metadata from a folder of images, without making
59
+ Removes all EXIF/IPTC/XMP metadata from a folder of images, without making
48
60
  backup copies, using pyexiv2. Ignores non-jpeg images.
49
-
61
+
50
62
  Args:
51
63
  image_base_folder (str): the folder from which we should remove EXIF data
52
64
  recursive (bool, optional): whether to process [image_base_folder] recursively
53
65
  n_processes (int, optional): number of concurrent workers. Because pyexiv2 is not
54
- thread-safe, only process-based parallelism is supported.
66
+ thread-safe, only process-based parallelism is supported.
55
67
  """
68
+
56
69
  try:
57
- import pyexiv2 #noqa
70
+ import pyexiv2 # type: ignore #noqa
58
71
  except:
59
72
  print('pyexiv2 not available; try "pip install pyexiv2"')
60
73
  raise
61
74
 
62
-
75
+
63
76
  ##%% List files
64
77
 
65
78
  assert os.path.isdir(image_base_folder), \
@@ -67,25 +80,72 @@ def remove_exif(image_base_folder,recursive=True,n_processes=1):
67
80
  all_files = [f for f in glob.glob(image_base_folder+ "*/**", recursive=recursive)]
68
81
  image_files = [s for s in all_files if \
69
82
  (s.lower().endswith('.jpg') or s.lower().endswith('.jpeg'))]
70
-
83
+
71
84
 
72
85
  ##%% Remove EXIF data (execution)
73
86
 
74
87
  if n_processes == 1:
75
-
88
+
76
89
  # fn = image_files[0]
77
90
  for fn in tqdm(image_files):
78
91
  remove_exif_from_image(fn)
79
-
92
+
80
93
  else:
81
94
  # pyexiv2 is not thread-safe, so we need to use processes
82
- print('Starting parallel process pool with {} workers'.format(n_processes))
83
- pool = Pool(n_processes)
84
- _ = list(tqdm(pool.imap(remove_exif_from_image,image_files),total=len(image_files)))
85
-
95
+ pool = None
96
+ try:
97
+ print('Starting parallel process pool with {} workers'.format(n_processes))
98
+ pool = Pool(n_processes)
99
+ _ = list(tqdm(pool.imap(remove_exif_from_image,image_files),total=len(image_files)))
100
+ finally:
101
+ pool.close()
102
+ pool.join()
103
+ print("Pool closed and joined for EXIF removal")
104
+
86
105
  # ...remove_exif(...)
87
106
 
88
107
 
89
108
  #%% Command-line driver
90
109
 
91
- ## TODO
110
+ def main():
111
+ """
112
+ Command-line interface to remove EXIF data from images.
113
+ """
114
+
115
+ parser = argparse.ArgumentParser(
116
+ description='Removes EXIF/IPTC/XMP metadata from images in a folder'
117
+ )
118
+ parser.add_argument(
119
+ 'image_base_folder',
120
+ type=str,
121
+ help='Folder to process for EXIF removal'
122
+ )
123
+ parser.add_argument(
124
+ '--nonrecursive',
125
+ action='store_true',
126
+ help="Don't recurse into [image_base_folder] (default is recursive)"
127
+ )
128
+ parser.add_argument(
129
+ '--n_processes',
130
+ type=int,
131
+ default=1,
132
+ help='Number of concurrent processes for EXIF removal (default: 1)'
133
+ )
134
+
135
+ args = parser.parse_args()
136
+
137
+ recursive = (not args.nonrecursive)
138
+
139
+ print('Processing folder: {}'.format(args.image_base_folder))
140
+ if not os.path.isdir(args.image_base_folder):
141
+ raise ValueError('Folder not found at {}'.format(args.image_base_folder))
142
+
143
+ remove_exif(
144
+ image_base_folder=args.image_base_folder,
145
+ recursive=recursive,
146
+ n_processes=args.n_processes
147
+ )
148
+ print('Finished removing EXIF data')
149
+
150
+ if __name__ == '__main__':
151
+ main()