megadetector 5.0.11__py3-none-any.whl → 5.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (203) hide show
  1. megadetector/api/__init__.py +0 -0
  2. megadetector/api/batch_processing/__init__.py +0 -0
  3. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  4. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. megadetector/api/batch_processing/api_core/batch_service/score.py +439 -0
  6. megadetector/api/batch_processing/api_core/server.py +294 -0
  7. megadetector/api/batch_processing/api_core/server_api_config.py +97 -0
  8. megadetector/api/batch_processing/api_core/server_app_config.py +55 -0
  9. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +220 -0
  10. megadetector/api/batch_processing/api_core/server_job_status_table.py +149 -0
  11. megadetector/api/batch_processing/api_core/server_orchestration.py +360 -0
  12. megadetector/api/batch_processing/api_core/server_utils.py +88 -0
  13. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  14. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +46 -0
  15. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  16. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +152 -0
  17. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  18. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  19. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  20. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  21. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
  22. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  23. megadetector/api/synchronous/__init__.py +0 -0
  24. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  25. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +152 -0
  26. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +263 -0
  27. megadetector/api/synchronous/api_core/animal_detection_api/config.py +35 -0
  28. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  29. megadetector/api/synchronous/api_core/tests/load_test.py +110 -0
  30. megadetector/classification/__init__.py +0 -0
  31. megadetector/classification/aggregate_classifier_probs.py +108 -0
  32. megadetector/classification/analyze_failed_images.py +227 -0
  33. megadetector/classification/cache_batchapi_outputs.py +198 -0
  34. megadetector/classification/create_classification_dataset.py +627 -0
  35. megadetector/classification/crop_detections.py +516 -0
  36. megadetector/classification/csv_to_json.py +226 -0
  37. megadetector/classification/detect_and_crop.py +855 -0
  38. megadetector/classification/efficientnet/__init__.py +9 -0
  39. megadetector/classification/efficientnet/model.py +415 -0
  40. megadetector/classification/efficientnet/utils.py +607 -0
  41. megadetector/classification/evaluate_model.py +520 -0
  42. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  43. megadetector/classification/json_to_azcopy_list.py +63 -0
  44. megadetector/classification/json_validator.py +699 -0
  45. megadetector/classification/map_classification_categories.py +276 -0
  46. megadetector/classification/merge_classification_detection_output.py +506 -0
  47. megadetector/classification/prepare_classification_script.py +194 -0
  48. megadetector/classification/prepare_classification_script_mc.py +228 -0
  49. megadetector/classification/run_classifier.py +287 -0
  50. megadetector/classification/save_mislabeled.py +110 -0
  51. megadetector/classification/train_classifier.py +827 -0
  52. megadetector/classification/train_classifier_tf.py +725 -0
  53. megadetector/classification/train_utils.py +323 -0
  54. megadetector/data_management/__init__.py +0 -0
  55. megadetector/data_management/annotations/__init__.py +0 -0
  56. megadetector/data_management/annotations/annotation_constants.py +34 -0
  57. megadetector/data_management/camtrap_dp_to_coco.py +237 -0
  58. megadetector/data_management/cct_json_utils.py +404 -0
  59. megadetector/data_management/cct_to_md.py +176 -0
  60. megadetector/data_management/cct_to_wi.py +289 -0
  61. megadetector/data_management/coco_to_labelme.py +283 -0
  62. megadetector/data_management/coco_to_yolo.py +662 -0
  63. megadetector/data_management/databases/__init__.py +0 -0
  64. megadetector/data_management/databases/add_width_and_height_to_db.py +33 -0
  65. megadetector/data_management/databases/combine_coco_camera_traps_files.py +206 -0
  66. megadetector/data_management/databases/integrity_check_json_db.py +493 -0
  67. megadetector/data_management/databases/subset_json_db.py +115 -0
  68. megadetector/data_management/generate_crops_from_cct.py +149 -0
  69. megadetector/data_management/get_image_sizes.py +189 -0
  70. megadetector/data_management/importers/add_nacti_sizes.py +52 -0
  71. megadetector/data_management/importers/add_timestamps_to_icct.py +79 -0
  72. megadetector/data_management/importers/animl_results_to_md_results.py +158 -0
  73. megadetector/data_management/importers/auckland_doc_test_to_json.py +373 -0
  74. megadetector/data_management/importers/auckland_doc_to_json.py +201 -0
  75. megadetector/data_management/importers/awc_to_json.py +191 -0
  76. megadetector/data_management/importers/bellevue_to_json.py +273 -0
  77. megadetector/data_management/importers/cacophony-thermal-importer.py +793 -0
  78. megadetector/data_management/importers/carrizo_shrubfree_2018.py +269 -0
  79. megadetector/data_management/importers/carrizo_trail_cam_2017.py +289 -0
  80. megadetector/data_management/importers/cct_field_adjustments.py +58 -0
  81. megadetector/data_management/importers/channel_islands_to_cct.py +913 -0
  82. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +180 -0
  83. megadetector/data_management/importers/eMammal/eMammal_helpers.py +249 -0
  84. megadetector/data_management/importers/eMammal/make_eMammal_json.py +223 -0
  85. megadetector/data_management/importers/ena24_to_json.py +276 -0
  86. megadetector/data_management/importers/filenames_to_json.py +386 -0
  87. megadetector/data_management/importers/helena_to_cct.py +283 -0
  88. megadetector/data_management/importers/idaho-camera-traps.py +1407 -0
  89. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +294 -0
  90. megadetector/data_management/importers/jb_csv_to_json.py +150 -0
  91. megadetector/data_management/importers/mcgill_to_json.py +250 -0
  92. megadetector/data_management/importers/missouri_to_json.py +490 -0
  93. megadetector/data_management/importers/nacti_fieldname_adjustments.py +79 -0
  94. megadetector/data_management/importers/noaa_seals_2019.py +181 -0
  95. megadetector/data_management/importers/pc_to_json.py +365 -0
  96. megadetector/data_management/importers/plot_wni_giraffes.py +123 -0
  97. megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -0
  98. megadetector/data_management/importers/prepare_zsl_imerit.py +131 -0
  99. megadetector/data_management/importers/rspb_to_json.py +356 -0
  100. megadetector/data_management/importers/save_the_elephants_survey_A.py +320 -0
  101. megadetector/data_management/importers/save_the_elephants_survey_B.py +329 -0
  102. megadetector/data_management/importers/snapshot_safari_importer.py +758 -0
  103. megadetector/data_management/importers/snapshot_safari_importer_reprise.py +665 -0
  104. megadetector/data_management/importers/snapshot_serengeti_lila.py +1067 -0
  105. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +150 -0
  106. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +153 -0
  107. megadetector/data_management/importers/sulross_get_exif.py +65 -0
  108. megadetector/data_management/importers/timelapse_csv_set_to_json.py +490 -0
  109. megadetector/data_management/importers/ubc_to_json.py +399 -0
  110. megadetector/data_management/importers/umn_to_json.py +507 -0
  111. megadetector/data_management/importers/wellington_to_json.py +263 -0
  112. megadetector/data_management/importers/wi_to_json.py +442 -0
  113. megadetector/data_management/importers/zamba_results_to_md_results.py +181 -0
  114. megadetector/data_management/labelme_to_coco.py +547 -0
  115. megadetector/data_management/labelme_to_yolo.py +272 -0
  116. megadetector/data_management/lila/__init__.py +0 -0
  117. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +97 -0
  118. megadetector/data_management/lila/add_locations_to_nacti.py +147 -0
  119. megadetector/data_management/lila/create_lila_blank_set.py +558 -0
  120. megadetector/data_management/lila/create_lila_test_set.py +152 -0
  121. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  122. megadetector/data_management/lila/download_lila_subset.py +178 -0
  123. megadetector/data_management/lila/generate_lila_per_image_labels.py +516 -0
  124. megadetector/data_management/lila/get_lila_annotation_counts.py +170 -0
  125. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  126. megadetector/data_management/lila/lila_common.py +300 -0
  127. megadetector/data_management/lila/test_lila_metadata_urls.py +132 -0
  128. megadetector/data_management/ocr_tools.py +870 -0
  129. megadetector/data_management/read_exif.py +809 -0
  130. megadetector/data_management/remap_coco_categories.py +84 -0
  131. megadetector/data_management/remove_exif.py +66 -0
  132. megadetector/data_management/rename_images.py +187 -0
  133. megadetector/data_management/resize_coco_dataset.py +189 -0
  134. megadetector/data_management/wi_download_csv_to_coco.py +247 -0
  135. megadetector/data_management/yolo_output_to_md_output.py +446 -0
  136. megadetector/data_management/yolo_to_coco.py +676 -0
  137. megadetector/detection/__init__.py +0 -0
  138. megadetector/detection/detector_training/__init__.py +0 -0
  139. megadetector/detection/detector_training/model_main_tf2.py +114 -0
  140. megadetector/detection/process_video.py +846 -0
  141. megadetector/detection/pytorch_detector.py +355 -0
  142. megadetector/detection/run_detector.py +779 -0
  143. megadetector/detection/run_detector_batch.py +1219 -0
  144. megadetector/detection/run_inference_with_yolov5_val.py +1087 -0
  145. megadetector/detection/run_tiled_inference.py +934 -0
  146. megadetector/detection/tf_detector.py +192 -0
  147. megadetector/detection/video_utils.py +698 -0
  148. megadetector/postprocessing/__init__.py +0 -0
  149. megadetector/postprocessing/add_max_conf.py +64 -0
  150. megadetector/postprocessing/categorize_detections_by_size.py +165 -0
  151. megadetector/postprocessing/classification_postprocessing.py +716 -0
  152. megadetector/postprocessing/combine_api_outputs.py +249 -0
  153. megadetector/postprocessing/compare_batch_results.py +966 -0
  154. megadetector/postprocessing/convert_output_format.py +396 -0
  155. megadetector/postprocessing/load_api_results.py +195 -0
  156. megadetector/postprocessing/md_to_coco.py +310 -0
  157. megadetector/postprocessing/md_to_labelme.py +330 -0
  158. megadetector/postprocessing/merge_detections.py +412 -0
  159. megadetector/postprocessing/postprocess_batch_results.py +1908 -0
  160. megadetector/postprocessing/remap_detection_categories.py +170 -0
  161. megadetector/postprocessing/render_detection_confusion_matrix.py +660 -0
  162. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +211 -0
  163. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +83 -0
  164. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1635 -0
  165. megadetector/postprocessing/separate_detections_into_folders.py +730 -0
  166. megadetector/postprocessing/subset_json_detector_output.py +700 -0
  167. megadetector/postprocessing/top_folders_to_bottom.py +223 -0
  168. megadetector/taxonomy_mapping/__init__.py +0 -0
  169. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  170. megadetector/taxonomy_mapping/map_new_lila_datasets.py +150 -0
  171. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -0
  172. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +588 -0
  173. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  174. megadetector/taxonomy_mapping/simple_image_download.py +219 -0
  175. megadetector/taxonomy_mapping/species_lookup.py +834 -0
  176. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  177. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  178. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  179. megadetector/utils/__init__.py +0 -0
  180. megadetector/utils/azure_utils.py +178 -0
  181. megadetector/utils/ct_utils.py +613 -0
  182. megadetector/utils/directory_listing.py +246 -0
  183. megadetector/utils/md_tests.py +1164 -0
  184. megadetector/utils/path_utils.py +1045 -0
  185. megadetector/utils/process_utils.py +160 -0
  186. megadetector/utils/sas_blob_utils.py +509 -0
  187. megadetector/utils/split_locations_into_train_val.py +228 -0
  188. megadetector/utils/string_utils.py +92 -0
  189. megadetector/utils/url_utils.py +323 -0
  190. megadetector/utils/write_html_image_list.py +225 -0
  191. megadetector/visualization/__init__.py +0 -0
  192. megadetector/visualization/plot_utils.py +293 -0
  193. megadetector/visualization/render_images_with_thumbnails.py +275 -0
  194. megadetector/visualization/visualization_utils.py +1536 -0
  195. megadetector/visualization/visualize_db.py +552 -0
  196. megadetector/visualization/visualize_detector_output.py +405 -0
  197. {megadetector-5.0.11.dist-info → megadetector-5.0.13.dist-info}/LICENSE +0 -0
  198. {megadetector-5.0.11.dist-info → megadetector-5.0.13.dist-info}/METADATA +2 -2
  199. megadetector-5.0.13.dist-info/RECORD +201 -0
  200. megadetector-5.0.13.dist-info/top_level.txt +1 -0
  201. megadetector-5.0.11.dist-info/RECORD +0 -5
  202. megadetector-5.0.11.dist-info/top_level.txt +0 -1
  203. {megadetector-5.0.11.dist-info → megadetector-5.0.13.dist-info}/WHEEL +0 -0
@@ -0,0 +1,84 @@
1
+ """
2
+
3
+ remap_coco_categories.py
4
+
5
+ Given a COCO-formatted dataset, remap the categories to a new mapping.
6
+
7
+ """
8
+
9
+ #%% Imports and constants
10
+
11
+ import os
12
+ import json
13
+
14
+ from copy import deepcopy
15
+
16
+
17
+ #%% Main function
18
+
19
def remap_coco_categories(input_data,
                          output_category_name_to_id,
                          input_category_name_to_output_category_name,
                          output_file=None):
    """
    Given a COCO-formatted dataset, remap the categories to a new categories mapping, optionally
    writing the results to a new file.

    Args:
        input_data (dict or str): a COCO-formatted dict, or the name of a .json file containing
            one.  A dict is deep-copied, never modified in place.
        output_category_name_to_id (dict): maps output category names (str) to output category
            IDs (int).  Every entry appears in the category list of the returned dataset.
        input_category_name_to_output_category_name (dict): maps input category names (str) to
            output category names (str).  Entries whose input name does not occur in the
            dataset's category list are ignored.
        output_file (str, optional): if not None, the remapped dataset is written to this file
            as .json

    Returns:
        dict: the remapped COCO-formatted dataset

    Raises:
        AssertionError: if [input_data] is a filename that doesn't exist or doesn't contain a
            dict, if a mapping points to an output name that has no ID, or if an annotation
            uses a category ID for which no mapping was supplied
    """

    if isinstance(input_data,str):
        assert os.path.isfile(input_data), "Can't find file {}".format(input_data)
        with open(input_data,'r') as f:
            input_data = json.load(f)
        assert isinstance(input_data,dict), 'Illegal COCO input data'
    else:
        assert isinstance(input_data,dict), 'Illegal COCO input data'
        input_data = deepcopy(input_data)

    # It's safe to modify in-place now: we either loaded the data from disk or copied it
    output_data = input_data

    # Read input name --> ID mapping
    input_category_name_to_input_category_id = \
        {c['name']:c['id'] for c in output_data['categories']}

    # Map input IDs --> output IDs
    input_category_id_to_output_category_id = {}
    for input_name,output_name in input_category_name_to_output_category_name.items():

        assert output_name in output_category_name_to_id, \
            'No output ID for {} --> {}'.format(input_name,output_name)

        # A mapping can legitimately cover categories that this particular dataset doesn't
        # define; there is nothing to remap for those.  If an annotation uses a category
        # that really has no mapping, the check in the annotation loop below still fires.
        if input_name not in input_category_name_to_input_category_id:
            continue

        input_id = input_category_name_to_input_category_id[input_name]
        input_category_id_to_output_category_id[input_id] = \
            output_category_name_to_id[output_name]

    # Map annotations
    for ann in output_data['annotations']:
        assert ann['category_id'] in input_category_id_to_output_category_id, \
            'Unrecognized category ID {}'.format(ann['category_id'])
        ann['category_id'] = input_category_id_to_output_category_id[ann['category_id']]

    # Update the category list
    output_data['categories'] = \
        [{'name':output_name,'id':output_id}
         for output_name,output_id in output_category_name_to_id.items()]

    if output_file is not None:
        with open(output_file,'w') as f:
            json.dump(output_data,f,indent=1)

    return output_data
80
+
81
+
82
+ #%% Command-line driver
83
+
84
+ # TODO
@@ -0,0 +1,66 @@
1
+ """
2
+
3
+ remove_exif.py
4
+
5
+ Removes all EXIF/IPTC/XMP metadata from a folder of images, without making
6
+ backup copies, using pyexiv2.
7
+
8
+ TODO: This is a one-off script waiting to be cleaned up for more general use.
9
+
10
+ """
11
+
12
input_base = r'f:\images'


#%% Imports and constants

import os
import glob


#%% Support functions

def list_jpeg_files(base_folder):
    """
    Recursively lists every path under [base_folder] whose name ends in .jpg
    (case-insensitive).

    Args:
        base_folder (str): the folder to search

    Returns:
        list: matching paths, each starting with [base_folder]
    """

    # Join with a path separator so only the contents of [base_folder] match.  Plain string
    # concatenation (base_folder + "*/**") also matches sibling folders whose names merely
    # start with the same characters, e.g. "images_backup" when searching "images".
    all_files = glob.glob(os.path.join(base_folder,'**'), recursive=True)
    return [s for s in all_files if s.lower().endswith('.jpg')]


# PYEXIV2 IS NOT THREAD SAFE; DO NOT CALL THIS IN PARALLEL FROM A SINGLE PROCESS
def remove_exif(fn):
    """
    Clears the EXIF, IPTC, and XMP metadata of a single image, in place.  Errors are printed
    rather than raised, so one bad file doesn't stop a batch.

    Args:
        fn (str): the image file to modify
    """

    # Imported here so that just importing this module doesn't require pyexiv2
    import pyexiv2

    try:
        img = pyexiv2.Image(fn)
        try:
            # data = img.read_exif(); print(data)
            img.clear_exif()
            img.clear_iptc()
            img.clear_xmp()
        finally:
            # Release the image even if one of the clear_* calls failed
            img.close()
    except Exception as e:
        print('EXIF error on {}: {}'.format(fn,str(e)))


#%% Main function

def main(input_folder=None, n_exif_threads=50):
    """
    Removes all EXIF/IPTC/XMP metadata from every .jpg file under a folder, without making
    backup copies.

    Args:
        input_folder (str, optional): the folder to process recursively; defaults to the
            module-level [input_base]
        n_exif_threads (int, optional): number of parallel worker processes; 1 disables
            parallelization
    """

    if input_folder is None:
        input_folder = input_base

    assert os.path.isdir(input_folder)

    ##%% List files

    image_files = list_jpeg_files(input_folder)

    ##%% Remove EXIF data (execution)

    if n_exif_threads == 1:

        # fn = image_files[0]
        for fn in image_files:
            remove_exif(fn)

    else:

        from joblib import Parallel, delayed

        # joblib.Parallel defaults to a process-based backend, but let's be sure, because
        # remove_exif() must not run on several threads of one process
        _ = Parallel(n_jobs=n_exif_threads,verbose=2,prefer='processes')(
            delayed(remove_exif)(fn) for fn in image_files)


if __name__ == '__main__':
    main()
@@ -0,0 +1,187 @@
1
+ """
2
+
3
+ rename_images.py
4
+
5
+ Copies images from a possibly-nested folder structure to a flat folder structure, including EXIF
6
+ timestamps in each filename. Loosely equivalent to camtrapR's imageRename() function.
7
+
8
+ """
9
+
10
+ #%% Imports and constants
11
+
12
+ import os
13
+
14
+ from megadetector.utils.path_utils import \
15
+ find_images, insert_before_extension, parallel_copy_files
16
+ from megadetector.data_management.read_exif import \
17
+ ReadExifOptions, read_exif_from_folder
18
+
19
+
20
+ #%% Functions
21
+
22
def rename_images(input_folder,
                  output_folder,
                  dry_run=False,
                  verbose=False,
                  read_exif_options=None,
                  n_copy_workers=8):
    """
    Copies images from a possibly-nested folder structure to a flat folder, adding each
    image's EXIF DateTimeOriginal value to its filename.

    The flat name of an image is its relative path with path separators replaced by
    underscores; the datetime tag (or 'unknown_datetime' if the image has no
    DateTimeOriginal value) is added via insert_before_extension().

    Args:
        input_folder (str): the folder to search for images, always recursive
        output_folder (str): the folder to which we will copy images; cannot be the
            same as [input_folder]
        dry_run (bool, optional): only map images, don't actually copy
        verbose (bool, optional): enable additional debug output while copying
        read_exif_options (ReadExifOptions, optional): parameters controlling the reading of
            EXIF information.  The caller's object is not modified; this function works on a
            copy in which [tags_to_include] and [verbose] are replaced.
        n_copy_workers (int, optional): number of parallel threads to use for copying

    Returns:
        dict: a dict mapping relative filenames in the input folder to relative filenames in
        the output folder
    """

    import copy

    assert os.path.isdir(input_folder), 'Input folder {} does not exist'.format(
        input_folder)

    if not dry_run:
        os.makedirs(output_folder,exist_ok=True)

    # Read exif information
    if read_exif_options is None:
        read_exif_options = ReadExifOptions()
    else:
        # We overwrite two fields below; don't let that leak back into the caller's object
        read_exif_options = copy.copy(read_exif_options)

    read_exif_options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth',
                                         'ExifImageHeight','DateTimeOriginal']
    read_exif_options.verbose = False

    exif_info = read_exif_from_folder(input_folder=input_folder,
                                      output_file=None,
                                      options=read_exif_options,
                                      filenames=None,recursive=True)

    print('Read EXIF information for {} images'.format(len(exif_info)))

    filename_to_exif_info = {info['file_name']:info for info in exif_info}

    image_files = find_images(input_folder,return_relative_paths=True,convert_slashes=True,recursive=True)

    for fn in image_files:
        assert fn in filename_to_exif_info, 'No EXIF info available for {}'.format(fn)

    input_fn_relative_to_output_fn_relative = {}

    # Only used to detect two inputs that flatten to the same output name
    output_fn_relative_to_input_fn_relative = {}

    # fn_relative = image_files[0]
    for fn_relative in image_files:

        image_exif_info = filename_to_exif_info[fn_relative]
        if 'exif_tags' in image_exif_info:
            image_exif_info = image_exif_info['exif_tags']

        if image_exif_info is None or \
            'DateTimeOriginal' not in image_exif_info or \
            image_exif_info['DateTimeOriginal'] is None:

            dt_tag = 'unknown_datetime'
            print('Warning: no datetime for {}'.format(fn_relative))

        else:

            # Colons and spaces are awkward in filenames, so "2024:05:25 10:11:12" becomes
            # "2024-05-25_10-11-12"
            dt_tag = str(image_exif_info['DateTimeOriginal']).replace(':','-').replace(' ','_').strip()

        flat_filename = fn_relative.replace('\\','/').replace('/','_')

        output_fn_relative = insert_before_extension(flat_filename,dt_tag)

        # E.g. "a_b/c.jpg" and "a/b_c.jpg" flatten to the same name; the later copy would
        # silently overwrite the earlier one, so at least tell the user.
        if output_fn_relative in output_fn_relative_to_input_fn_relative:
            print('Warning: {} and {} both map to output file {}'.format(
                output_fn_relative_to_input_fn_relative[output_fn_relative],
                fn_relative,output_fn_relative))
        output_fn_relative_to_input_fn_relative[output_fn_relative] = fn_relative

        input_fn_relative_to_output_fn_relative[fn_relative] = output_fn_relative

    # ...for each image

    if not dry_run:

        input_fn_abs_to_output_fn_abs = {
            os.path.join(input_folder,input_fn_relative):
                os.path.join(output_folder,output_fn_relative)
            for input_fn_relative,output_fn_relative in
            input_fn_relative_to_output_fn_relative.items()
        }

        parallel_copy_files(input_file_to_output_file=input_fn_abs_to_output_fn_abs,
                            max_workers=n_copy_workers,
                            use_threads=True,
                            overwrite=True,
                            verbose=verbose)

    return input_fn_relative_to_output_fn_relative
118
+
119
+ # ...def rename_images()
120
+
121
+
122
+ #%% Interactive driver
123
+
124
+ if False:  # The guard is always false, so nothing below runs when this module is imported or executed
125
+
126
+ pass
127
+
128
+ #%% Configure options
129
+ # Example settings for a manual run (presumably executed cell-by-cell in an IDE); the paths are machine-specific
130
+ input_folder = r'G:\camera_traps\camera_trap_videos\2024.05.25\cam3'
131
+ output_folder = r'G:\camera_traps\camera_trap_videos\2024.05.25\cam3_flat'
132
+ dry_run = False
133
+ verbose = True
134
+ read_exif_options = ReadExifOptions()
135
+ read_exif_options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight','DateTime',
136
+ 'DateTimeOriginal']  # NOTE(review): 'DateTime' is listed twice; rename_images() replaces tags_to_include anyway
137
+ read_exif_options.n_workers = 8
138
+ read_exif_options.verbose = verbose  # rename_images() sets verbose to False on the options it uses, so this has no effect there
139
+ n_copy_workers = 8
140
+
141
+
142
+ #%% Programmatic execution
143
+ # Runs the rename with the settings above and keeps the returned input-name --> output-name mapping
144
+ input_fn_relative_to_output_fn_relative = rename_images(input_folder,
145
+ output_folder,
146
+ dry_run=dry_run,
147
+ verbose=verbose,
148
+ read_exif_options=read_exif_options,
149
+ n_copy_workers=n_copy_workers)
150
+
151
+
152
+ #%% Command-line driver
153
+
154
+ import sys,argparse
155
+
156
def main():
    """
    Command-line entry point: parses the arguments and calls rename_images().

    With no arguments at all, prints the help text and exits with status 0.
    """

    parser = argparse.ArgumentParser(
        description='Copies images from a possibly-nested folder structure to a flat folder structure, ' + \
                    'adding datetime information from EXIF to each filename')

    parser.add_argument(
        'input_folder',
        type=str,
        help='The folder to search for images, always recursive')

    parser.add_argument(
        'output_folder',
        type=str,
        help='The folder to which we should write the flattened image structure')

    parser.add_argument(
        '--dry_run',
        action='store_true',
        help="Only map images, don't actually copy")

    # Defaults to the same value rename_images() uses, so existing command lines behave as before
    parser.add_argument(
        '--n_copy_workers',
        type=int,
        default=8,
        help='Number of parallel threads to use for copying (default 8)')

    # argparse would report an error for missing positional arguments; show the help instead
    if len(sys.argv[1:]) == 0:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    rename_images(args.input_folder,args.output_folder,dry_run=args.dry_run,
                  verbose=True,read_exif_options=None,
                  n_copy_workers=args.n_copy_workers)

if __name__ == '__main__':
    main()
@@ -0,0 +1,189 @@
1
+ """
2
+
3
+ resize_coco_dataset.py
4
+
5
+ Given a COCO-formatted dataset, resizes all the images to a target size,
6
+ scaling bounding boxes accordingly.
7
+
8
+ """
9
+
10
+ #%% Imports and constants
11
+
12
+ import os
13
+ import json
14
+ import shutil
15
+
16
+ from tqdm import tqdm
17
+ from collections import defaultdict
18
+
19
+ from megadetector.utils.path_utils import insert_before_extension
20
+ from megadetector.visualization.visualization_utils import \
21
+ open_image, resize_image, exif_preserving_save
22
+
23
+
24
+ #%% Functions
25
+
26
def resize_coco_dataset(input_folder,input_filename,
                        output_folder,output_filename,
                        target_size=(-1,-1),
                        correct_size_image_handling='copy'):
    """
    Given a COCO-formatted dataset (images in input_folder, data in input_filename), resizes
    all the images to a target size (in output_folder) and scales bounding boxes accordingly.

    Args:
        input_folder (str): the folder where images live; filenames in [input_filename] should
            be relative to [input_folder]
        input_filename (str): the (input) COCO-formatted .json file containing annotations
        output_folder (str): the folder to which we should write resized images; can be the
            same as [input_folder], in which case images are over-written
        output_filename (str): the COCO-formatted .json file we should generate that refers to
            the resized images
        target_size (list or tuple of ints): this should be a tuple/list of ints, with length 2
            (w,h).  If either dimension is -1, aspect ratio will be preserved.  If both
            dimensions are -1, this means "keep the original size".  If both dimensions are -1
            and correct_size_image_handling is 'copy', this function is basically a no-op.
        correct_size_image_handling (str): what to do with an image that is already the
            target size.  Can be 'copy' (in which case the original image is just copied to
            the output folder) or 'rewrite' (in which case the image is opened via PIL and
            re-written, attempting to preserve the same quality).  The only reason to use
            'rewrite' is the case where you're superstitious about biases coming from images
            in a training set being written by different image encoders.

    Returns:
        dict: the COCO database with resized images, identical to the content of
        [output_filename]

    Raises:
        ValueError: if an image is already the target size and [correct_size_image_handling]
            is neither 'copy' nor 'rewrite'
        AssertionError: if an image listed in [input_filename] doesn't exist
    """

    # Read input data
    with open(input_filename,'r') as f:
        d = json.load(f)

    # Map image IDs to annotations
    image_id_to_annotations = defaultdict(list)
    for ann in d['annotations']:
        image_id_to_annotations[ann['image_id']].append(ann)

    # For each image

    # TODO: this is trivially parallelizable
    #
    # im = d['images'][0]
    for im in tqdm(d['images']):

        input_fn_relative = im['file_name']
        input_fn_abs = os.path.join(input_folder,input_fn_relative)
        assert os.path.isfile(input_fn_abs), "Can't find image file {}".format(input_fn_abs)

        output_fn_abs = os.path.join(output_folder,input_fn_relative)
        os.makedirs(os.path.dirname(output_fn_abs),exist_ok=True)

        pil_im = open_image(input_fn_abs)
        input_w = pil_im.width
        input_h = pil_im.height

        # A target dimension of -1 means "whatever preserves the aspect ratio", so it is
        # satisfied by any input size.  This covers (-1,-1) ("keep the original size"), an
        # exact (w,h) match, and e.g. (1600,-1) for an image that is already 1600 wide.
        width_matches = target_size[0] in (-1,input_w)
        height_matches = target_size[1] in (-1,input_h)

        # If the image is already the right size...
        if width_matches and height_matches:

            output_w = input_w
            output_h = input_h

            if correct_size_image_handling == 'copy':
                # When output_folder is the same as input_folder, source and destination are
                # the same file; shutil.copyfile() raises SameFileError in that case, and
                # there is nothing to copy anyway.
                already_in_place = os.path.exists(output_fn_abs) and \
                    os.path.samefile(input_fn_abs,output_fn_abs)
                if not already_in_place:
                    shutil.copyfile(input_fn_abs,output_fn_abs)
            elif correct_size_image_handling == 'rewrite':
                exif_preserving_save(pil_im,output_fn_abs)
            else:
                raise ValueError('Unrecognized value {} for correct_size_image_handling'.format(
                    correct_size_image_handling))

        else:

            pil_im = resize_image(pil_im, target_size[0], target_size[1])
            output_w = pil_im.width
            output_h = pil_im.height
            exif_preserving_save(pil_im,output_fn_abs)

        im['width'] = output_w
        im['height'] = output_h

        # For each box
        annotations_this_image = image_id_to_annotations[im['id']]

        # ann = annotations_this_image[0]
        for ann in annotations_this_image:

            if 'bbox' in ann:

                # boxes are [x,y,w,h]
                bbox = ann['bbox']

                # Do we need to scale this box?
                if (output_w != input_w) or (output_h != input_h):
                    width_scale = output_w/input_w
                    height_scale = output_h/input_h
                    bbox = \
                        [bbox[0] * width_scale,
                         bbox[1] * height_scale,
                         bbox[2] * width_scale,
                         bbox[3] * height_scale]

                ann['bbox'] = bbox

            # ...if this annotation has a box

        # ...for each annotation

    # ...for each image

    # Write output file
    with open(output_filename,'w') as f:
        json.dump(d,f,indent=1)

    return d
142
+
143
+ # ...def resize_coco_dataset(...)
144
+
145
+
146
+ #%% Interactive driver
147
+
148
+ if False:  # The guard is always false, so nothing below runs when this module is imported or executed
149
+
150
+ pass
151
+
152
+ #%% Test resizing
153
+ # Example settings for a manual run (presumably executed cell-by-cell in an IDE); the paths are machine-specific
154
+ input_folder = os.path.expanduser('~/data/usgs-tegus/usgs-kissel-training')
155
+ input_filename = os.path.expanduser('~/data/usgs-tegus/usgs-kissel-training.json')
156
+ target_size = (1600,-1)  # (w,h): width 1600, with -1 letting the height follow the aspect ratio
157
+
158
+ output_filename = insert_before_extension(input_filename,'resized-test')
159
+ output_folder = input_folder + '-resized-test'
160
+ # 'rewrite' re-saves images that are already the target size instead of copying them
161
+ correct_size_image_handling = 'rewrite'
162
+
163
+ resize_coco_dataset(input_folder,input_filename,
164
+ output_folder,output_filename,
165
+ target_size=target_size,
166
+ correct_size_image_handling=correct_size_image_handling)
167
+
168
+
169
+ #%% Preview
170
+ # Renders the resized dataset to an HTML page via visualize_db so the scaled boxes can be checked by eye
171
+ from megadetector.visualization import visualize_db
172
+ options = visualize_db.DbVizOptions()
173
+ options.parallelize_rendering = True
174
+ options.viz_size = (900, -1)
175
+ options.num_to_visualize = 5000
176
+
177
+ html_file,_ = visualize_db.visualize_db(output_filename,
178
+ os.path.expanduser('~/tmp/resize_coco_preview'),
179
+ output_folder,options)
180
+
181
+ # Opens the generated preview page
182
+ from megadetector.utils import path_utils # noqa
183
+ path_utils.open_file(html_file)
184
+
185
+
186
+ #%% Command-line driver
187
+
188
+ # TODO
189
+