megadetector 5.0.11__py3-none-any.whl → 5.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (201) hide show
  1. megadetector/api/__init__.py +0 -0
  2. megadetector/api/batch_processing/__init__.py +0 -0
  3. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  4. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. megadetector/api/batch_processing/api_core/batch_service/score.py +439 -0
  6. megadetector/api/batch_processing/api_core/server.py +294 -0
  7. megadetector/api/batch_processing/api_core/server_api_config.py +98 -0
  8. megadetector/api/batch_processing/api_core/server_app_config.py +55 -0
  9. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +220 -0
  10. megadetector/api/batch_processing/api_core/server_job_status_table.py +152 -0
  11. megadetector/api/batch_processing/api_core/server_orchestration.py +360 -0
  12. megadetector/api/batch_processing/api_core/server_utils.py +92 -0
  13. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  14. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +46 -0
  15. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  16. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +152 -0
  17. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  18. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  19. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  20. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  21. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +126 -0
  22. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  23. megadetector/api/synchronous/__init__.py +0 -0
  24. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  25. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +152 -0
  26. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -0
  27. megadetector/api/synchronous/api_core/animal_detection_api/config.py +35 -0
  28. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  29. megadetector/api/synchronous/api_core/tests/load_test.py +110 -0
  30. megadetector/classification/__init__.py +0 -0
  31. megadetector/classification/aggregate_classifier_probs.py +108 -0
  32. megadetector/classification/analyze_failed_images.py +227 -0
  33. megadetector/classification/cache_batchapi_outputs.py +198 -0
  34. megadetector/classification/create_classification_dataset.py +627 -0
  35. megadetector/classification/crop_detections.py +516 -0
  36. megadetector/classification/csv_to_json.py +226 -0
  37. megadetector/classification/detect_and_crop.py +855 -0
  38. megadetector/classification/efficientnet/__init__.py +9 -0
  39. megadetector/classification/efficientnet/model.py +415 -0
  40. megadetector/classification/efficientnet/utils.py +610 -0
  41. megadetector/classification/evaluate_model.py +520 -0
  42. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  43. megadetector/classification/json_to_azcopy_list.py +63 -0
  44. megadetector/classification/json_validator.py +699 -0
  45. megadetector/classification/map_classification_categories.py +276 -0
  46. megadetector/classification/merge_classification_detection_output.py +506 -0
  47. megadetector/classification/prepare_classification_script.py +194 -0
  48. megadetector/classification/prepare_classification_script_mc.py +228 -0
  49. megadetector/classification/run_classifier.py +287 -0
  50. megadetector/classification/save_mislabeled.py +110 -0
  51. megadetector/classification/train_classifier.py +827 -0
  52. megadetector/classification/train_classifier_tf.py +725 -0
  53. megadetector/classification/train_utils.py +323 -0
  54. megadetector/data_management/__init__.py +0 -0
  55. megadetector/data_management/annotations/__init__.py +0 -0
  56. megadetector/data_management/annotations/annotation_constants.py +34 -0
  57. megadetector/data_management/camtrap_dp_to_coco.py +239 -0
  58. megadetector/data_management/cct_json_utils.py +395 -0
  59. megadetector/data_management/cct_to_md.py +176 -0
  60. megadetector/data_management/cct_to_wi.py +289 -0
  61. megadetector/data_management/coco_to_labelme.py +272 -0
  62. megadetector/data_management/coco_to_yolo.py +662 -0
  63. megadetector/data_management/databases/__init__.py +0 -0
  64. megadetector/data_management/databases/add_width_and_height_to_db.py +33 -0
  65. megadetector/data_management/databases/combine_coco_camera_traps_files.py +206 -0
  66. megadetector/data_management/databases/integrity_check_json_db.py +477 -0
  67. megadetector/data_management/databases/subset_json_db.py +115 -0
  68. megadetector/data_management/generate_crops_from_cct.py +149 -0
  69. megadetector/data_management/get_image_sizes.py +189 -0
  70. megadetector/data_management/importers/add_nacti_sizes.py +52 -0
  71. megadetector/data_management/importers/add_timestamps_to_icct.py +79 -0
  72. megadetector/data_management/importers/animl_results_to_md_results.py +158 -0
  73. megadetector/data_management/importers/auckland_doc_test_to_json.py +373 -0
  74. megadetector/data_management/importers/auckland_doc_to_json.py +201 -0
  75. megadetector/data_management/importers/awc_to_json.py +191 -0
  76. megadetector/data_management/importers/bellevue_to_json.py +273 -0
  77. megadetector/data_management/importers/cacophony-thermal-importer.py +796 -0
  78. megadetector/data_management/importers/carrizo_shrubfree_2018.py +269 -0
  79. megadetector/data_management/importers/carrizo_trail_cam_2017.py +289 -0
  80. megadetector/data_management/importers/cct_field_adjustments.py +58 -0
  81. megadetector/data_management/importers/channel_islands_to_cct.py +913 -0
  82. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +180 -0
  83. megadetector/data_management/importers/eMammal/eMammal_helpers.py +249 -0
  84. megadetector/data_management/importers/eMammal/make_eMammal_json.py +223 -0
  85. megadetector/data_management/importers/ena24_to_json.py +276 -0
  86. megadetector/data_management/importers/filenames_to_json.py +386 -0
  87. megadetector/data_management/importers/helena_to_cct.py +283 -0
  88. megadetector/data_management/importers/idaho-camera-traps.py +1407 -0
  89. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +294 -0
  90. megadetector/data_management/importers/jb_csv_to_json.py +150 -0
  91. megadetector/data_management/importers/mcgill_to_json.py +250 -0
  92. megadetector/data_management/importers/missouri_to_json.py +490 -0
  93. megadetector/data_management/importers/nacti_fieldname_adjustments.py +79 -0
  94. megadetector/data_management/importers/noaa_seals_2019.py +181 -0
  95. megadetector/data_management/importers/pc_to_json.py +365 -0
  96. megadetector/data_management/importers/plot_wni_giraffes.py +123 -0
  97. megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -0
  98. megadetector/data_management/importers/prepare_zsl_imerit.py +131 -0
  99. megadetector/data_management/importers/rspb_to_json.py +356 -0
  100. megadetector/data_management/importers/save_the_elephants_survey_A.py +320 -0
  101. megadetector/data_management/importers/save_the_elephants_survey_B.py +329 -0
  102. megadetector/data_management/importers/snapshot_safari_importer.py +758 -0
  103. megadetector/data_management/importers/snapshot_safari_importer_reprise.py +665 -0
  104. megadetector/data_management/importers/snapshot_serengeti_lila.py +1067 -0
  105. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +150 -0
  106. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +153 -0
  107. megadetector/data_management/importers/sulross_get_exif.py +65 -0
  108. megadetector/data_management/importers/timelapse_csv_set_to_json.py +490 -0
  109. megadetector/data_management/importers/ubc_to_json.py +399 -0
  110. megadetector/data_management/importers/umn_to_json.py +507 -0
  111. megadetector/data_management/importers/wellington_to_json.py +263 -0
  112. megadetector/data_management/importers/wi_to_json.py +442 -0
  113. megadetector/data_management/importers/zamba_results_to_md_results.py +181 -0
  114. megadetector/data_management/labelme_to_coco.py +547 -0
  115. megadetector/data_management/labelme_to_yolo.py +272 -0
  116. megadetector/data_management/lila/__init__.py +0 -0
  117. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +97 -0
  118. megadetector/data_management/lila/add_locations_to_nacti.py +147 -0
  119. megadetector/data_management/lila/create_lila_blank_set.py +558 -0
  120. megadetector/data_management/lila/create_lila_test_set.py +152 -0
  121. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  122. megadetector/data_management/lila/download_lila_subset.py +178 -0
  123. megadetector/data_management/lila/generate_lila_per_image_labels.py +516 -0
  124. megadetector/data_management/lila/get_lila_annotation_counts.py +170 -0
  125. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  126. megadetector/data_management/lila/lila_common.py +300 -0
  127. megadetector/data_management/lila/test_lila_metadata_urls.py +132 -0
  128. megadetector/data_management/ocr_tools.py +874 -0
  129. megadetector/data_management/read_exif.py +681 -0
  130. megadetector/data_management/remap_coco_categories.py +84 -0
  131. megadetector/data_management/remove_exif.py +66 -0
  132. megadetector/data_management/resize_coco_dataset.py +189 -0
  133. megadetector/data_management/wi_download_csv_to_coco.py +246 -0
  134. megadetector/data_management/yolo_output_to_md_output.py +441 -0
  135. megadetector/data_management/yolo_to_coco.py +676 -0
  136. megadetector/detection/__init__.py +0 -0
  137. megadetector/detection/detector_training/__init__.py +0 -0
  138. megadetector/detection/detector_training/model_main_tf2.py +114 -0
  139. megadetector/detection/process_video.py +702 -0
  140. megadetector/detection/pytorch_detector.py +341 -0
  141. megadetector/detection/run_detector.py +779 -0
  142. megadetector/detection/run_detector_batch.py +1219 -0
  143. megadetector/detection/run_inference_with_yolov5_val.py +917 -0
  144. megadetector/detection/run_tiled_inference.py +934 -0
  145. megadetector/detection/tf_detector.py +189 -0
  146. megadetector/detection/video_utils.py +606 -0
  147. megadetector/postprocessing/__init__.py +0 -0
  148. megadetector/postprocessing/add_max_conf.py +64 -0
  149. megadetector/postprocessing/categorize_detections_by_size.py +163 -0
  150. megadetector/postprocessing/combine_api_outputs.py +249 -0
  151. megadetector/postprocessing/compare_batch_results.py +958 -0
  152. megadetector/postprocessing/convert_output_format.py +396 -0
  153. megadetector/postprocessing/load_api_results.py +195 -0
  154. megadetector/postprocessing/md_to_coco.py +310 -0
  155. megadetector/postprocessing/md_to_labelme.py +330 -0
  156. megadetector/postprocessing/merge_detections.py +401 -0
  157. megadetector/postprocessing/postprocess_batch_results.py +1902 -0
  158. megadetector/postprocessing/remap_detection_categories.py +170 -0
  159. megadetector/postprocessing/render_detection_confusion_matrix.py +660 -0
  160. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +211 -0
  161. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +83 -0
  162. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1631 -0
  163. megadetector/postprocessing/separate_detections_into_folders.py +730 -0
  164. megadetector/postprocessing/subset_json_detector_output.py +696 -0
  165. megadetector/postprocessing/top_folders_to_bottom.py +223 -0
  166. megadetector/taxonomy_mapping/__init__.py +0 -0
  167. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  168. megadetector/taxonomy_mapping/map_new_lila_datasets.py +150 -0
  169. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -0
  170. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +590 -0
  171. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  172. megadetector/taxonomy_mapping/simple_image_download.py +219 -0
  173. megadetector/taxonomy_mapping/species_lookup.py +834 -0
  174. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  175. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  176. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  177. megadetector/utils/__init__.py +0 -0
  178. megadetector/utils/azure_utils.py +178 -0
  179. megadetector/utils/ct_utils.py +612 -0
  180. megadetector/utils/directory_listing.py +246 -0
  181. megadetector/utils/md_tests.py +968 -0
  182. megadetector/utils/path_utils.py +1044 -0
  183. megadetector/utils/process_utils.py +157 -0
  184. megadetector/utils/sas_blob_utils.py +509 -0
  185. megadetector/utils/split_locations_into_train_val.py +228 -0
  186. megadetector/utils/string_utils.py +92 -0
  187. megadetector/utils/url_utils.py +323 -0
  188. megadetector/utils/write_html_image_list.py +225 -0
  189. megadetector/visualization/__init__.py +0 -0
  190. megadetector/visualization/plot_utils.py +293 -0
  191. megadetector/visualization/render_images_with_thumbnails.py +275 -0
  192. megadetector/visualization/visualization_utils.py +1536 -0
  193. megadetector/visualization/visualize_db.py +550 -0
  194. megadetector/visualization/visualize_detector_output.py +405 -0
  195. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/METADATA +1 -1
  196. megadetector-5.0.12.dist-info/RECORD +199 -0
  197. megadetector-5.0.12.dist-info/top_level.txt +1 -0
  198. megadetector-5.0.11.dist-info/RECORD +0 -5
  199. megadetector-5.0.11.dist-info/top_level.txt +0 -1
  200. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/LICENSE +0 -0
  201. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/WHEEL +0 -0
@@ -0,0 +1,115 @@
1
+ """
2
+
3
+ subset_json_db.py
4
+
5
+ Select a subset of images (and associated annotations) from a .json file in COCO
6
+ Camera Traps format based on a string query.
7
+
8
+ To subset .json files in the MegaDetector output format, see
9
+ subset_json_detector_output.py.
10
+
11
+ """
12
+
13
+ #%% Constants and imports
14
+
15
+ import sys
16
+ import json
17
+ import argparse
18
+
19
+ from tqdm import tqdm
20
+
21
+
22
+ #%% Functions
23
+
24
+ def subset_json_db(input_json, query, output_json=None, ignore_case=False):
25
+ """
26
+ Given a json file (or dictionary already loaded from a json file), produce a new
27
+ database containing only the images whose filenames contain the string 'query',
28
+ optionally writing that DB output to a new json file.
29
+
30
+ Args:
31
+ input_json (str): COCO Camera Traps .json file to load, or an already-loaded dict
32
+ query (str): string to query for, only include images in the output whose filenames
33
+ contain this string.
34
+ output_json (str, optional): file to write the resulting .json file to
35
+ ignore_case (bool, optional): whether to perform a case-insensitive search for [query]
36
+
37
+ Returns:
38
+ dict: possibly-modified CCT dictionary
39
+ """
40
+
41
+ if ignore_case:
42
+ query = query.lower()
43
+
44
+ # Load the input file if necessary
45
+ if isinstance(input_json,str):
46
+ print('Loading input .json...')
47
+ with open(input_json, 'r') as f:
48
+ data = json.load(f)
49
+ else:
50
+ data = input_json
51
+
52
+ # Find images matching the query
53
+ images = []
54
+ image_ids = set()
55
+
56
+ for im in tqdm(data['images']):
57
+ fn = im['file_name']
58
+ if ignore_case:
59
+ fn = fn.lower()
60
+ if query in fn:
61
+ images.append(im)
62
+ image_ids.add(im['id'])
63
+
64
+ # Find annotations referring to those images
65
+ annotations = []
66
+
67
+ for ann in tqdm(data['annotations']):
68
+ if ann['image_id'] in image_ids:
69
+ annotations.append(ann)
70
+
71
+ output_data = data
72
+ output_data['images'] = images
73
+ output_data['annotations'] = annotations
74
+
75
+ # Write the output file if requested
76
+ if output_json is not None:
77
+ print('Writing output .json...')
78
+ json.dump(output_data,open(output_json,'w'),indent=1)
79
+
80
+ return output_data
81
+
82
+
83
+ #%% Interactive driver
84
+
85
+ if False:
86
+
87
+ #%%
88
+
89
+ input_json = r"e:\Statewide_wolf_container\idfg_20190409.json"
90
+ output_json = r"e:\Statewide_wolf_container\idfg_20190409_clearcreek.json"
91
+ query = 'clearcreek'
92
+ ignore_case = True
93
+ db = subset_json_db(input_json, query, output_json, ignore_case)
94
+
95
+
96
+ #%% Command-line driver
97
+
98
+ def main():
99
+
100
+ parser = argparse.ArgumentParser()
101
+ parser.add_argument('input_json', type=str, help='Input file (a COCO Camera Traps .json file)')
102
+ parser.add_argument('output_json', type=str, help='Output file')
103
+ parser.add_argument('query', type=str, help='Filename query')
104
+ parser.add_argument('--ignore_case', action='store_true')
105
+
106
+ if len(sys.argv[1:]) == 0:
107
+ parser.print_help()
108
+ parser.exit()
109
+
110
+ args = parser.parse_args()
111
+
112
+ subset_json_db(args.input_json,args.query,args.output_json,args.ignore_case)
113
+
114
+ if __name__ == '__main__':
115
+ main()
@@ -0,0 +1,149 @@
1
+ """
2
+
3
+ generate_crops_from_cct.py
4
+
5
+ Given a .json file in COCO Camera Traps format, creates a cropped image for
6
+ each bounding box.
7
+
8
+ """
9
+
10
+ #%% Imports and constants
11
+
12
+ import os
13
+ import json
14
+
15
+ from tqdm import tqdm
16
+ from PIL import Image
17
+
18
+
19
+ #%% Functions
20
+
21
+ def generate_crops_from_cct(cct_file,image_dir,output_dir,padding=0,flat_output=True):
22
+ """
23
+ Given a .json file in COCO Camera Traps format, creates a cropped image for
24
+ each bounding box.
25
+
26
+ Args:
27
+ cct_file (str): the COCO .json file from which we should load data
28
+ image_dir (str): the folder where the images live; filenames in the .json
29
+ file should be relative to this folder
30
+ output_dir (str): the folder where we should write cropped images
31
+ padding (float, optional): number of pixels we should expand each box before
32
+ cropping
33
+ flat_output (bool, optional): if False, folder structure will be preserved
34
+ in the output, e.g. the image a/b/c/d.jpg will result in image files
35
+ in the output folder called, e.g., a/b/c/d_crop_000_id_12345.jpg. If
36
+ [flat_output] is True, the corresponding output image will be
37
+ a_b_c_d_crop_000_id_12345.jpg.
38
+ """
39
+
40
+ ## Read and validate input
41
+
42
+ assert os.path.isfile(cct_file)
43
+ assert os.path.isdir(image_dir)
44
+ os.makedirs(output_dir,exist_ok=True)
45
+
46
+ with open(cct_file,'r') as f:
47
+ d = json.load(f)
48
+
49
+
50
+ ## Find annotations for each image
51
+
52
+ from collections import defaultdict
53
+
54
+ # This actually maps image IDs to annotations, but only to annotations
55
+ # containing boxes
56
+ image_id_to_boxes = defaultdict(list)
57
+
58
+ n_boxes = 0
59
+
60
+ for ann in d['annotations']:
61
+ if 'bbox' in ann:
62
+ image_id_to_boxes[ann['image_id']].append(ann)
63
+ n_boxes += 1
64
+
65
+ print('Found {} boxes in {} annotations for {} images'.format(
66
+ n_boxes,len(d['annotations']),len(d['images'])))
67
+
68
+
69
+ ## Generate crops
70
+
71
+ # im = d['images'][0]
72
+ for im in tqdm(d['images']):
73
+
74
+ input_image_fn = os.path.join(os.path.join(image_dir,im['file_name']))
75
+ assert os.path.isfile(input_image_fn), 'Could not find image {}'.format(input_image_fn)
76
+
77
+ if im['id'] not in image_id_to_boxes:
78
+ continue
79
+
80
+ annotations_this_image = image_id_to_boxes[im['id']]
81
+
82
+ # Load the image
83
+ img = Image.open(input_image_fn)
84
+
85
+ # Generate crops
86
+ # i_ann = 0; ann = annotations_this_image[i_ann]
87
+ for i_ann,ann in enumerate(annotations_this_image):
88
+
89
+ # x/y/w/h, origin at the upper-left
90
+ bbox = ann['bbox']
91
+
92
+ xmin = bbox[0]
93
+ ymin = bbox[1]
94
+ xmax = xmin + bbox[2]
95
+ ymax = ymin + bbox[3]
96
+
97
+ xmin -= padding / 2
98
+ ymin -= padding / 2
99
+ xmax += padding / 2
100
+ ymax += padding / 2
101
+
102
+ xmin = max(xmin,0)
103
+ ymin = max(ymin,0)
104
+ xmax = min(xmax,img.width-1)
105
+ ymax = min(ymax,img.height-1)
106
+
107
+ crop = img.crop(box=[xmin, ymin, xmax, ymax])
108
+
109
+ output_fn = os.path.splitext(im['file_name'])[0].replace('\\','/')
110
+ if flat_output:
111
+ output_fn = output_fn.replace('/','_')
112
+ output_fn = output_fn + '_crop' + str(i_ann).zfill(3) + '_id_' + ann['id']
113
+ output_fn = output_fn + '.jpg'
114
+
115
+ output_full_path = os.path.join(output_dir,output_fn)
116
+
117
+ if not flat_output:
118
+ os.makedirs(os.path.dirname(output_full_path),exist_ok=True)
119
+
120
+ crop.save(output_full_path)
121
+
122
+ # ...for each box
123
+
124
+ # ...for each image
125
+
126
+ # ...generate_crops_from_cct()
127
+
128
+
129
+ #%% Interactive driver
130
+
131
+ if False:
132
+
133
+ pass
134
+
135
+ #%%
136
+
137
+ cct_file = os.path.expanduser('~/data/noaa/noaa_estuary_fish.json')
138
+ image_dir = os.path.expanduser('~/data/noaa/JPEGImages')
139
+ padding = 50
140
+ flat_output = True
141
+ output_dir = '/home/user/tmp/noaa-fish-crops'
142
+
143
+ generate_crops_from_cct(cct_file,image_dir,output_dir,padding,flat_output=True)
144
+ files = os.listdir(output_dir)
145
+
146
+
147
+ #%% Command-line driver
148
+
149
+ # TODO
@@ -0,0 +1,189 @@
1
+ """
2
+
3
+ get_image_sizes.py
4
+
5
+ Given a json-formatted list of image filenames, retrieves the width and height of
6
+ every image, optionally writing the results to a new .json file.
7
+
8
+ """
9
+
10
+ #%% Constants and imports
11
+
12
+ import argparse
13
+ import json
14
+ import os
15
+ import sys
16
+
17
+ from PIL import Image
18
+
19
+ from multiprocessing.pool import ThreadPool
20
+ from multiprocessing.pool import Pool
21
+ from functools import partial
22
+ from tqdm import tqdm
23
+
24
+ from megadetector.utils.path_utils import find_images
25
+
26
# Base folder for relative image paths; appears unused in this module
# (callers pass [image_prefix] per call instead) — TODO confirm
image_base = ''

# Default worker count for get_image_sizes()
default_n_threads = 1

# Module-level default for thread-vs-process parallelization; note that
# get_image_sizes() takes its own [use_threads] parameter, which shadows this
use_threads = False
29
+
30
+
31
+ #%% Processing functions
32
+
33
+ def _get_image_size(image_path,image_prefix=None):
34
+ """
35
+ Support function to get the size of a single image. Returns a (path,w,h) tuple.
36
+ w and h will be -1 if the image fails to load.
37
+ """
38
+
39
+ if image_prefix is not None:
40
+ full_path = os.path.join(image_prefix,image_path)
41
+ else:
42
+ full_path = image_path
43
+
44
+ # Is this image on disk?
45
+ if not os.path.isfile(full_path):
46
+ print('Could not find image {}'.format(full_path))
47
+ return (image_path,-1,-1)
48
+
49
+ try:
50
+ pil_im = Image.open(full_path)
51
+ w = pil_im.width
52
+ h = pil_im.height
53
+ return (image_path,w,h)
54
+ except Exception as e:
55
+ print('Error reading image {}: {}'.format(full_path,str(e)))
56
+ return (image_path,-1,-1)
57
+
58
+
59
+ def get_image_sizes(filenames,image_prefix=None,output_file=None,
60
+ n_workers=default_n_threads,use_threads=True,
61
+ recursive=True):
62
+ """
63
+ Gets the width and height of all images in [filenames], which can be:
64
+
65
+ * A .json-formatted file containing list of strings
66
+ * A folder
67
+ * A list of files
68
+
69
+ ...returning a list of (path,w,h) tuples, and optionally writing the results to [output_file].
70
+
71
+ Args:
72
+ filenames (str or list): the image filenames for which we should retrieve sizes,
73
+ can be the name of a .json-formatted file containing list of strings, a folder
74
+ in which we should enumerate images, or a list of files.
75
+ image_prefix (str, optional): optional prefix to add to images to get to full paths;
76
+ useful when [filenames] contains relative files, in which case [image_prefix] is the
77
+ base folder for the source images.
78
+ output_file (str, optional): a .json file to write the imgae sizes
79
+ n_workers (int, optional): number of parallel workers to use, set to <=1 to
80
+ disable parallelization
81
+ use_threads (bool, optional): whether to use threads (True) or processes (False)
82
+ for parallelization; not relevant if [n_workers] <= 1
83
+ recursive (bool, optional): only relevant if [filenames] is actually a folder,
84
+ determines whether image enumeration within that folder will be recursive
85
+
86
+ Returns:
87
+ list: list of (path,w,h) tuples
88
+ """
89
+
90
+ if output_file is not None:
91
+ assert os.path.isdir(os.path.dirname(output_file)), \
92
+ 'Illegal output file {}, parent folder does not exist'.format(output_file)
93
+
94
+ if isinstance(filenames,str) and os.path.isfile(filenames):
95
+ with open(filenames,'r') as f:
96
+ filenames = json.load(f)
97
+ filenames = [s.strip() for s in filenames]
98
+ elif isinstance(filenames,str) and os.path.isdir(filenames):
99
+ filenames = find_images(filenames,recursive=recursive,
100
+ return_relative_paths=False,convert_slashes=True)
101
+ else:
102
+ assert isinstance(filenames,list)
103
+
104
+ if n_workers <= 1:
105
+
106
+ all_results = []
107
+ for i_file,fn in tqdm(enumerate(filenames),total=len(filenames)):
108
+ all_results.append(_get_image_size(fn,image_prefix=image_prefix))
109
+
110
+ else:
111
+
112
+ print('Creating a pool with {} workers'.format(n_workers))
113
+ if use_threads:
114
+ pool = ThreadPool(n_workers)
115
+ else:
116
+ pool = Pool(n_workers)
117
+ # all_results = list(tqdm(pool.imap(process_image, filenames), total=len(filenames)))
118
+ all_results = list(tqdm(pool.imap(
119
+ partial(_get_image_size,image_prefix=image_prefix), filenames), total=len(filenames)))
120
+
121
+ if output_file is not None:
122
+ with open(output_file,'w') as f:
123
+ json.dump(all_results,f,indent=1)
124
+
125
+ return all_results
126
+
127
+
128
+ #%% Interactive driver
129
+
130
+ if False:
131
+
132
+ pass
133
+
134
+ #%%
135
+
136
+ # List images in a test folder
137
+ base_dir = r'c:\temp\test_images'
138
+ image_list_file = os.path.join(base_dir,'images.json')
139
+ relative_image_list_file = os.path.join(base_dir,'images_relative.json')
140
+ image_size_file = os.path.join(base_dir,'image_sizes.json')
141
+ from megadetector.utils import path_utils
142
+ image_names = path_utils.find_images(base_dir,recursive=True)
143
+
144
+ with open(image_list_file,'w') as f:
145
+ json.dump(image_names,f,indent=1)
146
+
147
+ relative_image_names = []
148
+ for s in image_names:
149
+ relative_image_names.append(os.path.relpath(s,base_dir))
150
+
151
+ with open(relative_image_list_file,'w') as f:
152
+ json.dump(relative_image_names,f,indent=1)
153
+
154
+
155
+ #%%
156
+
157
+ get_image_sizes(relative_image_list_file,image_size_file,image_prefix=base_dir,n_threads=4)
158
+
159
+
160
+ #%% Command-line driver
161
+
162
+ def main():
163
+
164
+ parser = argparse.ArgumentParser()
165
+ parser.add_argument('filenames',type=str,
166
+ help='Folder from which we should fetch image sizes, or .json file with a list of filenames')
167
+ parser.add_argument('output_file',type=str,
168
+ help='Output file (.json) to which we should write image size information')
169
+ parser.add_argument('--image_prefix', type=str, default=None,
170
+ help='Prefix to append to image filenames, only relevant if [filenames] points to a list of ' + \
171
+ 'relative paths')
172
+ parser.add_argument('--n_threads', type=int, default=default_n_threads,
173
+ help='Number of concurrent workers, set to <=1 to disable parallelization (default {})'.format(
174
+ default_n_threads))
175
+
176
+ if len(sys.argv[1:])==0:
177
+ parser.print_help()
178
+ parser.exit()
179
+
180
+ args = parser.parse_args()
181
+
182
+ _ = get_image_sizes(filenames=args.filenames,
183
+ output_file=args.output_file,
184
+ image_prefix=args.image_prefix,
185
+ n_workers=args.n_threads)
186
+
187
+ if __name__ == '__main__':
188
+
189
+ main()
@@ -0,0 +1,52 @@
1
+ """
2
+
3
+ add_nacti_sizes.py
4
+
5
+ NACTI bounding box metadata was posted before we inclduded width and height as semi-standard
6
+ fields; pull size information from the main metadata file and add to the bbox file.
7
+
8
+ """
9
+
10
+ #%% Constants and environment
11
+
12
+ import json
13
+ from tqdm import tqdm
14
+
15
+ input_file = 'G:/temp/nacti_metadata.json'
16
+ input_bbox_file = 'G:/temp/nacti_20200401_bboxes.json'
17
+ output_bbox_file = 'G:/temp/nacti_20230920_bboxes.json'
18
+
19
+
20
+ #%% Read .json files
21
+
22
+ with open(input_file,'r') as f:
23
+ input_data = json.load(f)
24
+
25
+ with open(input_bbox_file,'r') as f:
26
+ input_bbox_data = json.load(f)
27
+
28
+ print('Finished reading .json data')
29
+
30
+
31
+ #%% Map image names to width and height
32
+
33
+ filename_to_size = {}
34
+ for im in tqdm(input_data['images']):
35
+ filename_to_size[im['file_name']] = (im['width'],im['height'])
36
+
37
+
38
+ #%% Add to output data
39
+
40
+ for im in tqdm(input_bbox_data['images']):
41
+ size = filename_to_size[im['file_name']]
42
+ im['width'] = size[0]
43
+ im['height'] = size[1]
44
+
45
+
46
+ #%% Write output
47
+
48
+ output_bbox_data = input_bbox_data
49
+ output_bbox_data['version'] = '2023-09-20'
50
+
51
+ with open(output_bbox_file,'w') as f:
52
+ json.dump(output_bbox_data,f,indent=1)
@@ -0,0 +1,79 @@
1
+ """
2
+
3
+ add_timestamps_to_icct.py
4
+
5
+ The Island Conservation Camera Traps dataset was originally posted without timestamps
6
+ in either .json metadata or EXIF metadata. We pulled timestamps out using ocr_tools.py,
7
+ this script adds those timestamps into the .json metadata.
8
+
9
+ """
10
+
11
+ #%% Imports and constants
12
+
13
+ import json
14
+
15
+ ocr_results_file = r'g:\temp\ocr_results.2023.10.31.07.37.54.json'
16
+ input_metadata_file = r'd:\lila\islandconservationcameratraps\island_conservation.json'
17
+ output_metadata_file = r'g:\temp\island_conservation_camera_traps_1.02.json'
18
+ ocr_results_file_base = 'g:/temp/island_conservation_camera_traps/'
19
+ assert ocr_results_file_base.endswith('/')
20
+
21
+
22
+ #%% Read input metadata
23
+
24
+ with open(input_metadata_file,'r') as f:
25
+ input_metadata = json.load(f)
26
+
27
+ assert input_metadata['info']['version'] == '1.01'
28
+
29
+ # im = input_metadata['images'][0]
30
+ for im in input_metadata['images']:
31
+ assert 'datetime' not in im
32
+
33
+
34
+ #%% Read OCR results
35
+
36
+ with open(ocr_results_file,'r') as f:
37
+ abs_filename_to_ocr_results = json.load(f)
38
+
39
+ relative_filename_to_ocr_results = {}
40
+
41
+ for fn_abs in abs_filename_to_ocr_results:
42
+ assert ocr_results_file_base in fn_abs
43
+ fn_relative = fn_abs.replace(ocr_results_file_base,'')
44
+ relative_filename_to_ocr_results[fn_relative] = abs_filename_to_ocr_results[fn_abs]
45
+
46
+
47
+ #%% Add datetimes to metadata
48
+
49
+ images_not_in_datetime_results = []
50
+ images_with_failed_datetimes = []
51
+
52
+ for i_image,im in enumerate(input_metadata['images']):
53
+ if im['file_name'] not in relative_filename_to_ocr_results:
54
+ images_not_in_datetime_results.append(im)
55
+ im['datetime'] = None
56
+ continue
57
+ ocr_results = relative_filename_to_ocr_results[im['file_name']]
58
+ if ocr_results['datetime'] is None:
59
+ images_with_failed_datetimes.append(im)
60
+ im['datetime'] = None
61
+ continue
62
+ im['datetime'] = ocr_results['datetime']
63
+
64
+ print('{} of {} images were not in datetime results'.format(
65
+ len(images_not_in_datetime_results),len(input_metadata['images'])))
66
+
67
+ print('{} of {} images were had failed datetime results'.format(
68
+ len(images_with_failed_datetimes),len(input_metadata['images'])))
69
+
70
+ for im in input_metadata['images']:
71
+ assert 'datetime' in im
72
+
73
+
74
+ #%% Write output
75
+
76
+ input_metadata['info']['version'] = '1.02'
77
+
78
+ with open(output_metadata_file,'w') as f:
79
+ json.dump(input_metadata,f,indent=1)