megadetector 5.0.8__py3-none-any.whl → 5.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (190) hide show
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +65 -65
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
  20. api/batch_processing/postprocessing/compare_batch_results.py +113 -43
  21. api/batch_processing/postprocessing/convert_output_format.py +41 -16
  22. api/batch_processing/postprocessing/load_api_results.py +16 -17
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +52 -22
  25. api/batch_processing/postprocessing/merge_detections.py +14 -14
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
  27. api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +102 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -263
  71. data_management/coco_to_yolo.py +79 -58
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +62 -24
  76. data_management/databases/subset_json_db.py +24 -15
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -162
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -158
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +7 -7
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +65 -24
  120. data_management/labelme_to_yolo.py +8 -8
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +13 -13
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +44 -110
  128. data_management/lila/generate_lila_per_image_labels.py +55 -42
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +96 -33
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +110 -97
  135. data_management/remap_coco_categories.py +83 -83
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +30 -23
  138. data_management/wi_download_csv_to_coco.py +246 -239
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +300 -60
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +179 -113
  147. detection/run_inference_with_yolov5_val.py +108 -48
  148. detection/run_tiled_inference.py +111 -40
  149. detection/tf_detector.py +51 -29
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +228 -68
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -871
  157. md_utils/path_utils.py +460 -134
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +176 -60
  163. md_utils/write_html_image_list.py +40 -33
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +597 -291
  168. md_visualization/visualize_db.py +76 -48
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/METADATA +13 -7
  171. megadetector-5.0.10.dist-info/RECORD +224 -0
  172. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/top_level.txt +1 -0
  173. taxonomy_mapping/__init__.py +0 -0
  174. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  175. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  176. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  177. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  178. taxonomy_mapping/retrieve_sample_image.py +12 -12
  179. taxonomy_mapping/simple_image_download.py +11 -11
  180. taxonomy_mapping/species_lookup.py +10 -10
  181. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  182. taxonomy_mapping/taxonomy_graph.py +47 -47
  183. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  184. data_management/cct_json_to_filename_json.py +0 -89
  185. data_management/cct_to_csv.py +0 -140
  186. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  187. detection/detector_training/copy_checkpoints.py +0 -43
  188. megadetector-5.0.8.dist-info/RECORD +0 -205
  189. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/LICENSE +0 -0
  190. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/WHEEL +0 -0
@@ -1,162 +1,188 @@
1
- ########
2
- #
3
- # get_image_sizes.py
4
- #
5
- # Given a json-formatted list of image filenames, retrieve the width and height of
6
- # every image, optionally writing the results to a new .json file.
7
- #
8
- ########
9
-
10
- #%% Constants and imports
11
-
12
- import argparse
13
- import json
14
- import os
15
- from PIL import Image
16
- import sys
17
-
18
- from md_utils.path_utils import find_images
19
-
20
- from multiprocessing.pool import ThreadPool
21
- from multiprocessing.pool import Pool
22
- from functools import partial
23
- from tqdm import tqdm
24
-
25
- image_base = ''
26
- default_n_threads = 1
27
- use_threads = False
28
-
29
-
30
- #%% Processing functions
31
-
32
- def _get_image_size(image_path,image_prefix=None):
33
- """
34
- Support function to get the size of a single image. Returns a (path,w,h) tuple.
35
- w and h will be -1 if the image fails to load.
36
- """
37
-
38
- if image_prefix is not None:
39
- full_path = os.path.join(image_prefix,image_path)
40
- else:
41
- full_path = image_path
42
-
43
- # Is this image on disk?
44
- if not os.path.isfile(full_path):
45
- print('Could not find image {}'.format(full_path))
46
- return (image_path,-1,-1)
47
-
48
- try:
49
- pil_im = Image.open(full_path)
50
- w = pil_im.width
51
- h = pil_im.height
52
- return (image_path,w,h)
53
- except Exception as e:
54
- print('Error reading image {}: {}'.format(full_path,str(e)))
55
- return (image_path,-1,-1)
56
-
57
-
58
- def get_image_sizes(filenames,image_prefix=None,output_file=None,
59
- n_workers=default_n_threads,use_threads=True,
60
- recursive=True):
61
- """
62
- Get the width and height of all images in [filenames], which can be:
63
-
64
- * A .json-formatted file
65
- * A folder
66
- * A list of files
67
-
68
- ...returning a list of (path,w,h) tuples, and optionally writing the results to [output_file].
69
- """
70
-
71
- if output_file is not None:
72
- assert os.path.isdir(os.path.dirname(output_file)), \
73
- 'Illegal output file {}, parent folder does not exist'.format(output_file)
74
-
75
- if isinstance(filenames,str) and os.path.isfile(filenames):
76
- with open(filenames,'r') as f:
77
- filenames = json.load(f)
78
- filenames = [s.strip() for s in filenames]
79
- elif isinstance(filenames,str) and os.path.isdir(filenames):
80
- filenames = find_images(filenames,recursive=recursive,
81
- return_relative_paths=False,convert_slashes=True)
82
- else:
83
- assert isinstance(filenames,list)
84
-
85
- if n_workers <= 1:
86
-
87
- all_results = []
88
- for i_file,fn in tqdm(enumerate(filenames),total=len(filenames)):
89
- all_results.append(_get_image_size(fn,image_prefix=image_prefix))
90
-
91
- else:
92
-
93
- print('Creating a pool with {} workers'.format(n_workers))
94
- if use_threads:
95
- pool = ThreadPool(n_workers)
96
- else:
97
- pool = Pool(n_workers)
98
- # all_results = list(tqdm(pool.imap(process_image, filenames), total=len(filenames)))
99
- all_results = list(tqdm(pool.imap(
100
- partial(_get_image_size,image_prefix=image_prefix), filenames), total=len(filenames)))
101
-
102
- if output_file is not None:
103
- with open(output_file,'w') as f:
104
- json.dump(all_results,f,indent=1)
105
-
106
- return all_results
107
-
108
-
109
- #%% Interactive driver
110
-
111
- if False:
112
-
113
- pass
114
-
115
- #%%
116
-
117
- # List images in a test folder
118
- base_dir = r'c:\temp\test_images'
119
- image_list_file = os.path.join(base_dir,'images.json')
120
- relative_image_list_file = os.path.join(base_dir,'images_relative.json')
121
- image_size_file = os.path.join(base_dir,'image_sizes.json')
122
- from md_utils import path_utils
123
- image_names = path_utils.find_images(base_dir,recursive=True)
124
-
125
- with open(image_list_file,'w') as f:
126
- json.dump(image_names,f,indent=1)
127
-
128
- relative_image_names = []
129
- for s in image_names:
130
- relative_image_names.append(os.path.relpath(s,base_dir))
131
-
132
- with open(relative_image_list_file,'w') as f:
133
- json.dump(relative_image_names,f,indent=1)
134
-
135
-
136
- #%%
137
-
138
- get_image_sizes(relative_image_list_file,image_size_file,image_prefix=base_dir,n_threads=4)
139
-
140
-
141
- #%% Command-line driver
142
-
143
- def main():
144
-
145
- parser = argparse.ArgumentParser()
146
- parser.add_argument('input_file',type=str)
147
- parser.add_argument('output_file',type=str)
148
- parser.add_argument('--image_prefix', type=str, default=None)
149
- parser.add_argument('--n_threads', type=int, default=default_n_threads)
150
-
151
- if len(sys.argv[1:])==0:
152
- parser.print_help()
153
- parser.exit()
154
-
155
- args = parser.parse_args()
156
-
157
- _ = get_image_sizes(args.input_file,args.output_file,args.image_prefix,args.n_threads)
158
-
159
-
160
- if __name__ == '__main__':
161
-
162
- main()
1
+ """
2
+
3
+ get_image_sizes.py
4
+
5
+ Given a json-formatted list of image filenames, retrieves the width and height of
6
+ every image, optionally writing the results to a new .json file.
7
+
8
+ """
9
+
10
+ #%% Constants and imports
11
+
12
+ import argparse
13
+ import json
14
+ import os
15
+ from PIL import Image
16
+ import sys
17
+
18
+ from multiprocessing.pool import ThreadPool
19
+ from multiprocessing.pool import Pool
20
+ from functools import partial
21
+ from tqdm import tqdm
22
+
23
+ from md_utils.path_utils import find_images
24
+
25
+ image_base = ''
26
+ default_n_threads = 1
27
+ use_threads = False
28
+
29
+
30
+ #%% Processing functions
31
+
32
+ def _get_image_size(image_path,image_prefix=None):
33
+ """
34
+ Support function to get the size of a single image. Returns a (path,w,h) tuple.
35
+ w and h will be -1 if the image fails to load.
36
+ """
37
+
38
+ if image_prefix is not None:
39
+ full_path = os.path.join(image_prefix,image_path)
40
+ else:
41
+ full_path = image_path
42
+
43
+ # Is this image on disk?
44
+ if not os.path.isfile(full_path):
45
+ print('Could not find image {}'.format(full_path))
46
+ return (image_path,-1,-1)
47
+
48
+ try:
49
+ pil_im = Image.open(full_path)
50
+ w = pil_im.width
51
+ h = pil_im.height
52
+ return (image_path,w,h)
53
+ except Exception as e:
54
+ print('Error reading image {}: {}'.format(full_path,str(e)))
55
+ return (image_path,-1,-1)
56
+
57
+
58
+ def get_image_sizes(filenames,image_prefix=None,output_file=None,
59
+ n_workers=default_n_threads,use_threads=True,
60
+ recursive=True):
61
+ """
62
+ Gets the width and height of all images in [filenames], which can be:
63
+
64
+ * A .json-formatted file containing a list of strings
65
+ * A folder
66
+ * A list of files
67
+
68
+ ...returning a list of (path,w,h) tuples, and optionally writing the results to [output_file].
69
+
70
+ Args:
71
+ filenames (str or list): the image filenames for which we should retrieve sizes,
72
+ can be the name of a .json-formatted file containing a list of strings, a folder
73
+ in which we should enumerate images, or a list of files.
74
+ image_prefix (str, optional): optional prefix to add to images to get to full paths;
75
+ useful when [filenames] contains relative files, in which case [image_prefix] is the
76
+ base folder for the source images.
77
+ output_file (str, optional): a .json file to which we should write the image sizes
78
+ n_workers (int, optional): number of parallel workers to use, set to <=1 to
79
+ disable parallelization
80
+ use_threads (bool, optional): whether to use threads (True) or processes (False)
81
+ for parallelization; not relevant if [n_workers] <= 1
82
+ recursive (bool, optional): only relevant if [filenames] is actually a folder,
83
+ determines whether image enumeration within that folder will be recursive
84
+
85
+ Returns:
86
+ list: list of (path,w,h) tuples
87
+ """
88
+
89
+ if output_file is not None:
90
+ assert os.path.isdir(os.path.dirname(output_file)), \
91
+ 'Illegal output file {}, parent folder does not exist'.format(output_file)
92
+
93
+ if isinstance(filenames,str) and os.path.isfile(filenames):
94
+ with open(filenames,'r') as f:
95
+ filenames = json.load(f)
96
+ filenames = [s.strip() for s in filenames]
97
+ elif isinstance(filenames,str) and os.path.isdir(filenames):
98
+ filenames = find_images(filenames,recursive=recursive,
99
+ return_relative_paths=False,convert_slashes=True)
100
+ else:
101
+ assert isinstance(filenames,list)
102
+
103
+ if n_workers <= 1:
104
+
105
+ all_results = []
106
+ for i_file,fn in tqdm(enumerate(filenames),total=len(filenames)):
107
+ all_results.append(_get_image_size(fn,image_prefix=image_prefix))
108
+
109
+ else:
110
+
111
+ print('Creating a pool with {} workers'.format(n_workers))
112
+ if use_threads:
113
+ pool = ThreadPool(n_workers)
114
+ else:
115
+ pool = Pool(n_workers)
116
+ # all_results = list(tqdm(pool.imap(process_image, filenames), total=len(filenames)))
117
+ all_results = list(tqdm(pool.imap(
118
+ partial(_get_image_size,image_prefix=image_prefix), filenames), total=len(filenames)))
119
+
120
+ if output_file is not None:
121
+ with open(output_file,'w') as f:
122
+ json.dump(all_results,f,indent=1)
123
+
124
+ return all_results
125
+
126
+
127
+ #%% Interactive driver
128
+
129
+ if False:
130
+
131
+ pass
132
+
133
+ #%%
134
+
135
+ # List images in a test folder
136
+ base_dir = r'c:\temp\test_images'
137
+ image_list_file = os.path.join(base_dir,'images.json')
138
+ relative_image_list_file = os.path.join(base_dir,'images_relative.json')
139
+ image_size_file = os.path.join(base_dir,'image_sizes.json')
140
+ from md_utils import path_utils
141
+ image_names = path_utils.find_images(base_dir,recursive=True)
142
+
143
+ with open(image_list_file,'w') as f:
144
+ json.dump(image_names,f,indent=1)
145
+
146
+ relative_image_names = []
147
+ for s in image_names:
148
+ relative_image_names.append(os.path.relpath(s,base_dir))
149
+
150
+ with open(relative_image_list_file,'w') as f:
151
+ json.dump(relative_image_names,f,indent=1)
152
+
153
+
154
+ #%%
155
+
156
+ get_image_sizes(relative_image_list_file,output_file=image_size_file,image_prefix=base_dir,n_workers=4)
157
+
158
+
159
+ #%% Command-line driver
160
+
161
+ def main():
162
+
163
+ parser = argparse.ArgumentParser()
164
+ parser.add_argument('filenames',type=str,
165
+ help='Folder from which we should fetch image sizes, or .json file with a list of filenames')
166
+ parser.add_argument('output_file',type=str,
167
+ help='Output file (.json) to which we should write image size information')
168
+ parser.add_argument('--image_prefix', type=str, default=None,
169
+ help='Prefix to append to image filenames, only relevant if [filenames] points to a list of ' + \
170
+ 'relative paths')
171
+ parser.add_argument('--n_threads', type=int, default=default_n_threads,
172
+ help='Number of concurrent workers, set to <=1 to disable parallelization (default {})'.format(
173
+ default_n_threads))
174
+
175
+ if len(sys.argv[1:])==0:
176
+ parser.print_help()
177
+ parser.exit()
178
+
179
+ args = parser.parse_args()
180
+
181
+ _ = get_image_sizes(filenames=args.filenames,
182
+ output_file=args.output_file,
183
+ image_prefix=args.image_prefix,
184
+ n_workers=args.n_threads)
185
+
186
+ if __name__ == '__main__':
187
+
188
+ main()
@@ -1,11 +1,11 @@
1
- ########
2
- #
3
- # add_nacti_sizes.py
4
- #
5
- # NACTI bounding box metadata was posted before we inclduded width and height as semi-standard
6
- # fields; pull size information from the main metadata file and add to the bbox file.
7
- #
8
- ########
1
+ """
2
+
3
+ add_nacti_sizes.py
4
+
5
+ NACTI bounding box metadata was posted before we included width and height as semi-standard
6
+ fields; pull size information from the main metadata file and add to the bbox file.
7
+
8
+ """
9
9
 
10
10
  #%% Constants and environment
11
11
 
@@ -1,79 +1,79 @@
1
- ########
2
- #
3
- # add_timestamps_to_icct.py
4
- #
5
- # The Island Conservation Camera Traps dataset was originally posted without timestamps
6
- # in either .json metadata or EXIF metadata. We pulled timestamps out using ocr_tools.py,
7
- # this script adds those timestamps into the .json metadata.
8
- #
9
- ########
10
-
11
- #%% Imports and constants
12
-
13
- import json
14
-
15
- ocr_results_file = r'g:\temp\ocr_results.2023.10.31.07.37.54.json'
16
- input_metadata_file = r'd:\lila\islandconservationcameratraps\island_conservation.json'
17
- output_metadata_file = r'g:\temp\island_conservation_camera_traps_1.02.json'
18
- ocr_results_file_base = 'g:/temp/island_conservation_camera_traps/'
19
- assert ocr_results_file_base.endswith('/')
20
-
21
-
22
- #%% Read input metadata
23
-
24
- with open(input_metadata_file,'r') as f:
25
- input_metadata = json.load(f)
26
-
27
- assert input_metadata['info']['version'] == '1.01'
28
-
29
- # im = input_metadata['images'][0]
30
- for im in input_metadata['images']:
31
- assert 'datetime' not in im
32
-
33
-
34
- #%% Read OCR results
35
-
36
- with open(ocr_results_file,'r') as f:
37
- abs_filename_to_ocr_results = json.load(f)
38
-
39
- relative_filename_to_ocr_results = {}
40
-
41
- for fn_abs in abs_filename_to_ocr_results:
42
- assert ocr_results_file_base in fn_abs
43
- fn_relative = fn_abs.replace(ocr_results_file_base,'')
44
- relative_filename_to_ocr_results[fn_relative] = abs_filename_to_ocr_results[fn_abs]
45
-
46
-
47
- #%% Add datetimes to metadata
48
-
49
- images_not_in_datetime_results = []
50
- images_with_failed_datetimes = []
51
-
52
- for i_image,im in enumerate(input_metadata['images']):
53
- if im['file_name'] not in relative_filename_to_ocr_results:
54
- images_not_in_datetime_results.append(im)
55
- im['datetime'] = None
56
- continue
57
- ocr_results = relative_filename_to_ocr_results[im['file_name']]
58
- if ocr_results['datetime'] is None:
59
- images_with_failed_datetimes.append(im)
60
- im['datetime'] = None
61
- continue
62
- im['datetime'] = ocr_results['datetime']
63
-
64
- print('{} of {} images were not in datetime results'.format(
65
- len(images_not_in_datetime_results),len(input_metadata['images'])))
66
-
67
- print('{} of {} images were had failed datetime results'.format(
68
- len(images_with_failed_datetimes),len(input_metadata['images'])))
69
-
70
- for im in input_metadata['images']:
71
- assert 'datetime' in im
72
-
73
-
74
- #%% Write output
75
-
76
- input_metadata['info']['version'] = '1.02'
77
-
78
- with open(output_metadata_file,'w') as f:
1
+ """
2
+
3
+ add_timestamps_to_icct.py
4
+
5
+ The Island Conservation Camera Traps dataset was originally posted without timestamps
6
+ in either .json metadata or EXIF metadata. We pulled timestamps out using ocr_tools.py,
7
+ this script adds those timestamps into the .json metadata.
8
+
9
+ """
10
+
11
+ #%% Imports and constants
12
+
13
+ import json
14
+
15
+ ocr_results_file = r'g:\temp\ocr_results.2023.10.31.07.37.54.json'
16
+ input_metadata_file = r'd:\lila\islandconservationcameratraps\island_conservation.json'
17
+ output_metadata_file = r'g:\temp\island_conservation_camera_traps_1.02.json'
18
+ ocr_results_file_base = 'g:/temp/island_conservation_camera_traps/'
19
+ assert ocr_results_file_base.endswith('/')
20
+
21
+
22
+ #%% Read input metadata
23
+
24
+ with open(input_metadata_file,'r') as f:
25
+ input_metadata = json.load(f)
26
+
27
+ assert input_metadata['info']['version'] == '1.01'
28
+
29
+ # im = input_metadata['images'][0]
30
+ for im in input_metadata['images']:
31
+ assert 'datetime' not in im
32
+
33
+
34
+ #%% Read OCR results
35
+
36
+ with open(ocr_results_file,'r') as f:
37
+ abs_filename_to_ocr_results = json.load(f)
38
+
39
+ relative_filename_to_ocr_results = {}
40
+
41
+ for fn_abs in abs_filename_to_ocr_results:
42
+ assert ocr_results_file_base in fn_abs
43
+ fn_relative = fn_abs.replace(ocr_results_file_base,'')
44
+ relative_filename_to_ocr_results[fn_relative] = abs_filename_to_ocr_results[fn_abs]
45
+
46
+
47
+ #%% Add datetimes to metadata
48
+
49
+ images_not_in_datetime_results = []
50
+ images_with_failed_datetimes = []
51
+
52
+ for i_image,im in enumerate(input_metadata['images']):
53
+ if im['file_name'] not in relative_filename_to_ocr_results:
54
+ images_not_in_datetime_results.append(im)
55
+ im['datetime'] = None
56
+ continue
57
+ ocr_results = relative_filename_to_ocr_results[im['file_name']]
58
+ if ocr_results['datetime'] is None:
59
+ images_with_failed_datetimes.append(im)
60
+ im['datetime'] = None
61
+ continue
62
+ im['datetime'] = ocr_results['datetime']
63
+
64
+ print('{} of {} images were not in datetime results'.format(
65
+ len(images_not_in_datetime_results),len(input_metadata['images'])))
66
+
67
+ print('{} of {} images had failed datetime results'.format(
68
+ len(images_with_failed_datetimes),len(input_metadata['images'])))
69
+
70
+ for im in input_metadata['images']:
71
+ assert 'datetime' in im
72
+
73
+
74
+ #%% Write output
75
+
76
+ input_metadata['info']['version'] = '1.02'
77
+
78
+ with open(output_metadata_file,'w') as f:
79
79
  json.dump(input_metadata,f,indent=1)