megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector has been flagged by the registry as possibly problematic.

Files changed (197)
  1. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  2. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  3. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  4. megadetector/classification/aggregate_classifier_probs.py +3 -3
  5. megadetector/classification/analyze_failed_images.py +5 -5
  6. megadetector/classification/cache_batchapi_outputs.py +5 -5
  7. megadetector/classification/create_classification_dataset.py +11 -12
  8. megadetector/classification/crop_detections.py +10 -10
  9. megadetector/classification/csv_to_json.py +8 -8
  10. megadetector/classification/detect_and_crop.py +13 -15
  11. megadetector/classification/efficientnet/model.py +8 -8
  12. megadetector/classification/efficientnet/utils.py +6 -5
  13. megadetector/classification/evaluate_model.py +7 -7
  14. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  15. megadetector/classification/json_to_azcopy_list.py +1 -1
  16. megadetector/classification/json_validator.py +29 -32
  17. megadetector/classification/map_classification_categories.py +9 -9
  18. megadetector/classification/merge_classification_detection_output.py +12 -9
  19. megadetector/classification/prepare_classification_script.py +19 -19
  20. megadetector/classification/prepare_classification_script_mc.py +26 -26
  21. megadetector/classification/run_classifier.py +4 -4
  22. megadetector/classification/save_mislabeled.py +6 -6
  23. megadetector/classification/train_classifier.py +1 -1
  24. megadetector/classification/train_classifier_tf.py +9 -9
  25. megadetector/classification/train_utils.py +10 -10
  26. megadetector/data_management/annotations/annotation_constants.py +1 -2
  27. megadetector/data_management/camtrap_dp_to_coco.py +79 -46
  28. megadetector/data_management/cct_json_utils.py +103 -103
  29. megadetector/data_management/cct_to_md.py +49 -49
  30. megadetector/data_management/cct_to_wi.py +33 -33
  31. megadetector/data_management/coco_to_labelme.py +75 -75
  32. megadetector/data_management/coco_to_yolo.py +210 -193
  33. megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
  34. megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
  35. megadetector/data_management/databases/integrity_check_json_db.py +228 -200
  36. megadetector/data_management/databases/subset_json_db.py +33 -33
  37. megadetector/data_management/generate_crops_from_cct.py +88 -39
  38. megadetector/data_management/get_image_sizes.py +54 -49
  39. megadetector/data_management/labelme_to_coco.py +133 -125
  40. megadetector/data_management/labelme_to_yolo.py +159 -73
  41. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  42. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  43. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  44. megadetector/data_management/lila/download_lila_subset.py +21 -24
  45. megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
  46. megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
  47. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  48. megadetector/data_management/lila/lila_common.py +73 -70
  49. megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
  50. megadetector/data_management/mewc_to_md.py +344 -340
  51. megadetector/data_management/ocr_tools.py +262 -255
  52. megadetector/data_management/read_exif.py +249 -227
  53. megadetector/data_management/remap_coco_categories.py +90 -28
  54. megadetector/data_management/remove_exif.py +81 -21
  55. megadetector/data_management/rename_images.py +187 -187
  56. megadetector/data_management/resize_coco_dataset.py +588 -120
  57. megadetector/data_management/speciesnet_to_md.py +41 -41
  58. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  59. megadetector/data_management/yolo_output_to_md_output.py +248 -122
  60. megadetector/data_management/yolo_to_coco.py +333 -191
  61. megadetector/detection/change_detection.py +832 -0
  62. megadetector/detection/process_video.py +340 -337
  63. megadetector/detection/pytorch_detector.py +358 -278
  64. megadetector/detection/run_detector.py +399 -186
  65. megadetector/detection/run_detector_batch.py +404 -377
  66. megadetector/detection/run_inference_with_yolov5_val.py +340 -327
  67. megadetector/detection/run_tiled_inference.py +257 -249
  68. megadetector/detection/tf_detector.py +24 -24
  69. megadetector/detection/video_utils.py +332 -295
  70. megadetector/postprocessing/add_max_conf.py +19 -11
  71. megadetector/postprocessing/categorize_detections_by_size.py +45 -45
  72. megadetector/postprocessing/classification_postprocessing.py +468 -433
  73. megadetector/postprocessing/combine_batch_outputs.py +23 -23
  74. megadetector/postprocessing/compare_batch_results.py +590 -525
  75. megadetector/postprocessing/convert_output_format.py +106 -102
  76. megadetector/postprocessing/create_crop_folder.py +347 -147
  77. megadetector/postprocessing/detector_calibration.py +173 -168
  78. megadetector/postprocessing/generate_csv_report.py +508 -499
  79. megadetector/postprocessing/load_api_results.py +48 -27
  80. megadetector/postprocessing/md_to_coco.py +133 -102
  81. megadetector/postprocessing/md_to_labelme.py +107 -90
  82. megadetector/postprocessing/md_to_wi.py +40 -40
  83. megadetector/postprocessing/merge_detections.py +92 -114
  84. megadetector/postprocessing/postprocess_batch_results.py +319 -301
  85. megadetector/postprocessing/remap_detection_categories.py +91 -38
  86. megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
  87. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  88. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  89. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
  90. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  91. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  92. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  93. megadetector/postprocessing/validate_batch_results.py +70 -70
  94. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  95. megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
  96. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
  97. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
  98. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  99. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  100. megadetector/taxonomy_mapping/species_lookup.py +156 -74
  101. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  102. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  103. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  104. megadetector/utils/ct_utils.py +1049 -211
  105. megadetector/utils/directory_listing.py +21 -77
  106. megadetector/utils/gpu_test.py +22 -22
  107. megadetector/utils/md_tests.py +632 -529
  108. megadetector/utils/path_utils.py +1520 -431
  109. megadetector/utils/process_utils.py +41 -41
  110. megadetector/utils/split_locations_into_train_val.py +62 -62
  111. megadetector/utils/string_utils.py +148 -27
  112. megadetector/utils/url_utils.py +489 -176
  113. megadetector/utils/wi_utils.py +2658 -2526
  114. megadetector/utils/write_html_image_list.py +137 -137
  115. megadetector/visualization/plot_utils.py +34 -30
  116. megadetector/visualization/render_images_with_thumbnails.py +39 -74
  117. megadetector/visualization/visualization_utils.py +487 -435
  118. megadetector/visualization/visualize_db.py +232 -198
  119. megadetector/visualization/visualize_detector_output.py +82 -76
  120. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
  121. megadetector-10.0.0.dist-info/RECORD +139 -0
  122. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
  123. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  124. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  125. megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
  126. megadetector/api/batch_processing/api_core/server.py +0 -294
  127. megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
  128. megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
  129. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  130. megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
  131. megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
  132. megadetector/api/batch_processing/api_core/server_utils.py +0 -88
  133. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  134. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  135. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  136. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  137. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  138. megadetector/api/synchronous/__init__.py +0 -0
  139. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  140. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
  141. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
  142. megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
  143. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  144. megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
  145. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  146. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  147. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  148. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  149. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  150. megadetector/data_management/importers/awc_to_json.py +0 -191
  151. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  152. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  153. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  154. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  155. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  156. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  157. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  158. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  159. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  160. megadetector/data_management/importers/ena24_to_json.py +0 -276
  161. megadetector/data_management/importers/filenames_to_json.py +0 -386
  162. megadetector/data_management/importers/helena_to_cct.py +0 -283
  163. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  164. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  165. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  166. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  167. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  168. megadetector/data_management/importers/missouri_to_json.py +0 -490
  169. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  170. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  171. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  172. megadetector/data_management/importers/pc_to_json.py +0 -365
  173. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  174. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  175. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  176. megadetector/data_management/importers/rspb_to_json.py +0 -356
  177. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  178. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  179. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  180. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  181. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  182. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  183. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  184. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  185. megadetector/data_management/importers/ubc_to_json.py +0 -399
  186. megadetector/data_management/importers/umn_to_json.py +0 -507
  187. megadetector/data_management/importers/wellington_to_json.py +0 -263
  188. megadetector/data_management/importers/wi_to_json.py +0 -442
  189. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  190. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  191. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  192. megadetector/utils/azure_utils.py +0 -178
  193. megadetector/utils/sas_blob_utils.py +0 -509
  194. megadetector-5.0.28.dist-info/RECORD +0 -209
  195. /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
  196. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
  197. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
megadetector/data_management/databases/subset_json_db.py

@@ -2,14 +2,14 @@
 
 subset_json_db.py
 
-Select a subset of images (and associated annotations) from a .json file in COCO
+Select a subset of images (and associated annotations) from a .json file in COCO
 Camera Traps format based on a string query.
 
 To subset .json files in the MegaDetector output format, see
 subset_json_detector_output.py.
 
 """
-
+
 #%% Constants and imports
 
 import os
@@ -18,6 +18,7 @@ import json
 import argparse
 
 from tqdm import tqdm
+from megadetector.utils import ct_utils
 from copy import copy
 
 
@@ -25,22 +26,22 @@ from copy import copy
 
 def subset_json_db(input_json, query, output_json=None, ignore_case=False, verbose=False):
     """
-    Given a json file (or dictionary already loaded from a json file), produce a new
-    database containing only the images whose filenames contain the string 'query',
+    Given a json file (or dictionary already loaded from a json file), produce a new
+    database containing only the images whose filenames contain the string 'query',
     optionally writing that DB output to a new json file.
-
+
     Args:
         input_json (str): COCO Camera Traps .json file to load, or an already-loaded dict
-        query (str or list): string to query for, only include images in the output whose filenames
+        query (str or list): string to query for, only include images in the output whose filenames
             contain this string. If this is a list, test for exact matches.
         output_json (str, optional): file to write the resulting .json file to
         ignore_case (bool, optional): whether to perform a case-insensitive search for [query]
         verbose (bool, optional): enable additional debug output
-
+
     Returns:
         dict: CCT dictionary containing a subset of the images and annotations in the input dict
     """
-
+
     # Load the input file if necessary
     if isinstance(input_json,str):
         print('Loading input .json...')
@@ -51,26 +52,26 @@ def subset_json_db(input_json, query, output_json=None, ignore_case=False, verbo
 
     # Find images matching the query
     images = []
-
+
     if isinstance(query,str):
-
+
         if ignore_case:
             query = query.lower()
-
+
         for im in tqdm(input_data['images']):
             fn = im['file_name']
             if ignore_case:
                 fn = fn.lower()
             if query in fn:
                 images.append(im)
-
+
     else:
-
+
         query = set(query)
-
+
         if ignore_case:
             query = set([s.lower() for s in query])
-
+
         for im in input_data['images']:
             fn = im['file_name']
             if ignore_case:
@@ -79,27 +80,26 @@ def subset_json_db(input_json, query, output_json=None, ignore_case=False, verbo
                 images.append(im)
 
     image_ids = set([im['id'] for im in images])
-
+
     # Find annotations referring to those images
     annotations = []
-
+
     for ann in input_data['annotations']:
         if ann['image_id'] in image_ids:
             annotations.append(ann)
-
+
     output_data = copy(input_data)
     output_data['images'] = images
     output_data['annotations'] = annotations
-
+
     # Write the output file if requested
     if output_json is not None:
         if verbose:
             print('Writing output .json to {}'.format(output_json))
         output_dir = os.path.dirname(output_json)
         os.makedirs(output_dir,exist_ok=True)
-        with open(output_json,'w') as f:
-            json.dump(output_data,f,indent=1)
-
+        ct_utils.write_json(output_json, output_data)
+
     if verbose:
         print('Keeping {} of {} images, {} of {} annotations'.format(
             len(output_data['images']),len(input_data['images']),
@@ -111,33 +111,33 @@ def subset_json_db(input_json, query, output_json=None, ignore_case=False, verbo
 #%% Interactive driver
 
 if False:
-
+
     #%%
-
+
     input_json = r"e:\Statewide_wolf_container\idfg_20190409.json"
     output_json = r"e:\Statewide_wolf_container\idfg_20190409_clearcreek.json"
     query = 'clearcreek'
     ignore_case = True
     db = subset_json_db(input_json, query, output_json, ignore_case)
-
+
 
 #%% Command-line driver
 
-def main():
-
+def main(): # noqa
+
     parser = argparse.ArgumentParser()
     parser.add_argument('input_json', type=str, help='Input file (a COCO Camera Traps .json file)')
-    parser.add_argument('output_json', type=str, help='Output file')
-    parser.add_argument('query', type=str, help='Filename query')
+    parser.add_argument('output_json', type=str, help='Output file')
+    parser.add_argument('query', type=str, help='Filename query')
     parser.add_argument('--ignore_case', action='store_true')
-
+
     if len(sys.argv[1:]) == 0:
         parser.print_help()
         parser.exit()
 
-    args = parser.parse_args()
-
+    args = parser.parse_args()
+
     subset_json_db(args.input_json,args.query,args.output_json,args.ignore_case)
 
-if __name__ == '__main__':
+if __name__ == '__main__':
     main()
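The subset_json_db.py diff above is mostly whitespace cleanup (most of the paired removals and additions appear to differ only in trailing whitespace); the functional change is that the output file is now written via ct_utils.write_json rather than a direct json.dump call, with the public signature unchanged. A minimal usage sketch follows, assuming the module path shown in the file list; the file paths and query string are placeholders, not taken from the package:

from megadetector.data_management.databases.subset_json_db import subset_json_db

# Keep only images (and their annotations) whose filenames contain 'site_a',
# writing the subset to a new COCO Camera Traps .json file.
# 'all_images.json' and 'site_a.json' are placeholder paths.
subset = subset_json_db(input_json='all_images.json',
                        query='site_a',
                        output_json='site_a.json',
                        ignore_case=True)

print('Kept {} images and {} annotations'.format(
    len(subset['images']), len(subset['annotations'])))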
megadetector/data_management/generate_crops_from_cct.py

@@ -10,6 +10,7 @@ each bounding box.
 #%% Imports and constants
 
 import os
+import argparse
 import json
 
 from tqdm import tqdm
@@ -22,7 +23,7 @@ def generate_crops_from_cct(cct_file,image_dir,output_dir,padding=0,flat_output=
     """
     Given a .json file in COCO Camera Traps format, creates a cropped image for
     each bounding box.
-
+
     Args:
         cct_file (str): the COCO .json file from which we should load data
         image_dir (str): the folder where the images live; filenames in the .json
@@ -31,119 +32,167 @@ def generate_crops_from_cct(cct_file,image_dir,output_dir,padding=0,flat_output=
         padding (float, optional): number of pixels we should expand each box before
             cropping
         flat_output (bool, optional): if False, folder structure will be preserved
-            in the output, e.g. the image a/b/c/d.jpg will result in image files
+            in the output, e.g. the image a/b/c/d.jpg will result in image files
             in the output folder called, e.g., a/b/c/d_crop_000_id_12345.jpg. If
-            [flat_output] is True, the corresponding output image will be
-            a_b_c_d_crop_000_id_12345.jpg.
+            [flat_output] is True, the corresponding output image will be
+            a_b_c_d_crop_000_id_12345.jpg.
     """
-
+
     ## Read and validate input
-
+
     assert os.path.isfile(cct_file)
     assert os.path.isdir(image_dir)
     os.makedirs(output_dir,exist_ok=True)
 
     with open(cct_file,'r') as f:
         d = json.load(f)
-
-
+
+
     ## Find annotations for each image
-
+
     from collections import defaultdict
-
+
     # This actually maps image IDs to annotations, but only to annotations
     # containing boxes
     image_id_to_boxes = defaultdict(list)
-
+
     n_boxes = 0
-
+
     for ann in d['annotations']:
         if 'bbox' in ann:
             image_id_to_boxes[ann['image_id']].append(ann)
             n_boxes += 1
-
+
     print('Found {} boxes in {} annotations for {} images'.format(
         n_boxes,len(d['annotations']),len(d['images'])))
-
-
+
+
     ## Generate crops
-
+
     # im = d['images'][0]
     for im in tqdm(d['images']):
-
+
         input_image_fn = os.path.join(os.path.join(image_dir,im['file_name']))
         assert os.path.isfile(input_image_fn), 'Could not find image {}'.format(input_image_fn)
-
+
         if im['id'] not in image_id_to_boxes:
             continue
-
+
         annotations_this_image = image_id_to_boxes[im['id']]
-
+
         # Load the image
         img = Image.open(input_image_fn)
-
+
        # Generate crops
        # i_ann = 0; ann = annotations_this_image[i_ann]
        for i_ann,ann in enumerate(annotations_this_image):
-
+
            # x/y/w/h, origin at the upper-left
            bbox = ann['bbox']
-
+
            xmin = bbox[0]
            ymin = bbox[1]
            xmax = xmin + bbox[2]
            ymax = ymin + bbox[3]
-
+
            xmin -= padding / 2
            ymin -= padding / 2
            xmax += padding / 2
            ymax += padding / 2
-
+
            xmin = max(xmin,0)
            ymin = max(ymin,0)
            xmax = min(xmax,img.width-1)
            ymax = min(ymax,img.height-1)
-
+
            crop = img.crop(box=[xmin, ymin, xmax, ymax])
-
+
            output_fn = os.path.splitext(im['file_name'])[0].replace('\\','/')
            if flat_output:
                output_fn = output_fn.replace('/','_')
            output_fn = output_fn + '_crop' + str(i_ann).zfill(3) + '_id_' + ann['id']
            output_fn = output_fn + '.jpg'
-
+
            output_full_path = os.path.join(output_dir,output_fn)
-
+
            if not flat_output:
                os.makedirs(os.path.dirname(output_full_path),exist_ok=True)
-
+
            crop.save(output_full_path)
-
+
        # ...for each box
-
+
    # ...for each image
-
+
 # ...generate_crops_from_cct()
 
 
 #%% Interactive driver
 
 if False:
-
+
     pass
 
     #%%
-
+
     cct_file = os.path.expanduser('~/data/noaa/noaa_estuary_fish.json')
     image_dir = os.path.expanduser('~/data/noaa/JPEGImages')
     padding = 50
     flat_output = True
     output_dir = '/home/user/tmp/noaa-fish-crops'
-
+
     generate_crops_from_cct(cct_file,image_dir,output_dir,padding,flat_output=True)
     files = os.listdir(output_dir)
-
-
+
+
 #%% Command-line driver
 
-# TODO
+def main():
+    """
+    Command-line interface to generate crops from a COCO Camera Traps .json file.
+    """
+
+    parser = argparse.ArgumentParser(
+        description='Generate cropped images from a COCO Camera Traps .json file'
+    )
+    parser.add_argument(
+        'cct_file',
+        type=str,
+        help='COCO .json file to load data from'
+    )
+    parser.add_argument(
+        'image_dir',
+        type=str,
+        help='Folder where images are located'
+    )
+    parser.add_argument(
+        'output_dir',
+        type=str,
+        help='Folder to which we should write cropped images'
+    )
+    parser.add_argument(
+        '--padding',
+        type=int,
+        default=0,
+        help='Pixels to expand each box before cropping'
+    )
+    parser.add_argument(
+        '--flat_output',
+        action='store_true',
+        help='Flatten folder structure in output (preserves folder structure by default)'
+    )
+
+    args = parser.parse_args()
+
+    generate_crops_from_cct(
+        cct_file=args.cct_file,
+        image_dir=args.image_dir,
+        output_dir=args.output_dir,
+        padding=args.padding,
+        flat_output=args.flat_output
+    )
+
+    print(f'Generated crops in {args.output_dir}')
+
+if __name__ == '__main__':
+    main()
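The generate_crops_from_cct.py diff above replaces the "# TODO" command-line stub with a working argparse entry point; the changes inside the function itself appear to be limited to whitespace, plus the new argparse import. The equivalent call from Python is sketched below, assuming the module path shown in the file list; the paths and padding value are placeholders, not taken from the package:

from megadetector.data_management.generate_crops_from_cct import generate_crops_from_cct

# Crop every bounding box in a COCO Camera Traps .json file, expanding each box
# by 10 pixels and flattening the output folder structure.
# 'annotations.json', 'images/', and 'crops/' are placeholder paths.
generate_crops_from_cct(cct_file='annotations.json',
                        image_dir='images/',
                        output_dir='crops/',
                        padding=10,
                        flat_output=True)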
megadetector/data_management/get_image_sizes.py

@@ -2,7 +2,7 @@
 
 get_image_sizes.py
 
-Given a json-formatted list of image filenames, retrieves the width and height of
+Given a json-formatted list of image filenames, retrieves the width and height of
 every image, optionally writing the results to a new .json file.
 
 """
@@ -35,45 +35,45 @@ def _get_image_size(image_path,image_prefix=None):
     Support function to get the size of a single image. Returns a (path,w,h) tuple.
     w and h will be -1 if the image fails to load.
     """
-
+
     if image_prefix is not None:
         full_path = os.path.join(image_prefix,image_path)
     else:
         full_path = image_path
-
+
     # Is this image on disk?
     if not os.path.isfile(full_path):
         print('Could not find image {}'.format(full_path))
         return (image_path,-1,-1)
 
-    try:
+    try:
         pil_im = Image.open(full_path)
-        w = pil_im.width
+        w = pil_im.width
         h = pil_im.height
         return (image_path,w,h)
-    except Exception as e:
+    except Exception as e:
         print('Error reading image {}: {}'.format(full_path,str(e)))
         return (image_path,-1,-1)
-
-
+
+
 def get_image_sizes(filenames,image_prefix=None,output_file=None,
                     n_workers=default_n_threads,use_threads=True,
                     recursive=True):
     """
     Gets the width and height of all images in [filenames], which can be:
-
+
     * A .json-formatted file containing list of strings
     * A folder
     * A list of files
 
     ...returning a list of (path,w,h) tuples, and optionally writing the results to [output_file].
-
+
     Args:
-        filenames (str or list): the image filenames for which we should retrieve sizes,
-            can be the name of a .json-formatted file containing list of strings, a folder
+        filenames (str or list): the image filenames for which we should retrieve sizes,
+            can be the name of a .json-formatted file containing list of strings, a folder
            in which we should enumerate images, or a list of files.
        image_prefix (str, optional): optional prefix to add to images to get to full paths;
-            useful when [filenames] contains relative files, in which case [image_prefix] is the
+            useful when [filenames] contains relative files, in which case [image_prefix] is the
            base folder for the source images.
        output_file (str, optional): a .json file to write the imgae sizes
        n_workers (int, optional): number of parallel workers to use, set to <=1 to
@@ -82,57 +82,62 @@ def get_image_sizes(filenames,image_prefix=None,output_file=None,
             for parallelization; not relevant if [n_workers] <= 1
         recursive (bool, optional): only relevant if [filenames] is actually a folder,
             determines whether image enumeration within that folder will be recursive
-
+
     Returns:
         list: list of (path,w,h) tuples
-    """
-
+    """
+
     if output_file is not None:
         assert os.path.isdir(os.path.dirname(output_file)), \
             'Illegal output file {}, parent folder does not exist'.format(output_file)
-
+
     if isinstance(filenames,str) and os.path.isfile(filenames):
-        with open(filenames,'r') as f:
+        with open(filenames,'r') as f:
            filenames = json.load(f)
        filenames = [s.strip() for s in filenames]
    elif isinstance(filenames,str) and os.path.isdir(filenames):
        filenames = find_images(filenames,recursive=recursive,
                                return_relative_paths=False,convert_slashes=True)
    else:
-        assert isinstance(filenames,list)
-
+        assert isinstance(filenames,list)
+
    if n_workers <= 1:
-
+
        all_results = []
        for i_file,fn in tqdm(enumerate(filenames),total=len(filenames)):
            all_results.append(_get_image_size(fn,image_prefix=image_prefix))
-
+
    else:
-
+
        print('Creating a pool with {} workers'.format(n_workers))
        if use_threads:
-            pool = ThreadPool(n_workers)
+            pool = ThreadPool(n_workers)
        else:
            pool = Pool(n_workers)
        # all_results = list(tqdm(pool.imap(process_image, filenames), total=len(filenames)))
-        all_results = list(tqdm(pool.imap(
-            partial(_get_image_size,image_prefix=image_prefix), filenames), total=len(filenames)))
-
+        try:
+            all_results = list(tqdm(pool.imap(
+                partial(_get_image_size,image_prefix=image_prefix), filenames), total=len(filenames)))
+        finally:
+            pool.close()
+            pool.join()
+            print("Pool closed and joined for image size reads")
+
    if output_file is not None:
        with open(output_file,'w') as f:
            json.dump(all_results,f,indent=1)
-
+
    return all_results
 
-
+
 #%% Interactive driver
 
 if False:
 
-    pass
+    pass
 
     #%%
-
+
     # List images in a test folder
     base_dir = r'c:\temp\test_images'
     image_list_file = os.path.join(base_dir,'images.json')
@@ -140,50 +145,50 @@ if False:
     image_size_file = os.path.join(base_dir,'image_sizes.json')
     from megadetector.utils import path_utils
     image_names = path_utils.find_images(base_dir,recursive=True)
-
+
     with open(image_list_file,'w') as f:
         json.dump(image_names,f,indent=1)
-
+
     relative_image_names = []
     for s in image_names:
         relative_image_names.append(os.path.relpath(s,base_dir))
-
+
     with open(relative_image_list_file,'w') as f:
         json.dump(relative_image_names,f,indent=1)
-
-
+
+
     #%%
-
+
     get_image_sizes(relative_image_list_file,image_size_file,image_prefix=base_dir,n_threads=4)
-
-
+
+
 #%% Command-line driver
-
-def main():
-
+
+def main(): # noqa
+
     parser = argparse.ArgumentParser()
     parser.add_argument('filenames',type=str,
         help='Folder from which we should fetch image sizes, or .json file with a list of filenames')
     parser.add_argument('output_file',type=str,
         help='Output file (.json) to which we should write image size information')
     parser.add_argument('--image_prefix', type=str, default=None,
-        help='Prefix to append to image filenames, only relevant if [filenames] points to a list of ' + \
-            'relative paths')
+        help='Prefix to append to image filenames, only relevant if [filenames] points to a ' + \
+            'list of relative paths')
     parser.add_argument('--n_threads', type=int, default=default_n_threads,
         help='Number of concurrent workers, set to <=1 to disable parallelization (default {})'.format(
            default_n_threads))
-
+
    if len(sys.argv[1:])==0:
        parser.print_help()
        parser.exit()
-
+
    args = parser.parse_args()
-
+
    _ = get_image_sizes(filenames=args.filenames,
                        output_file=args.output_file,
                        image_prefix=args.image_prefix,
                        n_workers=args.n_threads)
-
+
 if __name__ == '__main__':
-
+
     main()
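The main functional change in the get_image_sizes.py diff is that the worker pool is now closed and joined in a finally block after the parallel size reads; the remaining hunks are whitespace and help-string cleanup. A usage sketch follows, assuming the module path shown in the file list; the folder and output paths are placeholders, not taken from the package:

from megadetector.data_management.get_image_sizes import get_image_sizes

# Retrieve (path, width, height) tuples for every image under a folder, using
# four worker threads, and write the results to a .json file.
# 'camera_trap_images/' and 'image_sizes.json' are placeholder paths.
sizes = get_image_sizes('camera_trap_images/',
                        output_file='image_sizes.json',
                        n_workers=4,
                        use_threads=True)

for path, w, h in sizes:
    if w < 0:
        print('Failed to read {}'.format(path))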