megadetector 5.0.27__py3-none-any.whl → 5.0.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic; more details are linked from the original diff page.

Files changed (176):
  1. megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
  2. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
  3. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  7. megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
  8. megadetector/classification/aggregate_classifier_probs.py +3 -3
  9. megadetector/classification/analyze_failed_images.py +5 -5
  10. megadetector/classification/cache_batchapi_outputs.py +5 -5
  11. megadetector/classification/create_classification_dataset.py +11 -12
  12. megadetector/classification/crop_detections.py +10 -10
  13. megadetector/classification/csv_to_json.py +8 -8
  14. megadetector/classification/detect_and_crop.py +13 -15
  15. megadetector/classification/evaluate_model.py +7 -7
  16. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  17. megadetector/classification/json_to_azcopy_list.py +1 -1
  18. megadetector/classification/json_validator.py +29 -32
  19. megadetector/classification/map_classification_categories.py +9 -9
  20. megadetector/classification/merge_classification_detection_output.py +12 -9
  21. megadetector/classification/prepare_classification_script.py +19 -19
  22. megadetector/classification/prepare_classification_script_mc.py +23 -23
  23. megadetector/classification/run_classifier.py +4 -4
  24. megadetector/classification/save_mislabeled.py +6 -6
  25. megadetector/classification/train_classifier.py +1 -1
  26. megadetector/classification/train_classifier_tf.py +9 -9
  27. megadetector/classification/train_utils.py +10 -10
  28. megadetector/data_management/annotations/annotation_constants.py +1 -1
  29. megadetector/data_management/camtrap_dp_to_coco.py +45 -45
  30. megadetector/data_management/cct_json_utils.py +101 -101
  31. megadetector/data_management/cct_to_md.py +49 -49
  32. megadetector/data_management/cct_to_wi.py +33 -33
  33. megadetector/data_management/coco_to_labelme.py +75 -75
  34. megadetector/data_management/coco_to_yolo.py +189 -189
  35. megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
  36. megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
  37. megadetector/data_management/databases/integrity_check_json_db.py +202 -188
  38. megadetector/data_management/databases/subset_json_db.py +33 -33
  39. megadetector/data_management/generate_crops_from_cct.py +38 -38
  40. megadetector/data_management/get_image_sizes.py +54 -49
  41. megadetector/data_management/labelme_to_coco.py +130 -124
  42. megadetector/data_management/labelme_to_yolo.py +78 -72
  43. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  44. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  45. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  46. megadetector/data_management/lila/download_lila_subset.py +21 -24
  47. megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
  48. megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
  49. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  50. megadetector/data_management/lila/lila_common.py +70 -70
  51. megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
  52. megadetector/data_management/mewc_to_md.py +339 -340
  53. megadetector/data_management/ocr_tools.py +258 -252
  54. megadetector/data_management/read_exif.py +232 -223
  55. megadetector/data_management/remap_coco_categories.py +26 -26
  56. megadetector/data_management/remove_exif.py +31 -20
  57. megadetector/data_management/rename_images.py +187 -187
  58. megadetector/data_management/resize_coco_dataset.py +41 -41
  59. megadetector/data_management/speciesnet_to_md.py +41 -41
  60. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  61. megadetector/data_management/yolo_output_to_md_output.py +117 -120
  62. megadetector/data_management/yolo_to_coco.py +195 -188
  63. megadetector/detection/change_detection.py +831 -0
  64. megadetector/detection/process_video.py +341 -338
  65. megadetector/detection/pytorch_detector.py +308 -266
  66. megadetector/detection/run_detector.py +186 -166
  67. megadetector/detection/run_detector_batch.py +366 -364
  68. megadetector/detection/run_inference_with_yolov5_val.py +328 -325
  69. megadetector/detection/run_tiled_inference.py +312 -253
  70. megadetector/detection/tf_detector.py +24 -24
  71. megadetector/detection/video_utils.py +291 -283
  72. megadetector/postprocessing/add_max_conf.py +15 -11
  73. megadetector/postprocessing/categorize_detections_by_size.py +44 -44
  74. megadetector/postprocessing/classification_postprocessing.py +808 -311
  75. megadetector/postprocessing/combine_batch_outputs.py +20 -21
  76. megadetector/postprocessing/compare_batch_results.py +528 -517
  77. megadetector/postprocessing/convert_output_format.py +97 -97
  78. megadetector/postprocessing/create_crop_folder.py +220 -147
  79. megadetector/postprocessing/detector_calibration.py +173 -168
  80. megadetector/postprocessing/generate_csv_report.py +508 -0
  81. megadetector/postprocessing/load_api_results.py +25 -22
  82. megadetector/postprocessing/md_to_coco.py +129 -98
  83. megadetector/postprocessing/md_to_labelme.py +89 -83
  84. megadetector/postprocessing/md_to_wi.py +40 -40
  85. megadetector/postprocessing/merge_detections.py +87 -114
  86. megadetector/postprocessing/postprocess_batch_results.py +319 -302
  87. megadetector/postprocessing/remap_detection_categories.py +36 -36
  88. megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
  89. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  90. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  91. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
  92. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  93. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  94. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  95. megadetector/postprocessing/validate_batch_results.py +70 -70
  96. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  97. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
  98. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
  99. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -69
  100. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  101. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  102. megadetector/taxonomy_mapping/species_lookup.py +33 -33
  103. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  104. megadetector/taxonomy_mapping/taxonomy_graph.py +11 -11
  105. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  106. megadetector/utils/azure_utils.py +22 -22
  107. megadetector/utils/ct_utils.py +1019 -200
  108. megadetector/utils/directory_listing.py +21 -77
  109. megadetector/utils/gpu_test.py +22 -22
  110. megadetector/utils/md_tests.py +541 -518
  111. megadetector/utils/path_utils.py +1511 -406
  112. megadetector/utils/process_utils.py +41 -41
  113. megadetector/utils/sas_blob_utils.py +53 -49
  114. megadetector/utils/split_locations_into_train_val.py +73 -60
  115. megadetector/utils/string_utils.py +147 -26
  116. megadetector/utils/url_utils.py +463 -173
  117. megadetector/utils/wi_utils.py +2629 -2868
  118. megadetector/utils/write_html_image_list.py +137 -137
  119. megadetector/visualization/plot_utils.py +21 -21
  120. megadetector/visualization/render_images_with_thumbnails.py +37 -73
  121. megadetector/visualization/visualization_utils.py +424 -404
  122. megadetector/visualization/visualize_db.py +197 -190
  123. megadetector/visualization/visualize_detector_output.py +126 -98
  124. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/METADATA +6 -3
  125. megadetector-5.0.29.dist-info/RECORD +163 -0
  126. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
  127. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  128. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  129. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  130. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  131. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  132. megadetector/data_management/importers/awc_to_json.py +0 -191
  133. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  134. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  135. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  136. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  137. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  138. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  139. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  140. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  141. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  142. megadetector/data_management/importers/ena24_to_json.py +0 -276
  143. megadetector/data_management/importers/filenames_to_json.py +0 -386
  144. megadetector/data_management/importers/helena_to_cct.py +0 -283
  145. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  146. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  147. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  148. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  149. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  150. megadetector/data_management/importers/missouri_to_json.py +0 -490
  151. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  152. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  153. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  154. megadetector/data_management/importers/pc_to_json.py +0 -365
  155. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  156. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  157. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  158. megadetector/data_management/importers/rspb_to_json.py +0 -356
  159. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  160. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  161. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  162. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  163. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  164. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  165. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  166. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  167. megadetector/data_management/importers/ubc_to_json.py +0 -399
  168. megadetector/data_management/importers/umn_to_json.py +0 -507
  169. megadetector/data_management/importers/wellington_to_json.py +0 -263
  170. megadetector/data_management/importers/wi_to_json.py +0 -442
  171. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  172. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  173. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  174. megadetector-5.0.27.dist-info/RECORD +0 -208
  175. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
  176. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
@@ -2,14 +2,14 @@
2
2
 
3
3
  subset_json_db.py
4
4
 
5
- Select a subset of images (and associated annotations) from a .json file in COCO
5
+ Select a subset of images (and associated annotations) from a .json file in COCO
6
6
  Camera Traps format based on a string query.
7
7
 
8
8
  To subset .json files in the MegaDetector output format, see
9
9
  subset_json_detector_output.py.
10
10
 
11
11
  """
12
-
12
+
13
13
  #%% Constants and imports
14
14
 
15
15
  import os
@@ -18,6 +18,7 @@ import json
18
18
  import argparse
19
19
 
20
20
  from tqdm import tqdm
21
+ from megadetector.utils import ct_utils
21
22
  from copy import copy
22
23
 
23
24
 
@@ -25,22 +26,22 @@ from copy import copy
25
26
 
26
27
  def subset_json_db(input_json, query, output_json=None, ignore_case=False, verbose=False):
27
28
  """
28
- Given a json file (or dictionary already loaded from a json file), produce a new
29
- database containing only the images whose filenames contain the string 'query',
29
+ Given a json file (or dictionary already loaded from a json file), produce a new
30
+ database containing only the images whose filenames contain the string 'query',
30
31
  optionally writing that DB output to a new json file.
31
-
32
+
32
33
  Args:
33
34
  input_json (str): COCO Camera Traps .json file to load, or an already-loaded dict
34
- query (str or list): string to query for, only include images in the output whose filenames
35
+ query (str or list): string to query for, only include images in the output whose filenames
35
36
  contain this string. If this is a list, test for exact matches.
36
37
  output_json (str, optional): file to write the resulting .json file to
37
38
  ignore_case (bool, optional): whether to perform a case-insensitive search for [query]
38
39
  verbose (bool, optional): enable additional debug output
39
-
40
+
40
41
  Returns:
41
42
  dict: CCT dictionary containing a subset of the images and annotations in the input dict
42
43
  """
43
-
44
+
44
45
  # Load the input file if necessary
45
46
  if isinstance(input_json,str):
46
47
  print('Loading input .json...')
@@ -51,26 +52,26 @@ def subset_json_db(input_json, query, output_json=None, ignore_case=False, verbo
51
52
 
52
53
  # Find images matching the query
53
54
  images = []
54
-
55
+
55
56
  if isinstance(query,str):
56
-
57
+
57
58
  if ignore_case:
58
59
  query = query.lower()
59
-
60
+
60
61
  for im in tqdm(input_data['images']):
61
62
  fn = im['file_name']
62
63
  if ignore_case:
63
64
  fn = fn.lower()
64
65
  if query in fn:
65
66
  images.append(im)
66
-
67
+
67
68
  else:
68
-
69
+
69
70
  query = set(query)
70
-
71
+
71
72
  if ignore_case:
72
73
  query = set([s.lower() for s in query])
73
-
74
+
74
75
  for im in input_data['images']:
75
76
  fn = im['file_name']
76
77
  if ignore_case:
@@ -79,27 +80,26 @@ def subset_json_db(input_json, query, output_json=None, ignore_case=False, verbo
79
80
  images.append(im)
80
81
 
81
82
  image_ids = set([im['id'] for im in images])
82
-
83
+
83
84
  # Find annotations referring to those images
84
85
  annotations = []
85
-
86
+
86
87
  for ann in input_data['annotations']:
87
88
  if ann['image_id'] in image_ids:
88
89
  annotations.append(ann)
89
-
90
+
90
91
  output_data = copy(input_data)
91
92
  output_data['images'] = images
92
93
  output_data['annotations'] = annotations
93
-
94
+
94
95
  # Write the output file if requested
95
96
  if output_json is not None:
96
97
  if verbose:
97
98
  print('Writing output .json to {}'.format(output_json))
98
99
  output_dir = os.path.dirname(output_json)
99
100
  os.makedirs(output_dir,exist_ok=True)
100
- with open(output_json,'w') as f:
101
- json.dump(output_data,f,indent=1)
102
-
101
+ ct_utils.write_json(output_json, output_data)
102
+
103
103
  if verbose:
104
104
  print('Keeping {} of {} images, {} of {} annotations'.format(
105
105
  len(output_data['images']),len(input_data['images']),
@@ -111,33 +111,33 @@ def subset_json_db(input_json, query, output_json=None, ignore_case=False, verbo
111
111
  #%% Interactive driver
112
112
 
113
113
  if False:
114
-
114
+
115
115
  #%%
116
-
116
+
117
117
  input_json = r"e:\Statewide_wolf_container\idfg_20190409.json"
118
118
  output_json = r"e:\Statewide_wolf_container\idfg_20190409_clearcreek.json"
119
119
  query = 'clearcreek'
120
120
  ignore_case = True
121
121
  db = subset_json_db(input_json, query, output_json, ignore_case)
122
-
122
+
123
123
 
124
124
  #%% Command-line driver
125
125
 
126
- def main():
127
-
126
+ def main(): # noqa
127
+
128
128
  parser = argparse.ArgumentParser()
129
129
  parser.add_argument('input_json', type=str, help='Input file (a COCO Camera Traps .json file)')
130
- parser.add_argument('output_json', type=str, help='Output file')
131
- parser.add_argument('query', type=str, help='Filename query')
130
+ parser.add_argument('output_json', type=str, help='Output file')
131
+ parser.add_argument('query', type=str, help='Filename query')
132
132
  parser.add_argument('--ignore_case', action='store_true')
133
-
133
+
134
134
  if len(sys.argv[1:]) == 0:
135
135
  parser.print_help()
136
136
  parser.exit()
137
137
 
138
- args = parser.parse_args()
139
-
138
+ args = parser.parse_args()
139
+
140
140
  subset_json_db(args.input_json,args.query,args.output_json,args.ignore_case)
141
141
 
142
- if __name__ == '__main__':
142
+ if __name__ == '__main__':
143
143
  main()
@@ -22,7 +22,7 @@ def generate_crops_from_cct(cct_file,image_dir,output_dir,padding=0,flat_output=
22
22
  """
23
23
  Given a .json file in COCO Camera Traps format, creates a cropped image for
24
24
  each bounding box.
25
-
25
+
26
26
  Args:
27
27
  cct_file (str): the COCO .json file from which we should load data
28
28
  image_dir (str): the folder where the images live; filenames in the .json
@@ -31,119 +31,119 @@ def generate_crops_from_cct(cct_file,image_dir,output_dir,padding=0,flat_output=
31
31
  padding (float, optional): number of pixels we should expand each box before
32
32
  cropping
33
33
  flat_output (bool, optional): if False, folder structure will be preserved
34
- in the output, e.g. the image a/b/c/d.jpg will result in image files
34
+ in the output, e.g. the image a/b/c/d.jpg will result in image files
35
35
  in the output folder called, e.g., a/b/c/d_crop_000_id_12345.jpg. If
36
- [flat_output] is True, the corresponding output image will be
37
- a_b_c_d_crop_000_id_12345.jpg.
36
+ [flat_output] is True, the corresponding output image will be
37
+ a_b_c_d_crop_000_id_12345.jpg.
38
38
  """
39
-
39
+
40
40
  ## Read and validate input
41
-
41
+
42
42
  assert os.path.isfile(cct_file)
43
43
  assert os.path.isdir(image_dir)
44
44
  os.makedirs(output_dir,exist_ok=True)
45
45
 
46
46
  with open(cct_file,'r') as f:
47
47
  d = json.load(f)
48
-
49
-
48
+
49
+
50
50
  ## Find annotations for each image
51
-
51
+
52
52
  from collections import defaultdict
53
-
53
+
54
54
  # This actually maps image IDs to annotations, but only to annotations
55
55
  # containing boxes
56
56
  image_id_to_boxes = defaultdict(list)
57
-
57
+
58
58
  n_boxes = 0
59
-
59
+
60
60
  for ann in d['annotations']:
61
61
  if 'bbox' in ann:
62
62
  image_id_to_boxes[ann['image_id']].append(ann)
63
63
  n_boxes += 1
64
-
64
+
65
65
  print('Found {} boxes in {} annotations for {} images'.format(
66
66
  n_boxes,len(d['annotations']),len(d['images'])))
67
-
68
-
67
+
68
+
69
69
  ## Generate crops
70
-
70
+
71
71
  # im = d['images'][0]
72
72
  for im in tqdm(d['images']):
73
-
73
+
74
74
  input_image_fn = os.path.join(os.path.join(image_dir,im['file_name']))
75
75
  assert os.path.isfile(input_image_fn), 'Could not find image {}'.format(input_image_fn)
76
-
76
+
77
77
  if im['id'] not in image_id_to_boxes:
78
78
  continue
79
-
79
+
80
80
  annotations_this_image = image_id_to_boxes[im['id']]
81
-
81
+
82
82
  # Load the image
83
83
  img = Image.open(input_image_fn)
84
-
84
+
85
85
  # Generate crops
86
86
  # i_ann = 0; ann = annotations_this_image[i_ann]
87
87
  for i_ann,ann in enumerate(annotations_this_image):
88
-
88
+
89
89
  # x/y/w/h, origin at the upper-left
90
90
  bbox = ann['bbox']
91
-
91
+
92
92
  xmin = bbox[0]
93
93
  ymin = bbox[1]
94
94
  xmax = xmin + bbox[2]
95
95
  ymax = ymin + bbox[3]
96
-
96
+
97
97
  xmin -= padding / 2
98
98
  ymin -= padding / 2
99
99
  xmax += padding / 2
100
100
  ymax += padding / 2
101
-
101
+
102
102
  xmin = max(xmin,0)
103
103
  ymin = max(ymin,0)
104
104
  xmax = min(xmax,img.width-1)
105
105
  ymax = min(ymax,img.height-1)
106
-
106
+
107
107
  crop = img.crop(box=[xmin, ymin, xmax, ymax])
108
-
108
+
109
109
  output_fn = os.path.splitext(im['file_name'])[0].replace('\\','/')
110
110
  if flat_output:
111
111
  output_fn = output_fn.replace('/','_')
112
112
  output_fn = output_fn + '_crop' + str(i_ann).zfill(3) + '_id_' + ann['id']
113
113
  output_fn = output_fn + '.jpg'
114
-
114
+
115
115
  output_full_path = os.path.join(output_dir,output_fn)
116
-
116
+
117
117
  if not flat_output:
118
118
  os.makedirs(os.path.dirname(output_full_path),exist_ok=True)
119
-
119
+
120
120
  crop.save(output_full_path)
121
-
121
+
122
122
  # ...for each box
123
-
123
+
124
124
  # ...for each image
125
-
125
+
126
126
  # ...generate_crops_from_cct()
127
127
 
128
128
 
129
129
  #%% Interactive driver
130
130
 
131
131
  if False:
132
-
132
+
133
133
  pass
134
134
 
135
135
  #%%
136
-
136
+
137
137
  cct_file = os.path.expanduser('~/data/noaa/noaa_estuary_fish.json')
138
138
  image_dir = os.path.expanduser('~/data/noaa/JPEGImages')
139
139
  padding = 50
140
140
  flat_output = True
141
141
  output_dir = '/home/user/tmp/noaa-fish-crops'
142
-
142
+
143
143
  generate_crops_from_cct(cct_file,image_dir,output_dir,padding,flat_output=True)
144
144
  files = os.listdir(output_dir)
145
-
146
-
145
+
146
+
147
147
  #%% Command-line driver
148
148
 
149
149
  # TODO
@@ -2,7 +2,7 @@
2
2
 
3
3
  get_image_sizes.py
4
4
 
5
- Given a json-formatted list of image filenames, retrieves the width and height of
5
+ Given a json-formatted list of image filenames, retrieves the width and height of
6
6
  every image, optionally writing the results to a new .json file.
7
7
 
8
8
  """
@@ -35,45 +35,45 @@ def _get_image_size(image_path,image_prefix=None):
35
35
  Support function to get the size of a single image. Returns a (path,w,h) tuple.
36
36
  w and h will be -1 if the image fails to load.
37
37
  """
38
-
38
+
39
39
  if image_prefix is not None:
40
40
  full_path = os.path.join(image_prefix,image_path)
41
41
  else:
42
42
  full_path = image_path
43
-
43
+
44
44
  # Is this image on disk?
45
45
  if not os.path.isfile(full_path):
46
46
  print('Could not find image {}'.format(full_path))
47
47
  return (image_path,-1,-1)
48
48
 
49
- try:
49
+ try:
50
50
  pil_im = Image.open(full_path)
51
- w = pil_im.width
51
+ w = pil_im.width
52
52
  h = pil_im.height
53
53
  return (image_path,w,h)
54
- except Exception as e:
54
+ except Exception as e:
55
55
  print('Error reading image {}: {}'.format(full_path,str(e)))
56
56
  return (image_path,-1,-1)
57
-
58
-
57
+
58
+
59
59
  def get_image_sizes(filenames,image_prefix=None,output_file=None,
60
60
  n_workers=default_n_threads,use_threads=True,
61
61
  recursive=True):
62
62
  """
63
63
  Gets the width and height of all images in [filenames], which can be:
64
-
64
+
65
65
  * A .json-formatted file containing list of strings
66
66
  * A folder
67
67
  * A list of files
68
68
 
69
69
  ...returning a list of (path,w,h) tuples, and optionally writing the results to [output_file].
70
-
70
+
71
71
  Args:
72
- filenames (str or list): the image filenames for which we should retrieve sizes,
73
- can be the name of a .json-formatted file containing list of strings, a folder
72
+ filenames (str or list): the image filenames for which we should retrieve sizes,
73
+ can be the name of a .json-formatted file containing list of strings, a folder
74
74
  in which we should enumerate images, or a list of files.
75
75
  image_prefix (str, optional): optional prefix to add to images to get to full paths;
76
- useful when [filenames] contains relative files, in which case [image_prefix] is the
76
+ useful when [filenames] contains relative files, in which case [image_prefix] is the
77
77
  base folder for the source images.
78
78
  output_file (str, optional): a .json file to write the image sizes
79
79
  n_workers (int, optional): number of parallel workers to use, set to <=1 to
@@ -82,57 +82,62 @@ def get_image_sizes(filenames,image_prefix=None,output_file=None,
82
82
  for parallelization; not relevant if [n_workers] <= 1
83
83
  recursive (bool, optional): only relevant if [filenames] is actually a folder,
84
84
  determines whether image enumeration within that folder will be recursive
85
-
85
+
86
86
  Returns:
87
87
  list: list of (path,w,h) tuples
88
- """
89
-
88
+ """
89
+
90
90
  if output_file is not None:
91
91
  assert os.path.isdir(os.path.dirname(output_file)), \
92
92
  'Illegal output file {}, parent folder does not exist'.format(output_file)
93
-
93
+
94
94
  if isinstance(filenames,str) and os.path.isfile(filenames):
95
- with open(filenames,'r') as f:
95
+ with open(filenames,'r') as f:
96
96
  filenames = json.load(f)
97
97
  filenames = [s.strip() for s in filenames]
98
98
  elif isinstance(filenames,str) and os.path.isdir(filenames):
99
99
  filenames = find_images(filenames,recursive=recursive,
100
100
  return_relative_paths=False,convert_slashes=True)
101
101
  else:
102
- assert isinstance(filenames,list)
103
-
102
+ assert isinstance(filenames,list)
103
+
104
104
  if n_workers <= 1:
105
-
105
+
106
106
  all_results = []
107
107
  for i_file,fn in tqdm(enumerate(filenames),total=len(filenames)):
108
108
  all_results.append(_get_image_size(fn,image_prefix=image_prefix))
109
-
109
+
110
110
  else:
111
-
111
+
112
112
  print('Creating a pool with {} workers'.format(n_workers))
113
113
  if use_threads:
114
- pool = ThreadPool(n_workers)
114
+ pool = ThreadPool(n_workers)
115
115
  else:
116
116
  pool = Pool(n_workers)
117
117
  # all_results = list(tqdm(pool.imap(process_image, filenames), total=len(filenames)))
118
- all_results = list(tqdm(pool.imap(
119
- partial(_get_image_size,image_prefix=image_prefix), filenames), total=len(filenames)))
120
-
118
+ try:
119
+ all_results = list(tqdm(pool.imap(
120
+ partial(_get_image_size,image_prefix=image_prefix), filenames), total=len(filenames)))
121
+ finally:
122
+ pool.close()
123
+ pool.join()
124
+ print("Pool closed and joined for image size reads")
125
+
121
126
  if output_file is not None:
122
127
  with open(output_file,'w') as f:
123
128
  json.dump(all_results,f,indent=1)
124
-
129
+
125
130
  return all_results
126
131
 
127
-
132
+
128
133
  #%% Interactive driver
129
134
 
130
135
  if False:
131
136
 
132
- pass
137
+ pass
133
138
 
134
139
  #%%
135
-
140
+
136
141
  # List images in a test folder
137
142
  base_dir = r'c:\temp\test_images'
138
143
  image_list_file = os.path.join(base_dir,'images.json')
@@ -140,50 +145,50 @@ if False:
140
145
  image_size_file = os.path.join(base_dir,'image_sizes.json')
141
146
  from megadetector.utils import path_utils
142
147
  image_names = path_utils.find_images(base_dir,recursive=True)
143
-
148
+
144
149
  with open(image_list_file,'w') as f:
145
150
  json.dump(image_names,f,indent=1)
146
-
151
+
147
152
  relative_image_names = []
148
153
  for s in image_names:
149
154
  relative_image_names.append(os.path.relpath(s,base_dir))
150
-
155
+
151
156
  with open(relative_image_list_file,'w') as f:
152
157
  json.dump(relative_image_names,f,indent=1)
153
-
154
-
158
+
159
+
155
160
  #%%
156
-
161
+
157
162
  get_image_sizes(relative_image_list_file,image_size_file,image_prefix=base_dir,n_threads=4)
158
-
159
-
163
+
164
+
160
165
  #%% Command-line driver
161
-
162
- def main():
163
-
166
+
167
+ def main(): # noqa
168
+
164
169
  parser = argparse.ArgumentParser()
165
170
  parser.add_argument('filenames',type=str,
166
171
  help='Folder from which we should fetch image sizes, or .json file with a list of filenames')
167
172
  parser.add_argument('output_file',type=str,
168
173
  help='Output file (.json) to which we should write image size information')
169
174
  parser.add_argument('--image_prefix', type=str, default=None,
170
- help='Prefix to append to image filenames, only relevant if [filenames] points to a list of ' + \
171
- 'relative paths')
175
+ help='Prefix to append to image filenames, only relevant if [filenames] points to a ' + \
176
+ 'list of relative paths')
172
177
  parser.add_argument('--n_threads', type=int, default=default_n_threads,
173
178
  help='Number of concurrent workers, set to <=1 to disable parallelization (default {})'.format(
174
179
  default_n_threads))
175
-
180
+
176
181
  if len(sys.argv[1:])==0:
177
182
  parser.print_help()
178
183
  parser.exit()
179
-
184
+
180
185
  args = parser.parse_args()
181
-
186
+
182
187
  _ = get_image_sizes(filenames=args.filenames,
183
188
  output_file=args.output_file,
184
189
  image_prefix=args.image_prefix,
185
190
  n_workers=args.n_threads)
186
-
191
+
187
192
  if __name__ == '__main__':
188
-
193
+
189
194
  main()