megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (197) hide show
  1. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  2. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  3. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  4. megadetector/classification/aggregate_classifier_probs.py +3 -3
  5. megadetector/classification/analyze_failed_images.py +5 -5
  6. megadetector/classification/cache_batchapi_outputs.py +5 -5
  7. megadetector/classification/create_classification_dataset.py +11 -12
  8. megadetector/classification/crop_detections.py +10 -10
  9. megadetector/classification/csv_to_json.py +8 -8
  10. megadetector/classification/detect_and_crop.py +13 -15
  11. megadetector/classification/efficientnet/model.py +8 -8
  12. megadetector/classification/efficientnet/utils.py +6 -5
  13. megadetector/classification/evaluate_model.py +7 -7
  14. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  15. megadetector/classification/json_to_azcopy_list.py +1 -1
  16. megadetector/classification/json_validator.py +29 -32
  17. megadetector/classification/map_classification_categories.py +9 -9
  18. megadetector/classification/merge_classification_detection_output.py +12 -9
  19. megadetector/classification/prepare_classification_script.py +19 -19
  20. megadetector/classification/prepare_classification_script_mc.py +26 -26
  21. megadetector/classification/run_classifier.py +4 -4
  22. megadetector/classification/save_mislabeled.py +6 -6
  23. megadetector/classification/train_classifier.py +1 -1
  24. megadetector/classification/train_classifier_tf.py +9 -9
  25. megadetector/classification/train_utils.py +10 -10
  26. megadetector/data_management/annotations/annotation_constants.py +1 -2
  27. megadetector/data_management/camtrap_dp_to_coco.py +79 -46
  28. megadetector/data_management/cct_json_utils.py +103 -103
  29. megadetector/data_management/cct_to_md.py +49 -49
  30. megadetector/data_management/cct_to_wi.py +33 -33
  31. megadetector/data_management/coco_to_labelme.py +75 -75
  32. megadetector/data_management/coco_to_yolo.py +210 -193
  33. megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
  34. megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
  35. megadetector/data_management/databases/integrity_check_json_db.py +228 -200
  36. megadetector/data_management/databases/subset_json_db.py +33 -33
  37. megadetector/data_management/generate_crops_from_cct.py +88 -39
  38. megadetector/data_management/get_image_sizes.py +54 -49
  39. megadetector/data_management/labelme_to_coco.py +133 -125
  40. megadetector/data_management/labelme_to_yolo.py +159 -73
  41. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  42. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  43. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  44. megadetector/data_management/lila/download_lila_subset.py +21 -24
  45. megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
  46. megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
  47. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  48. megadetector/data_management/lila/lila_common.py +73 -70
  49. megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
  50. megadetector/data_management/mewc_to_md.py +344 -340
  51. megadetector/data_management/ocr_tools.py +262 -255
  52. megadetector/data_management/read_exif.py +249 -227
  53. megadetector/data_management/remap_coco_categories.py +90 -28
  54. megadetector/data_management/remove_exif.py +81 -21
  55. megadetector/data_management/rename_images.py +187 -187
  56. megadetector/data_management/resize_coco_dataset.py +588 -120
  57. megadetector/data_management/speciesnet_to_md.py +41 -41
  58. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  59. megadetector/data_management/yolo_output_to_md_output.py +248 -122
  60. megadetector/data_management/yolo_to_coco.py +333 -191
  61. megadetector/detection/change_detection.py +832 -0
  62. megadetector/detection/process_video.py +340 -337
  63. megadetector/detection/pytorch_detector.py +358 -278
  64. megadetector/detection/run_detector.py +399 -186
  65. megadetector/detection/run_detector_batch.py +404 -377
  66. megadetector/detection/run_inference_with_yolov5_val.py +340 -327
  67. megadetector/detection/run_tiled_inference.py +257 -249
  68. megadetector/detection/tf_detector.py +24 -24
  69. megadetector/detection/video_utils.py +332 -295
  70. megadetector/postprocessing/add_max_conf.py +19 -11
  71. megadetector/postprocessing/categorize_detections_by_size.py +45 -45
  72. megadetector/postprocessing/classification_postprocessing.py +468 -433
  73. megadetector/postprocessing/combine_batch_outputs.py +23 -23
  74. megadetector/postprocessing/compare_batch_results.py +590 -525
  75. megadetector/postprocessing/convert_output_format.py +106 -102
  76. megadetector/postprocessing/create_crop_folder.py +347 -147
  77. megadetector/postprocessing/detector_calibration.py +173 -168
  78. megadetector/postprocessing/generate_csv_report.py +508 -499
  79. megadetector/postprocessing/load_api_results.py +48 -27
  80. megadetector/postprocessing/md_to_coco.py +133 -102
  81. megadetector/postprocessing/md_to_labelme.py +107 -90
  82. megadetector/postprocessing/md_to_wi.py +40 -40
  83. megadetector/postprocessing/merge_detections.py +92 -114
  84. megadetector/postprocessing/postprocess_batch_results.py +319 -301
  85. megadetector/postprocessing/remap_detection_categories.py +91 -38
  86. megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
  87. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  88. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  89. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
  90. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  91. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  92. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  93. megadetector/postprocessing/validate_batch_results.py +70 -70
  94. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  95. megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
  96. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
  97. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
  98. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  99. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  100. megadetector/taxonomy_mapping/species_lookup.py +156 -74
  101. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  102. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  103. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  104. megadetector/utils/ct_utils.py +1049 -211
  105. megadetector/utils/directory_listing.py +21 -77
  106. megadetector/utils/gpu_test.py +22 -22
  107. megadetector/utils/md_tests.py +632 -529
  108. megadetector/utils/path_utils.py +1520 -431
  109. megadetector/utils/process_utils.py +41 -41
  110. megadetector/utils/split_locations_into_train_val.py +62 -62
  111. megadetector/utils/string_utils.py +148 -27
  112. megadetector/utils/url_utils.py +489 -176
  113. megadetector/utils/wi_utils.py +2658 -2526
  114. megadetector/utils/write_html_image_list.py +137 -137
  115. megadetector/visualization/plot_utils.py +34 -30
  116. megadetector/visualization/render_images_with_thumbnails.py +39 -74
  117. megadetector/visualization/visualization_utils.py +487 -435
  118. megadetector/visualization/visualize_db.py +232 -198
  119. megadetector/visualization/visualize_detector_output.py +82 -76
  120. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
  121. megadetector-10.0.0.dist-info/RECORD +139 -0
  122. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
  123. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  124. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  125. megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
  126. megadetector/api/batch_processing/api_core/server.py +0 -294
  127. megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
  128. megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
  129. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  130. megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
  131. megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
  132. megadetector/api/batch_processing/api_core/server_utils.py +0 -88
  133. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  134. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  135. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  136. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  137. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  138. megadetector/api/synchronous/__init__.py +0 -0
  139. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  140. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
  141. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
  142. megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
  143. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  144. megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
  145. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  146. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  147. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  148. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  149. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  150. megadetector/data_management/importers/awc_to_json.py +0 -191
  151. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  152. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  153. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  154. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  155. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  156. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  157. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  158. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  159. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  160. megadetector/data_management/importers/ena24_to_json.py +0 -276
  161. megadetector/data_management/importers/filenames_to_json.py +0 -386
  162. megadetector/data_management/importers/helena_to_cct.py +0 -283
  163. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  164. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  165. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  166. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  167. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  168. megadetector/data_management/importers/missouri_to_json.py +0 -490
  169. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  170. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  171. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  172. megadetector/data_management/importers/pc_to_json.py +0 -365
  173. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  174. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  175. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  176. megadetector/data_management/importers/rspb_to_json.py +0 -356
  177. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  178. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  179. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  180. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  181. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  182. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  183. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  184. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  185. megadetector/data_management/importers/ubc_to_json.py +0 -399
  186. megadetector/data_management/importers/umn_to_json.py +0 -507
  187. megadetector/data_management/importers/wellington_to_json.py +0 -263
  188. megadetector/data_management/importers/wi_to_json.py +0 -442
  189. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  190. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  191. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  192. megadetector/utils/azure_utils.py +0 -178
  193. megadetector/utils/sas_blob_utils.py +0 -509
  194. megadetector-5.0.28.dist-info/RECORD +0 -209
  195. /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
  196. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
  197. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
@@ -12,178 +12,646 @@ scaling bounding boxes accordingly.
12
12
  import os
13
13
  import json
14
14
  import shutil
15
+ import argparse
16
+ import sys
15
17
 
16
- from tqdm import tqdm
17
18
  from collections import defaultdict
19
+ from multiprocessing.pool import Pool, ThreadPool
20
+ from functools import partial
21
+
22
+ from PIL import Image
23
+ from tqdm import tqdm
18
24
 
19
25
  from megadetector.utils.path_utils import insert_before_extension
20
26
  from megadetector.visualization.visualization_utils import \
21
27
  open_image, resize_image, exif_preserving_save
28
+ from megadetector.utils.ct_utils import make_test_folder
29
+ from megadetector.utils.ct_utils import write_json
22
30
 
23
31
 
24
32
  #%% Functions
25
33
 
26
- def resize_coco_dataset(input_folder,input_filename,
27
- output_folder,output_filename,
34
+ def _process_single_image_for_resize(image_data,
35
+ input_folder,
36
+ output_folder,
37
+ target_size,
38
+ correct_size_image_handling,
39
+ unavailable_image_handling,
40
+ no_enlarge_width,
41
+ verbose):
42
+ """
43
+ Processes a single image: loads, resizes/copies, updates metadata, and scales annotations.
44
+
45
+ [image_data] is a tuple of [im,annotations]
46
+ """
47
+
48
+ assert unavailable_image_handling in ('error','omit'), \
49
+ f'Illegal unavailable_image_handling {unavailable_image_handling}'
50
+
51
+ assert isinstance(image_data,tuple) and len(image_data) == 2
52
+ assert isinstance(image_data[0],dict)
53
+ assert isinstance(image_data[1],list)
54
+ im = image_data[0].copy()
55
+ annotations_this_image = [ann.copy() for ann in image_data[1]]
56
+
57
+ input_fn_relative = im['file_name']
58
+ input_fn_abs = os.path.join(input_folder, input_fn_relative)
59
+
60
+ if not os.path.isfile(input_fn_abs):
61
+ if unavailable_image_handling == 'error':
62
+ raise FileNotFoundError('Could not find file {}'.format(input_fn_abs))
63
+ else:
64
+ print("Can't find image {}, skipping".format(input_fn_relative))
65
+ return None, None
66
+
67
+ output_fn_abs = os.path.join(output_folder, input_fn_relative)
68
+ os.makedirs(os.path.dirname(output_fn_abs), exist_ok=True)
69
+
70
+ if verbose:
71
+ print('Resizing {} to {}'.format(input_fn_abs,output_fn_abs))
72
+
73
+ try:
74
+ pil_im = open_image(input_fn_abs)
75
+ input_w = pil_im.width
76
+ input_h = pil_im.height
77
+ except Exception as e:
78
+ if unavailable_image_handling == 'error':
79
+ raise Exception('Could not open image {}: {}'.format(
80
+ input_fn_relative, str(e)))
81
+ else:
82
+ print("Can't open image {}, skipping".format(input_fn_relative))
83
+ return None, None
84
+
85
+ image_is_already_target_size = \
86
+ (input_w == target_size[0]) and (input_h == target_size[1])
87
+ if no_enlarge_width and (input_w < target_size[0]):
88
+ image_is_already_target_size = True
89
+ preserve_original_size = \
90
+ (target_size[0] == -1) and (target_size[1] == -1)
91
+
92
+ # Do we need to resize, or can we try to get away with a copy?
93
+ if image_is_already_target_size or preserve_original_size:
94
+ output_w = input_w
95
+ output_h = input_h
96
+ if correct_size_image_handling == 'copy':
97
+ if input_fn_abs != output_fn_abs: # only copy if src and dst are different
98
+ shutil.copyfile(input_fn_abs, output_fn_abs)
99
+ elif correct_size_image_handling == 'rewrite':
100
+ exif_preserving_save(pil_im, output_fn_abs)
101
+ else:
102
+ raise ValueError(
103
+ f'Unrecognized value {correct_size_image_handling} for correct_size_image_handling')
104
+ else:
105
+ try:
106
+ pil_im = resize_image(pil_im, target_size[0], target_size[1],
107
+ no_enlarge_width=no_enlarge_width)
108
+ output_w = pil_im.width
109
+ output_h = pil_im.height
110
+ exif_preserving_save(pil_im, output_fn_abs)
111
+ except Exception as e:
112
+ if unavailable_image_handling == 'error':
113
+ raise Exception('Could not resize image {}: {}'.format(
114
+ input_fn_relative, str(e)))
115
+ else:
116
+ print("Can't resize image {}, skipping".format(input_fn_relative))
117
+ return None,None
118
+
119
+ im['width'] = output_w
120
+ im['height'] = output_h
121
+
122
+ for ann in annotations_this_image:
123
+
124
+ if 'bbox' in ann:
125
+ bbox = ann['bbox']
126
+ if (output_w != input_w) or (output_h != input_h):
127
+ width_scale = output_w / input_w
128
+ height_scale = output_h / input_h
129
+ bbox = [
130
+ bbox[0] * width_scale,
131
+ bbox[1] * height_scale,
132
+ bbox[2] * width_scale,
133
+ bbox[3] * height_scale
134
+ ]
135
+ ann['bbox'] = bbox
136
+
137
+ # ...for each annotation associated with this image
138
+
139
+ return im, annotations_this_image
140
+
141
+ # ...def _process_single_image_for_resize(...)
142
+
143
+
144
+ def resize_coco_dataset(input_folder,
145
+ input_filename,
146
+ output_folder,
147
+ output_filename=None,
28
148
  target_size=(-1,-1),
29
- correct_size_image_handling='copy'):
149
+ correct_size_image_handling='copy',
150
+ unavailable_image_handling='error',
151
+ n_workers=1,
152
+ pool_type='thread',
153
+ no_enlarge_width=True,
154
+ verbose=False):
30
155
  """
31
156
  Given a COCO-formatted dataset (images in input_folder, data in input_filename), resizes
32
157
  all the images to a target size (in output_folder) and scales bounding boxes accordingly.
33
-
158
+
34
159
  Args:
35
- input_folder (str): the folder where images live; filenames in [input_filename] should
160
+ input_folder (str): the folder where images live; filenames in [input_filename] should
36
161
  be relative to [input_folder]
37
162
  input_filename (str): the (input) COCO-formatted .json file containing annotations
38
163
  output_folder (str): the folder to which we should write resized images; can be the
39
164
  same as [input_folder], in which case images are over-written
40
- output_filename (str): the COCO-formatted .json file we should generate that refers to
41
- the resized images
42
- target_size (list or tuple of ints): this should be tuple/list of ints, with length 2 (w,h).
43
- If either dimension is -1, aspect ratio will be preserved. If both dimensions are -1, this means
44
- "keep the original size". If both dimensions are -1 and correct_size_image_handling is copy, this
45
- function is basically a no-op.
46
- correct_size_image_handling (str): can be 'copy' (in which case the original image is just copied
165
+ output_filename (str, optional): the COCO-formatted .json file we should generate that refers
166
+ to the resized images
167
+ target_size (list or tuple of ints, optional): this should be tuple/list of ints, with length 2 (w,h).
168
+ If either dimension is -1, aspect ratio will be preserved. If both dimensions are -1, this means
169
+ "keep the original size". If both dimensions are -1 and correct_size_image_handling is copy, this
170
+ function is basically a no-op.
171
+ correct_size_image_handling (str, optional): what to do in the case where the original size
172
+ already matches the target size. Can be 'copy' (in which case the original image is just copied
47
173
  to the output folder) or 'rewrite' (in which case the image is opened via PIL and re-written,
48
- attempting to preserve the same quality). The only reason to do use 'rewrite' 'is the case where
49
- you're superstitious about biases coming from images in a training set being written by different
174
+ attempting to preserve the same quality). The only reason to use 'rewrite' is the case where
175
+ you're superstitious about biases coming from images in a training set being written by different
50
176
  image encoders.
51
-
177
+ unavailable_image_handling (str, optional): what to do when a file can't be opened. Can be
178
+ 'error' or 'omit'.
179
+ n_workers (int, optional): number of workers to use for parallel processing.
180
+ Defaults to 1 (no parallelization). If <= 1, processing is sequential.
181
+ pool_type (str, optional): type of multiprocessing pool to use ('thread' or 'process').
182
+ Defaults to 'thread'. Only used if n_workers > 1.
183
+ no_enlarge_width (bool, optional): if [no_enlarge_width] is True, and
184
+ [target width] is larger than the original image width, does not modify the image,
185
+ but still writes it
186
+ verbose (bool, optional): enable additional debug output
187
+
52
188
  Returns:
53
189
  dict: the COCO database with resized images, identical to the content of [output_filename]
54
190
  """
55
-
191
+
192
+ # Validate arguments
193
+
194
+ assert unavailable_image_handling in ('error','omit'), \
195
+ f'Illegal unavailable_image_handling {unavailable_image_handling}'
196
+
56
197
  # Read input data
57
198
  with open(input_filename,'r') as f:
58
199
  d = json.load(f)
59
-
200
+
60
201
  # Map image IDs to annotations
61
202
  image_id_to_annotations = defaultdict(list)
62
203
  for ann in d['annotations']:
63
204
  image_id_to_annotations[ann['image_id']].append(ann)
64
-
65
- # For each image
66
-
67
- # TODO: this is trivially parallelizable
68
- #
69
- # im = d['images'][0]
70
- for im in tqdm(d['images']):
71
-
72
- input_fn_relative = im['file_name']
73
- input_fn_abs = os.path.join(input_folder,input_fn_relative)
74
- assert os.path.isfile(input_fn_abs), "Can't find image file {}".format(input_fn_abs)
75
-
76
- output_fn_abs = os.path.join(output_folder,input_fn_relative)
77
- os.makedirs(os.path.dirname(output_fn_abs),exist_ok=True)
78
-
79
- pil_im = open_image(input_fn_abs)
80
- input_w = pil_im.width
81
- input_h = pil_im.height
82
-
83
- image_is_already_target_size = \
84
- (input_w == target_size[0]) and (input_h == target_size[1])
85
- preserve_original_size = \
86
- (target_size[0] == -1) and (target_size[1] == -1)
87
-
88
- # If the image is already the right size...
89
- if (image_is_already_target_size or preserve_original_size):
90
- output_w = input_w
91
- output_h = input_h
92
- if correct_size_image_handling == 'copy':
93
- shutil.copyfile(input_fn_abs,output_fn_abs)
94
- elif correct_size_image_handling == 'rewrite':
95
- exif_preserving_save(pil_im,output_fn_abs)
96
- else:
97
- raise ValueError('Unrecognized value {} for correct_size_image_handling'.format(
98
- correct_size_image_handling))
205
+
206
+ original_images = d['images']
207
+
208
+ # Our worker function will take tuples of images and their
209
+ # associated annotations
210
+ image_annotation_tuples = []
211
+ for im in original_images:
212
+ if im['id'] not in image_id_to_annotations:
213
+ annotations_this_image = []
99
214
  else:
100
- pil_im = resize_image(pil_im, target_size[0], target_size[1])
101
- output_w = pil_im.width
102
- output_h = pil_im.height
103
- exif_preserving_save(pil_im,output_fn_abs)
104
-
105
- im['width'] = output_w
106
- im['height'] = output_h
107
-
108
- # For each box
109
- annotations_this_image = image_id_to_annotations[im['id']]
110
-
111
- # ann = annotations_this_image[0]
112
- for ann in annotations_this_image:
113
-
114
- if 'bbox' in ann:
115
-
116
- # boxes are [x,y,w,h]
117
- bbox = ann['bbox']
118
-
119
- # Do we need to scale this box?
120
- if (output_w != input_w) or (output_h != input_h):
121
- width_scale = output_w/input_w
122
- height_scale = output_h/input_h
123
- bbox = \
124
- [bbox[0] * width_scale,
125
- bbox[1] * height_scale,
126
- bbox[2] * width_scale,
127
- bbox[3] * height_scale]
128
-
129
- ann['bbox'] = bbox
130
-
131
- # ...if this annotation has a box
132
-
133
- # ...for each annotation
134
-
135
- # ...for each image
136
-
137
- # Write output file
138
- with open(output_filename,'w') as f:
139
- json.dump(d,f,indent=1)
140
-
215
+ annotations_this_image = image_id_to_annotations[im['id']]
216
+ image_annotation_tuple = (im,annotations_this_image)
217
+ image_annotation_tuples.append(image_annotation_tuple)
218
+
219
+ processed_results = []
220
+
221
+ if n_workers <= 1:
222
+
223
+ for image_annotation_tuple in tqdm(image_annotation_tuples,
224
+ desc="Resizing images sequentially"):
225
+ result = _process_single_image_for_resize(
226
+ image_data=image_annotation_tuple,
227
+ input_folder=input_folder,
228
+ output_folder=output_folder,
229
+ target_size=target_size,
230
+ correct_size_image_handling=correct_size_image_handling,
231
+ unavailable_image_handling=unavailable_image_handling,
232
+ no_enlarge_width=no_enlarge_width,
233
+ verbose=verbose
234
+ )
235
+ processed_results.append(result)
236
+
237
+ else:
238
+ try:
239
+
240
+ assert pool_type in ('process', 'thread'), f'Illegal pool type {pool_type}'
241
+ selected_pool = ThreadPool if (pool_type == 'thread') else Pool
242
+
243
+ print(f'Starting a {pool_type} pool of {n_workers} workers for image resizing')
244
+ pool = selected_pool(n_workers)
245
+
246
+ p_process_image = partial(_process_single_image_for_resize,
247
+ input_folder=input_folder,
248
+ output_folder=output_folder,
249
+ target_size=target_size,
250
+ correct_size_image_handling=correct_size_image_handling,
251
+ unavailable_image_handling=unavailable_image_handling,
252
+ no_enlarge_width=no_enlarge_width,
253
+ verbose=verbose)
254
+
255
+ processed_results = list(tqdm(pool.imap(p_process_image, image_annotation_tuples),
256
+ total=len(image_annotation_tuples),
257
+ desc=f"Resizing images with {pool_type} pool"))
258
+
259
+ finally:
260
+ pool.close()
261
+ pool.join()
262
+ print(f"{pool_type.capitalize()} pool closed and joined.")
263
+
264
+ new_images_list = []
265
+ new_annotations_list = []
266
+ for res_im_data, res_annotations in processed_results:
267
+ if res_im_data is None or res_annotations is None:
268
+ assert res_annotations is None and res_im_data is None
269
+ assert unavailable_image_handling == 'omit'
270
+ continue
271
+ new_images_list.append(res_im_data)
272
+ new_annotations_list.extend(res_annotations)
273
+
274
+ d['images'] = new_images_list
275
+ d['annotations'] = new_annotations_list
276
+
277
+ if output_filename is not None:
278
+ write_json(output_filename,d)
279
+
141
280
  return d
142
281
 
143
282
  # ...def resize_coco_dataset(...)
144
-
283
+
145
284
 
146
285
  #%% Interactive driver
147
286
 
148
287
  if False:
149
-
288
+
150
289
  pass
151
290
 
152
291
  #%% Test resizing
153
-
154
- input_folder = os.path.expanduser('~/data/usgs-tegus/usgs-kissel-training')
155
- input_filename = os.path.expanduser('~/data/usgs-tegus/usgs-kissel-training.json')
156
- target_size = (1600,-1)
157
-
158
- output_filename = insert_before_extension(input_filename,'resized-test')
159
- output_folder = input_folder + '-resized-test'
160
-
292
+
293
+ input_folder = 'i:/data/lila/ena24'
294
+ # input_filename = 'i:/data/lila/ena24.json'
295
+ input_filename = 'i:/data/lila/ena24-mini.json'
296
+
297
+ output_folder = 'i:/data/lila/ena24-resized'
298
+ output_filename = insert_before_extension(input_filename,'resized')
299
+
300
+ target_size = (640,-1)
301
+
161
302
  correct_size_image_handling = 'rewrite'
162
-
163
- resize_coco_dataset(input_folder,input_filename,
164
- output_folder,output_filename,
165
- target_size=target_size,
166
- correct_size_image_handling=correct_size_image_handling)
167
-
168
-
303
+
304
+ _ = resize_coco_dataset(input_folder=input_folder,
305
+ input_filename=input_filename,
306
+ output_folder=output_folder,
307
+ output_filename=output_filename,
308
+ target_size=target_size,
309
+ correct_size_image_handling=correct_size_image_handling,
310
+ unavailable_image_handling='omit',
311
+ n_workers=10,
312
+ pool_type='process')
313
+
314
+
169
315
  #%% Preview
170
-
316
+
171
317
  from megadetector.visualization import visualize_db
172
318
  options = visualize_db.DbVizOptions()
173
319
  options.parallelize_rendering = True
174
- options.viz_size = (900, -1)
175
- options.num_to_visualize = 5000
320
+ options.viz_size = (640, -1)
321
+ options.num_to_visualize = 100
176
322
 
323
+ preview_folder = 'i:/data/lila/ena24-resized-preview'
177
324
  html_file,_ = visualize_db.visualize_db(output_filename,
178
- os.path.expanduser('~/tmp/resize_coco_preview'),
179
- output_folder,options)
180
-
325
+ preview_folder,
326
+ output_folder,options)
327
+
181
328
 
182
329
  from megadetector.utils import path_utils # noqa
183
330
  path_utils.open_file(html_file)
184
-
185
-
331
+
332
+
186
333
  #%% Command-line driver
187
334
 
def main():
    """
    Command-line driver for resize_coco_dataset
    """

    parser = argparse.ArgumentParser(
        description='Resize images in a COCO dataset and scale annotations')

    # Required positional arguments
    parser.add_argument('input_folder', type=str,
                        help='Path to the folder containing original images')
    parser.add_argument('input_filename', type=str,
                        help='Path to the input COCO .json file')
    parser.add_argument('output_folder', type=str,
                        help='Path to the folder where resized images will be saved')
    parser.add_argument('output_filename', type=str,
                        help='Path to the output COCO .json file for resized data')

    # Optional arguments
    parser.add_argument('--target_size', type=str, default='-1,-1',
                        help='Target size as "width,height". Use -1 to preserve aspect ratio for a dimension. '
                             'E.g., "800,600" or "1024,-1".')
    parser.add_argument('--correct_size_image_handling', type=str, default='copy',
                        choices=['copy', 'rewrite'],
                        help='How to handle images already at target size')
    parser.add_argument('--n_workers', type=int, default=1,
                        help='Number of workers for parallel processing. <=1 for sequential')
    parser.add_argument('--pool_type', type=str, default='thread',
                        choices=['thread', 'process'],
                        help='Type of multiprocessing pool if n_workers > 1')

    # With no arguments at all, show help instead of an argparse error
    if len(sys.argv[1:]) == 0:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    # Parse "width,height" into an int tuple; on malformed input, print the
    # error and the help text, then exit
    try:
        size_tokens = args.target_size.split(',')
        if len(size_tokens) != 2:
            raise ValueError("target_size must have two comma-separated parts (width,height).")
        parsed_size = (int(size_tokens[0]), int(size_tokens[1]))
    except ValueError as e:
        print(f"Error parsing target_size: {e}")
        parser.print_help()
        parser.exit()

    resize_coco_dataset(args.input_folder,
                        args.input_filename,
                        args.output_folder,
                        args.output_filename,
                        target_size=parsed_size,
                        correct_size_image_handling=args.correct_size_image_handling,
                        n_workers=args.n_workers,
                        pool_type=args.pool_type)

    print("Dataset resizing complete")

if __name__ == '__main__':
    main()
422
+
423
+ #%% Tests
424
+
425
class TestResizeCocoDataset:
    """
    Test class for the resize_coco_dataset function.
    """

    def set_up(self): # noqa
        """
        Create a scratch folder plus separate input/output image folders for the
        sequential and parallel test runs.
        """
        self.test_dir = make_test_folder(subfolder='resize_coco_tests')

        self.input_images_dir_seq = os.path.join(self.test_dir, 'input_images_seq')
        os.makedirs(self.input_images_dir_seq, exist_ok=True)

        self.input_images_dir_par = os.path.join(self.test_dir, 'input_images_par')
        os.makedirs(self.input_images_dir_par, exist_ok=True)

        self.output_images_dir_seq = os.path.join(self.test_dir, 'output_images_seq')
        os.makedirs(self.output_images_dir_seq, exist_ok=True)

        self.output_images_dir_par = os.path.join(self.test_dir, 'output_images_par')
        os.makedirs(self.output_images_dir_par, exist_ok=True)

    def tear_down(self): # noqa
        """
        Remove the scratch folder created by set_up(), if it exists.
        """

        # Ensure shutil is imported if not already globally in the file
        # (it is, under '#%% Imports and constants')
        if hasattr(self, 'test_dir') and os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)

    def _create_dummy_image_and_coco_json(self,
                                          image_dir,
                                          json_filename_base="input_coco.json",
                                          num_images=2,
                                          original_size=(100, 100),
                                          num_annotations_per_image=2):
        """
        Write [num_images] solid-color PNGs of size [original_size] to [image_dir],
        and a matching COCO .json file (named [json_filename_base], in self.test_dir)
        with [num_annotations_per_image] boxes per image.

        Returns a tuple of (path to the .json file, the COCO dict itself).
        """
        coco_data = {
            "images": [],
            "annotations": [],
            "categories": [{"id": 1, "name": "test_category"}]
        }

        annotation_id_counter = 1

        for i in range(num_images):
            image_name = f"image_{i}.png"
            image_path = os.path.join(image_dir, image_name)

            # Create a dummy image
            try:
                img = Image.new('RGB', original_size, color='red')
                img.save(image_path)
            except Exception as e:
                # In some environments, font loading for default PIL text might fail.
                # For a simple color image, this shouldn't be an issue.
                # If it is, consider a simpler save or pre-creating a tiny PNG.
                print(f"Warning: Could not create dummy image {image_path}: {e}")
                # Fallback: create an empty file, though this will fail later steps
                # open(image_path, 'a').close()

            image_entry = {
                "id": i + 1,
                "file_name": image_name, # Filename only, not path
                "width": original_size[0],
                "height": original_size[1]
            }
            coco_data["images"].append(image_entry)

            for j in range(num_annotations_per_image):
                annotation_entry = {
                    "id": annotation_id_counter,
                    "image_id": image_entry["id"],
                    "category_id": 1, # Corresponds to "test_category"
                    # Simple, non-overlapping bbox for testing scaling
                    "bbox": [10 + j*30, 10 + j*5, 20, 15]
                }
                coco_data["annotations"].append(annotation_entry)
                annotation_id_counter += 1

        json_file_path = os.path.join(self.test_dir, json_filename_base)
        with open(json_file_path, 'w') as f:
            json.dump(coco_data, f, indent=1)

        return json_file_path, coco_data

    def test_resize_sequential_vs_parallel(self):
        """
        Test driver for sequence vs. parallel COCO dataset resizing.
        """

        self.set_up()

        try:
            num_images_to_test = 3
            original_w, original_h = 120, 80
            target_w, target_h = 60, 40
            target_size_test = (target_w, target_h)

            # Sequential run
            input_json_path_seq, _ = self._create_dummy_image_and_coco_json(
                image_dir=self.input_images_dir_seq,
                json_filename_base="input_coco_seq.json",
                num_images=num_images_to_test,
                original_size=(original_w, original_h)
            )
            output_json_path_seq = os.path.join(self.test_dir, 'output_coco_seq.json')

            print("Test: starting sequential resize (1 worker)...")
            resize_coco_dataset(
                input_folder=self.input_images_dir_seq,
                input_filename=input_json_path_seq,
                output_folder=self.output_images_dir_seq,
                output_filename=output_json_path_seq,
                target_size=target_size_test,
                n_workers=1
            )
            print(f"Test: Sequential resize complete. Output: {output_json_path_seq}")

            # Parallel run
            # For the parallel run, we use different input/output directories but can reuse the same logic
            # for creating the dummy dataset structure. The image files will be new.
            input_json_path_par, _ = self._create_dummy_image_and_coco_json(
                image_dir=self.input_images_dir_par,
                json_filename_base="input_coco_par.json",
                num_images=num_images_to_test,
                original_size=(original_w, original_h)
            )
            output_json_path_par = os.path.join(self.test_dir, 'output_coco_par.json')

            print("Test: Starting parallel resize (2 workers, thread pool)...")
            resize_coco_dataset(
                input_folder=self.input_images_dir_par,
                input_filename=input_json_path_par,
                output_folder=self.output_images_dir_par,
                output_filename=output_json_path_par,
                target_size=target_size_test,
                n_workers=2, # Using 2 workers for testing parallelism
                pool_type='thread'
            )
            print(f"Test: Parallel resize complete. Output: {output_json_path_par}")

            # Load results
            with open(output_json_path_seq, 'r') as f:
                data_seq = json.load(f)
            with open(output_json_path_par, 'r') as f:
                data_par = json.load(f)

            # Compare COCO JSON data
            # Compare images
            assert len(data_seq['images']) == num_images_to_test
            assert len(data_seq['images']) == len(data_par['images']), "Number of images differs"

            sorted_images_seq = sorted(data_seq['images'], key=lambda x: x['id'])
            sorted_images_par = sorted(data_par['images'], key=lambda x: x['id'])

            for img_s, img_p in zip(sorted_images_seq, sorted_images_par, strict=True):
                assert img_s['id'] == img_p['id'], \
                    f"Image IDs differ: {img_s['id']} vs {img_p['id']}"
                # Filenames are generated independently, so we only check structure, not exact name matching
                # across seq/par runs' inputs, but output structure should be consistent if input
                # names were e.g. image_0, image_1
                assert img_s['file_name'] == img_p['file_name']
                assert img_s['width'] == target_w, \
                    f"Seq image {img_s['id']} width incorrect"
                assert img_s['height'] == target_h, \
                    f"Seq image {img_s['id']} height incorrect"
                assert img_p['width'] == target_w, \
                    f"Par image {img_p['id']} width incorrect"
                assert img_p['height'] == target_h, \
                    f"Par image {img_p['id']} height incorrect"

            # Compare annotations
            assert len(data_seq['annotations']) == len(data_par['annotations']), \
                "Number of annotations differs"
            # Assuming _create_dummy_image_and_coco_json creates the same number of annotations for each test run

            sorted_anns_seq = sorted(data_seq['annotations'], key=lambda x: x['id'])
            sorted_anns_par = sorted(data_par['annotations'], key=lambda x: x['id'])

            for ann_s, ann_p in zip(sorted_anns_seq, sorted_anns_par, strict=True):
                assert ann_s['id'] == ann_p['id'], \
                    f"Annotation IDs differ: {ann_s['id']} vs {ann_p['id']}"
                assert ann_s['image_id'] == ann_p['image_id'], \
                    f"Annotation image_ids differ for ann_id {ann_s['id']}"
                assert ann_s['category_id'] == ann_p['category_id'], \
                    f"Annotation category_ids differ for ann_id {ann_s['id']}"

                # Check bbox scaling (example: original width 120, target 60 -> scale 0.5)
                # Original bbox: [10, 10, 20, 15] -> Scaled: [5, 5, 10, 7.5] (Floats possible)
                # Need to compare with tolerance or ensure rounding is handled if expecting ints
                # For this test, let's assume direct comparison works due to simple scaling.
                # If PIL's resize causes slight pixel shifts affecting precise sub-pixel bbox calculations,
                # then a tolerance (pytest.approx) would be better.
                # Given the current resize_coco_dataset logic, it's direct multiplication.
                for i in range(4):
                    assert abs(ann_s['bbox'][i] - ann_p['bbox'][i]) < 1e-5, \
                        f"Bbox element {i} differs for ann_id {ann_s['id']}: {ann_s['bbox']} vs {ann_p['bbox']}"

            # Compare actual image files
            seq_files = sorted(os.listdir(self.output_images_dir_seq))
            par_files = sorted(os.listdir(self.output_images_dir_par))

            assert len(seq_files) == num_images_to_test, "Incorrect number of output images (sequential)"
            assert len(seq_files) == len(par_files), "Number of output image files differs"

            for fname_s, fname_p in zip(seq_files, par_files, strict=True):
                assert fname_s == fname_p, "Output image filenames differ between seq and par runs"
                img_s_path = os.path.join(self.output_images_dir_seq, fname_s)
                img_p_path = os.path.join(self.output_images_dir_par, fname_p)

                with Image.open(img_s_path) as img_s_pil:
                    assert img_s_pil.size == target_size_test, \
                        f"Image {fname_s} (seq) has wrong dimensions: {img_s_pil.size}"
                with Image.open(img_p_path) as img_p_pil:
                    assert img_p_pil.size == target_size_test, \
                        f"Image {fname_p} (par) has wrong dimensions: {img_p_pil.size}"

            print("Test test_resize_sequential_vs_parallel PASSED")

        finally:
            self.tear_down()

    # ...def test_resize_sequential_vs_parallel(...)

# ...class TestResizeCocoDataset
648
+
649
def test_resize_coco_dataset_main():
    """
    Driver for the TestResizeCocoDataset() class.
    """

    print("Starting TestResizeCocoDataset main runner...")
    runner = TestResizeCocoDataset()
    runner.test_resize_sequential_vs_parallel()
    print("TestResizeCocoDataset main runner finished.")