megadetector-10.0.15-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147)
  1. megadetector/__init__.py +0 -0
  2. megadetector/api/__init__.py +0 -0
  3. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
  7. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  8. megadetector/classification/__init__.py +0 -0
  9. megadetector/classification/aggregate_classifier_probs.py +108 -0
  10. megadetector/classification/analyze_failed_images.py +227 -0
  11. megadetector/classification/cache_batchapi_outputs.py +198 -0
  12. megadetector/classification/create_classification_dataset.py +626 -0
  13. megadetector/classification/crop_detections.py +516 -0
  14. megadetector/classification/csv_to_json.py +226 -0
  15. megadetector/classification/detect_and_crop.py +853 -0
  16. megadetector/classification/efficientnet/__init__.py +9 -0
  17. megadetector/classification/efficientnet/model.py +415 -0
  18. megadetector/classification/efficientnet/utils.py +608 -0
  19. megadetector/classification/evaluate_model.py +520 -0
  20. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  21. megadetector/classification/json_to_azcopy_list.py +63 -0
  22. megadetector/classification/json_validator.py +696 -0
  23. megadetector/classification/map_classification_categories.py +276 -0
  24. megadetector/classification/merge_classification_detection_output.py +509 -0
  25. megadetector/classification/prepare_classification_script.py +194 -0
  26. megadetector/classification/prepare_classification_script_mc.py +228 -0
  27. megadetector/classification/run_classifier.py +287 -0
  28. megadetector/classification/save_mislabeled.py +110 -0
  29. megadetector/classification/train_classifier.py +827 -0
  30. megadetector/classification/train_classifier_tf.py +725 -0
  31. megadetector/classification/train_utils.py +323 -0
  32. megadetector/data_management/__init__.py +0 -0
  33. megadetector/data_management/animl_to_md.py +161 -0
  34. megadetector/data_management/annotations/__init__.py +0 -0
  35. megadetector/data_management/annotations/annotation_constants.py +33 -0
  36. megadetector/data_management/camtrap_dp_to_coco.py +270 -0
  37. megadetector/data_management/cct_json_utils.py +566 -0
  38. megadetector/data_management/cct_to_md.py +184 -0
  39. megadetector/data_management/cct_to_wi.py +293 -0
  40. megadetector/data_management/coco_to_labelme.py +284 -0
  41. megadetector/data_management/coco_to_yolo.py +701 -0
  42. megadetector/data_management/databases/__init__.py +0 -0
  43. megadetector/data_management/databases/add_width_and_height_to_db.py +107 -0
  44. megadetector/data_management/databases/combine_coco_camera_traps_files.py +210 -0
  45. megadetector/data_management/databases/integrity_check_json_db.py +563 -0
  46. megadetector/data_management/databases/subset_json_db.py +195 -0
  47. megadetector/data_management/generate_crops_from_cct.py +200 -0
  48. megadetector/data_management/get_image_sizes.py +164 -0
  49. megadetector/data_management/labelme_to_coco.py +559 -0
  50. megadetector/data_management/labelme_to_yolo.py +349 -0
  51. megadetector/data_management/lila/__init__.py +0 -0
  52. megadetector/data_management/lila/create_lila_blank_set.py +556 -0
  53. megadetector/data_management/lila/create_lila_test_set.py +192 -0
  54. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  55. megadetector/data_management/lila/download_lila_subset.py +182 -0
  56. megadetector/data_management/lila/generate_lila_per_image_labels.py +777 -0
  57. megadetector/data_management/lila/get_lila_annotation_counts.py +174 -0
  58. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  59. megadetector/data_management/lila/lila_common.py +319 -0
  60. megadetector/data_management/lila/test_lila_metadata_urls.py +164 -0
  61. megadetector/data_management/mewc_to_md.py +344 -0
  62. megadetector/data_management/ocr_tools.py +873 -0
  63. megadetector/data_management/read_exif.py +964 -0
  64. megadetector/data_management/remap_coco_categories.py +195 -0
  65. megadetector/data_management/remove_exif.py +156 -0
  66. megadetector/data_management/rename_images.py +194 -0
  67. megadetector/data_management/resize_coco_dataset.py +665 -0
  68. megadetector/data_management/speciesnet_to_md.py +41 -0
  69. megadetector/data_management/wi_download_csv_to_coco.py +247 -0
  70. megadetector/data_management/yolo_output_to_md_output.py +594 -0
  71. megadetector/data_management/yolo_to_coco.py +984 -0
  72. megadetector/data_management/zamba_to_md.py +188 -0
  73. megadetector/detection/__init__.py +0 -0
  74. megadetector/detection/change_detection.py +840 -0
  75. megadetector/detection/process_video.py +479 -0
  76. megadetector/detection/pytorch_detector.py +1451 -0
  77. megadetector/detection/run_detector.py +1267 -0
  78. megadetector/detection/run_detector_batch.py +2172 -0
  79. megadetector/detection/run_inference_with_yolov5_val.py +1314 -0
  80. megadetector/detection/run_md_and_speciesnet.py +1604 -0
  81. megadetector/detection/run_tiled_inference.py +1044 -0
  82. megadetector/detection/tf_detector.py +209 -0
  83. megadetector/detection/video_utils.py +1379 -0
  84. megadetector/postprocessing/__init__.py +0 -0
  85. megadetector/postprocessing/add_max_conf.py +72 -0
  86. megadetector/postprocessing/categorize_detections_by_size.py +166 -0
  87. megadetector/postprocessing/classification_postprocessing.py +1943 -0
  88. megadetector/postprocessing/combine_batch_outputs.py +249 -0
  89. megadetector/postprocessing/compare_batch_results.py +2110 -0
  90. megadetector/postprocessing/convert_output_format.py +403 -0
  91. megadetector/postprocessing/create_crop_folder.py +629 -0
  92. megadetector/postprocessing/detector_calibration.py +570 -0
  93. megadetector/postprocessing/generate_csv_report.py +522 -0
  94. megadetector/postprocessing/load_api_results.py +223 -0
  95. megadetector/postprocessing/md_to_coco.py +428 -0
  96. megadetector/postprocessing/md_to_labelme.py +351 -0
  97. megadetector/postprocessing/md_to_wi.py +41 -0
  98. megadetector/postprocessing/merge_detections.py +392 -0
  99. megadetector/postprocessing/postprocess_batch_results.py +2140 -0
  100. megadetector/postprocessing/remap_detection_categories.py +226 -0
  101. megadetector/postprocessing/render_detection_confusion_matrix.py +677 -0
  102. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +206 -0
  103. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +82 -0
  104. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1665 -0
  105. megadetector/postprocessing/separate_detections_into_folders.py +795 -0
  106. megadetector/postprocessing/subset_json_detector_output.py +964 -0
  107. megadetector/postprocessing/top_folders_to_bottom.py +238 -0
  108. megadetector/postprocessing/validate_batch_results.py +332 -0
  109. megadetector/taxonomy_mapping/__init__.py +0 -0
  110. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  111. megadetector/taxonomy_mapping/map_new_lila_datasets.py +211 -0
  112. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +165 -0
  113. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +543 -0
  114. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  115. megadetector/taxonomy_mapping/simple_image_download.py +231 -0
  116. megadetector/taxonomy_mapping/species_lookup.py +1008 -0
  117. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  118. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  119. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  120. megadetector/tests/__init__.py +0 -0
  121. megadetector/tests/test_nms_synthetic.py +335 -0
  122. megadetector/utils/__init__.py +0 -0
  123. megadetector/utils/ct_utils.py +1857 -0
  124. megadetector/utils/directory_listing.py +199 -0
  125. megadetector/utils/extract_frames_from_video.py +307 -0
  126. megadetector/utils/gpu_test.py +125 -0
  127. megadetector/utils/md_tests.py +2072 -0
  128. megadetector/utils/path_utils.py +2872 -0
  129. megadetector/utils/process_utils.py +172 -0
  130. megadetector/utils/split_locations_into_train_val.py +237 -0
  131. megadetector/utils/string_utils.py +234 -0
  132. megadetector/utils/url_utils.py +825 -0
  133. megadetector/utils/wi_platform_utils.py +968 -0
  134. megadetector/utils/wi_taxonomy_utils.py +1766 -0
  135. megadetector/utils/write_html_image_list.py +239 -0
  136. megadetector/visualization/__init__.py +0 -0
  137. megadetector/visualization/plot_utils.py +309 -0
  138. megadetector/visualization/render_images_with_thumbnails.py +243 -0
  139. megadetector/visualization/visualization_utils.py +1973 -0
  140. megadetector/visualization/visualize_db.py +630 -0
  141. megadetector/visualization/visualize_detector_output.py +498 -0
  142. megadetector/visualization/visualize_video_output.py +705 -0
  143. megadetector-10.0.15.dist-info/METADATA +115 -0
  144. megadetector-10.0.15.dist-info/RECORD +147 -0
  145. megadetector-10.0.15.dist-info/WHEEL +5 -0
  146. megadetector-10.0.15.dist-info/licenses/LICENSE +19 -0
  147. megadetector-10.0.15.dist-info/top_level.txt +1 -0
megadetector/postprocessing/create_crop_folder.py
@@ -0,0 +1,629 @@
"""

create_crop_folder.py

Given a MegaDetector .json file and a folder of images, creates a new folder
of images representing all above-threshold crops from the original folder.

"""

#%% Constants and imports

import os
import json
import argparse

from tqdm import tqdm

from multiprocessing.pool import Pool, ThreadPool
from collections import defaultdict
from functools import partial

from megadetector.utils.path_utils import insert_before_extension
from megadetector.utils.ct_utils import invert_dictionary
from megadetector.utils.ct_utils import is_list_sorted
from megadetector.visualization.visualization_utils import crop_image
from megadetector.visualization.visualization_utils import exif_preserving_save


#%% Support classes

class CreateCropFolderOptions:
    """
    Options used to parameterize create_crop_folder().
    """

    def __init__(self):

        #: Confidence threshold determining which detections get written
        self.confidence_threshold = 0.1

        #: Number of pixels to expand each crop
        self.expansion = 0

        #: JPEG quality to use for saving crops (None for default)
        self.quality = 95

        #: Whether to overwrite existing images
        self.overwrite = True

        #: Number of concurrent workers
        self.n_workers = 8

        #: Whether to use processes ('process') or threads ('thread') for parallelization
        self.pool_type = 'thread'

        #: Include only these categories, or None to include all
        #:
        #: options.category_names_to_include = ['animal']
        self.category_names_to_include = None

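#%% Example: configuring options (illustrative; not part of the original module)

# A minimal sketch of how these options might be set before calling
# create_crop_folder(); the specific values below are arbitrary assumptions.
#
# options = CreateCropFolderOptions()
# options.confidence_threshold = 0.2
# options.category_names_to_include = ['animal']
# options.n_workers = 4
# options.pool_type = 'process'
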
#%% Support functions

def _get_crop_filename(image_fn,crop_id):
    """
    Generate crop filenames in a consistent way.
    """

    if isinstance(crop_id,int):
        crop_id = str(crop_id).zfill(3)
    assert isinstance(crop_id,str)
    return insert_before_extension(image_fn,'crop_' + crop_id)

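# Illustrative note, not part of the original module: for the detection with
# crop_id 0 in 'cam1/IMG_0001.JPG', _get_crop_filename() returns the same
# relative path with 'crop_000' inserted before the file extension; the exact
# separator character depends on insert_before_extension()'s default.
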
def _generate_crops_for_single_image(crops_this_image,
                                     input_folder,
                                     output_folder,
                                     options):
    """
    Generate all the crops required for a single image.

    Args:
        crops_this_image (list of dict): list of dicts with at least keys
            'image_fn_relative', 'crop_id', and 'detection'
        input_folder (str): input folder (whole images)
        output_folder (str): output folder (crops)
        options (CreateCropFolderOptions): cropping options
    """

    if len(crops_this_image) == 0:
        return

    image_fn_relative = crops_this_image[0]['image_fn_relative']
    input_fn_abs = os.path.join(input_folder,image_fn_relative)
    assert os.path.isfile(input_fn_abs)

    detections_to_crop = [c['detection'] for c in crops_this_image]

    cropped_images = crop_image(detections_to_crop,
                                input_fn_abs,
                                confidence_threshold=0,
                                expansion=options.expansion)

    assert len(cropped_images) == len(crops_this_image)

    # i_crop = 0; crop_info = crops_this_image[0]
    for i_crop,crop_info in enumerate(crops_this_image):

        assert crop_info['image_fn_relative'] == image_fn_relative
        crop_filename_relative = _get_crop_filename(image_fn_relative, crop_info['crop_id'])
        crop_filename_abs = os.path.join(output_folder,crop_filename_relative).replace('\\','/')

        if os.path.isfile(crop_filename_abs) and not options.overwrite:
            continue

        cropped_image = cropped_images[i_crop]
        os.makedirs(os.path.dirname(crop_filename_abs),exist_ok=True)
        exif_preserving_save(cropped_image,crop_filename_abs,quality=options.quality)

    # ...for each crop

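# Illustrative sketch (not part of the original module): each element of
# [crops_this_image] is expected to look roughly like the following, where the
# path and numbers are hypothetical placeholders:
#
# {'image_fn_relative': 'cam1/IMG_0001.JPG',
#  'crop_id': 0,
#  'detection': {'category': '1', 'conf': 0.93, 'bbox': [0.10, 0.20, 0.40, 0.30]}}
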
#%% Main function

def crop_results_to_image_results(image_results_file_with_crop_ids,
                                  crop_results_file,
                                  output_file,
                                  delete_crop_information=True,
                                  require_identical_detection_categories=True,
                                  restrict_to_top_n=-1,
                                  crop_results_prefix=None,
                                  detections_without_classification_handling='error'):
    """
    This function is intended to be run after you have:

    1. Run MegaDetector on a folder
    2. Generated a crop folder using create_crop_folder
    3. Run a species classifier on those crops

    This function will take the crop-level results and transform them back
    to the original images. Classification categories, if available, are taken
    from [crop_results_file].

    Args:
        image_results_file_with_crop_ids (str): results file for the original images,
            containing crop IDs, likely generated via create_crop_folder. All
            non-standard fields in this file will be passed along to [output_file].
        crop_results_file (str): results file for the crop folder
        output_file (str): output .json file, containing crop-level classifications
            mapped back to the image level
        delete_crop_information (bool, optional): whether to delete the "crop_id" and
            "crop_filename_relative" fields from each detection, if present
        require_identical_detection_categories (bool, optional): if True, error if
            the image-level and crop-level detection categories are different. If False,
            ignore the crop-level detection categories.
        restrict_to_top_n (int, optional): if >0, removes all but the top N classification
            results for each detection
        crop_results_prefix (str, optional): if not None, removes this prefix from crop
            results filenames. Intended to support the case where the crop results
            use absolute paths.
        detections_without_classification_handling (str, optional): what to do when we
            encounter a crop that doesn't appear in classification results: 'error',
            or 'include' ("include" means "leave the detection alone, without classifications")
    """

    ##%% Validate inputs

    assert os.path.isfile(image_results_file_with_crop_ids), \
        'Could not find image-level input file {}'.format(image_results_file_with_crop_ids)
    assert os.path.isfile(crop_results_file), \
        'Could not find crop results file {}'.format(crop_results_file)
    output_dir = os.path.dirname(output_file)
    if len(output_dir) > 0:
        os.makedirs(output_dir,exist_ok=True)


    ##%% Read input files

    print('Reading input...')

    with open(image_results_file_with_crop_ids,'r') as f:
        image_results_with_crop_ids = json.load(f)
    with open(crop_results_file,'r') as f:
        crop_results = json.load(f)

    # Find all the detection categories that need to be consistent
    used_detection_category_ids = set()
    for im in tqdm(image_results_with_crop_ids['images']):
        if 'detections' not in im or im['detections'] is None:
            continue
        for det in im['detections']:
            if 'crop_id' in det:
                used_detection_category_ids.add(det['category'])

    # Make sure the detection categories that matter are consistent across the two files
    if require_identical_detection_categories:
        for category_id in used_detection_category_ids:
            category_name = image_results_with_crop_ids['detection_categories'][category_id]
            assert category_id in crop_results['detection_categories'] and \
                category_name == crop_results['detection_categories'][category_id], \
                'Crop results and detection results use incompatible categories'

    crop_filename_to_results = {}

    # im = crop_results['images'][0]
    for im in crop_results['images']:
        fn = im['file']
        # Possibly remove a prefix from each filename
        if (crop_results_prefix is not None) and (crop_results_prefix in fn):
            if fn.startswith(crop_results_prefix):
                fn = fn.replace(crop_results_prefix,'',1)
                im['file'] = fn
        crop_filename_to_results[fn] = im

    if 'classification_categories' in crop_results:
        image_results_with_crop_ids['classification_categories'] = \
            crop_results['classification_categories']

    if 'classification_category_descriptions' in crop_results:
        image_results_with_crop_ids['classification_category_descriptions'] = \
            crop_results['classification_category_descriptions']


    ##%% Read classifications from crop results, merge into image-level results

    print('Reading classification results...')

    n_skipped_detections = 0

    # Loop over the original image-level detections
    #
    # im = image_results_with_crop_ids['images'][0]
    for i_image,im in tqdm(enumerate(image_results_with_crop_ids['images']),
                           total=len(image_results_with_crop_ids['images'])):

        if 'detections' not in im or im['detections'] is None:
            continue

        # i_det = 0; det = im['detections'][i_det]
        for det in im['detections']:

            if 'classifications' in det:
                del det['classifications']

            if 'crop_id' in det:

                # We may be skipping detections with no classification results
                skip_detection = False

                # Find the corresponding crop in the classification results
                crop_filename_relative = det['crop_filename_relative']
                if crop_filename_relative not in crop_filename_to_results:
                    if detections_without_classification_handling == 'error':
                        raise ValueError('Crop lookup error: {}'.format(crop_filename_relative))
                    elif detections_without_classification_handling == 'include':
                        # Leave this detection unclassified
                        skip_detection = True
                    else:
                        raise ValueError(
                            'Illegal value for detections_without_classification_handling: {}'.format(
                                detections_without_classification_handling))

                if skip_detection:

                    n_skipped_detections += 1

                else:

                    crop_results_this_detection = crop_filename_to_results[crop_filename_relative]

                    # Consistency checking
                    assert crop_results_this_detection['file'] == crop_filename_relative, \
                        'Crop filename mismatch'
                    assert len(crop_results_this_detection['detections']) == 1, \
                        'Multiple crop results for a single detection'
                    assert crop_results_this_detection['detections'][0]['bbox'] == [0,0,1,1], \
                        'Invalid crop bounding box'

                    # This check was helpful for the case where crop-level results had already
                    # taken detection confidence values from detector output by construction, but
                    # this isn't really meaningful for most cases.
                    # assert abs(crop_results_this_detection['detections'][0]['conf'] - det['conf']) < 0.01

                    if require_identical_detection_categories:
                        assert crop_results_this_detection['detections'][0]['category'] == det['category']

                    # Copy the crop-level classifications
                    det['classifications'] = crop_results_this_detection['detections'][0]['classifications']
                    confidence_values = [x[1] for x in det['classifications']]
                    assert is_list_sorted(confidence_values,reverse=True)
                    if restrict_to_top_n > 0:
                        det['classifications'] = det['classifications'][0:restrict_to_top_n]

            if delete_crop_information:
                if 'crop_id' in det:
                    del det['crop_id']
                if 'crop_filename_relative' in det:
                    del det['crop_filename_relative']

        # ...for each detection

    # ...for each image

    if n_skipped_detections > 0:
        print('Skipped {} detections'.format(n_skipped_detections))


    ##%% Write output file

    print('Writing output file...')

    with open(output_file,'w') as f:
        json.dump(image_results_with_crop_ids,f,indent=1)

# ...def crop_results_to_image_results(...)

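#%% Example: mapping crop-level classifications back to images (illustrative)

# A minimal usage sketch, not part of the original module; the filenames are
# hypothetical placeholders for the .json produced by create_crop_folder() and
# for the results of whatever species classifier was run on the crop folder.
#
# crop_results_to_image_results(
#     image_results_file_with_crop_ids='md_results_with_crop_ids.json',
#     crop_results_file='classifier_results_on_crops.json',
#     output_file='md_results_with_classifications.json',
#     restrict_to_top_n=1)
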
def create_crop_folder(input_file,
                       input_folder,
                       output_folder,
                       output_file=None,
                       crops_output_file=None,
                       options=None):
    """
    Given a MegaDetector .json file and a folder of images, creates a new folder
    of images representing all above-threshold crops from the original folder.

    Optionally writes a new .json file that attaches unique IDs to each detection.

    Args:
        input_file (str): MD-formatted .json file to process
        input_folder (str): input image folder
        output_folder (str): output (cropped) image folder
        output_file (str, optional): new .json file that attaches unique IDs to each detection
        crops_output_file (str, optional): new .json file that includes whole-image detections
            for each of the crops, using confidence values from the original results
        options (CreateCropFolderOptions, optional): crop parameters
    """

    ##%% Validate options, prepare output folders

    if options is None:
        options = CreateCropFolderOptions()

    assert os.path.isfile(input_file), 'Input file {} not found'.format(input_file)
    assert os.path.isdir(input_folder), 'Input folder {} not found'.format(input_folder)
    os.makedirs(output_folder,exist_ok=True)

    if output_file is not None:
        output_dir = os.path.dirname(output_file)
        if len(output_dir) > 0:
            os.makedirs(output_dir,exist_ok=True)


    ##%% Read input

    print('Reading MD results file...')
    with open(input_file,'r') as f:
        detection_results = json.load(f)

    category_ids_to_include = None

    if options.category_names_to_include is not None:
        category_id_to_name = detection_results['detection_categories']
        category_name_to_id = invert_dictionary(category_id_to_name)
        category_ids_to_include = set()
        for category_name in options.category_names_to_include:
            assert category_name in category_name_to_id, \
                'Unrecognized category name {}'.format(category_name)
            category_ids_to_include.add(category_name_to_id[category_name])


    ##%% Make a list of crops that we need to create

    # Maps input images to lists of dicts with keys 'image_fn_relative', 'crop_id', 'detection'
    image_fn_relative_to_crops = defaultdict(list)
    n_crops = 0

    n_detections_excluded_by_category = 0

    # im = detection_results['images'][0]
    for i_image,im in enumerate(detection_results['images']):

        if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
            continue

        detections_this_image = im['detections']

        image_fn_relative = im['file']

        for i_detection,det in enumerate(detections_this_image):

            if det['conf'] < options.confidence_threshold:
                continue

            if (category_ids_to_include is not None) and \
               (det['category'] not in category_ids_to_include):
                n_detections_excluded_by_category += 1
                continue

            det['crop_id'] = i_detection

            crop_info = {'image_fn_relative':image_fn_relative,
                         'crop_id':i_detection,
                         'detection':det}

            crop_filename_relative = _get_crop_filename(image_fn_relative,
                                                        crop_info['crop_id'])
            det['crop_filename_relative'] = crop_filename_relative

            image_fn_relative_to_crops[image_fn_relative].append(crop_info)
            n_crops += 1

    # ...for each input image

    print('Prepared a list of {} crops from {} of {} input images'.format(
        n_crops,len(image_fn_relative_to_crops),len(detection_results['images'])))

    if n_detections_excluded_by_category > 0:
        print('Excluded {} detections by category'.format(n_detections_excluded_by_category))


    ##%% Generate crops

    if options.n_workers <= 1:

        # image_fn_relative = next(iter(image_fn_relative_to_crops))
        for image_fn_relative in tqdm(image_fn_relative_to_crops.keys()):
            crops_this_image = image_fn_relative_to_crops[image_fn_relative]
            _generate_crops_for_single_image(crops_this_image=crops_this_image,
                                             input_folder=input_folder,
                                             output_folder=output_folder,
                                             options=options)

    else:

        print('Creating a {} pool with {} workers'.format(options.pool_type,options.n_workers))
        pool = None
        try:
            if options.pool_type == 'thread':
                pool = ThreadPool(options.n_workers)
            else:
                assert options.pool_type == 'process'
                pool = Pool(options.n_workers)

            # Each element in this list is the list of crops for a single image
            crop_lists = list(image_fn_relative_to_crops.values())

            with tqdm(total=len(image_fn_relative_to_crops)) as pbar:
                for i,_ in enumerate(pool.imap_unordered(partial(
                        _generate_crops_for_single_image,
                        input_folder=input_folder,
                        output_folder=output_folder,
                        options=options),
                        crop_lists)):
                    pbar.update()
        finally:
            if pool is not None:
                pool.close()
                pool.join()
                print('Pool closed and joined for crop folder creation')

    # ...if we're using parallel processing


    ##%% Write output files

    if output_file is not None:
        with open(output_file,'w') as f:
            json.dump(detection_results,f,indent=1)

    if crops_output_file is not None:

        original_images = detection_results['images']

        detection_results_cropped = detection_results
        detection_results_cropped['images'] = []

        # im = original_images[0]
        for im in original_images:

            if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
                continue

            detections_this_image = im['detections']
            image_fn_relative = im['file']

            for i_detection,det in enumerate(detections_this_image):

                if 'crop_id' in det:
                    im_out = {}
                    im_out['file'] = det['crop_filename_relative']
                    det_out = {}
                    det_out['category'] = det['category']
                    det_out['conf'] = det['conf']
                    det_out['bbox'] = [0, 0, 1, 1]
                    im_out['detections'] = [det_out]
                    detection_results_cropped['images'].append(im_out)

                # ...if we need to include this crop in the new .json file

            # ...for each crop

        # ...for each original image

        with open(crops_output_file,'w') as f:
            json.dump(detection_results_cropped,f,indent=1)

# ...def create_crop_folder()

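#%% Example: programmatic use (illustrative)

# A minimal sketch of the cropping step, not part of the original module; all
# paths are hypothetical placeholders, and running a species classifier on the
# resulting crop folder happens outside this module.
#
# options = CreateCropFolderOptions()
# options.confidence_threshold = 0.2
#
# create_crop_folder(input_file='md_results.json',
#                    input_folder='/data/camera_traps',
#                    output_folder='/data/camera_trap_crops',
#                    output_file='md_results_with_crop_ids.json',
#                    crops_output_file='crop_level_detections.json',
#                    options=options)
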
#%% Command-line driver

def main():
    """
    Command-line interface for creating a crop folder from MegaDetector results.
    """

    parser = argparse.ArgumentParser(
        description='Create a folder of crops from MegaDetector results'
    )
    parser.add_argument(
        'input_file',
        type=str,
        help='Path to the MegaDetector .json results file'
    )
    parser.add_argument(
        'input_folder',
        type=str,
        help='Path to the folder containing the original images'
    )
    parser.add_argument(
        'output_folder',
        type=str,
        help='Path to the folder where cropped images will be saved'
    )
    parser.add_argument(
        '--output_file',
        type=str,
        default=None,
        help='Path to save the modified MegaDetector .json file (with crop IDs and filenames)'
    )
    parser.add_argument(
        '--crops_output_file',
        type=str,
        default=None,
        help='Path to save a new .json file for the crops themselves (with full-image detections for each crop)'
    )
    parser.add_argument(
        '--confidence_threshold',
        type=float,
        default=0.1,
        help='Confidence threshold for detections to be cropped (default: 0.1)'
    )
    parser.add_argument(
        '--expansion',
        type=int,
        default=0,
        help='Number of pixels to expand each crop (default: 0)'
    )
    parser.add_argument(
        '--quality',
        type=int,
        default=95,
        help='JPEG quality for saving crops (default: 95)'
    )
    parser.add_argument(
        '--overwrite',
        type=str,
        default='true',
        choices=['true', 'false'],
        help="Overwrite existing crop images (default: 'true')"
    )
    parser.add_argument(
        '--n_workers',
        type=int,
        default=8,
        help='Number of concurrent workers (default: 8)'
    )
    parser.add_argument(
        '--pool_type',
        type=str,
        default='thread',
        choices=['thread', 'process'],
        help="Type of parallelism to use ('thread' or 'process', default: 'thread')"
    )
    parser.add_argument(
        '--category_names',
        type=str,
        default=None,
        help="Comma-separated list of category names to include " + \
             "(e.g., 'animal,person'). If omitted (default), all categories are included."
    )

    args = parser.parse_args()

    options = CreateCropFolderOptions()
    options.confidence_threshold = args.confidence_threshold
    options.expansion = args.expansion
    options.quality = args.quality
    options.overwrite = (args.overwrite.lower() == 'true')
    options.n_workers = args.n_workers
    options.pool_type = args.pool_type

    if args.category_names:
        options.category_names_to_include = [name.strip() for name in args.category_names.split(',')]
    else:
        options.category_names_to_include = None

    print('Starting crop folder creation...')
    print('Input MD results: {}'.format(args.input_file))
    print('Input image folder: {}'.format(args.input_folder))
    print('Output crop folder: {}'.format(args.output_folder))

    if args.output_file:
        print('Modified MD results will be saved to {}'.format(args.output_file))
    if args.crops_output_file:
        print('Crops .json output will be saved to {}'.format(args.crops_output_file))

    create_crop_folder(
        input_file=args.input_file,
        input_folder=args.input_folder,
        output_folder=args.output_folder,
        output_file=args.output_file,
        crops_output_file=args.crops_output_file,
        options=options
    )


if __name__ == '__main__':
    main()
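
# Illustrative command line (not from the package documentation); paths are
# hypothetical placeholders:
#
# python -m megadetector.postprocessing.create_crop_folder \
#     md_results.json /data/camera_traps /data/camera_trap_crops \
#     --output_file md_results_with_crop_ids.json \
#     --crops_output_file crop_level_detections.json \
#     --confidence_threshold 0.2 --n_workers 8 --pool_type thread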