megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (197) hide show
  1. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  2. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  3. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  4. megadetector/classification/aggregate_classifier_probs.py +3 -3
  5. megadetector/classification/analyze_failed_images.py +5 -5
  6. megadetector/classification/cache_batchapi_outputs.py +5 -5
  7. megadetector/classification/create_classification_dataset.py +11 -12
  8. megadetector/classification/crop_detections.py +10 -10
  9. megadetector/classification/csv_to_json.py +8 -8
  10. megadetector/classification/detect_and_crop.py +13 -15
  11. megadetector/classification/efficientnet/model.py +8 -8
  12. megadetector/classification/efficientnet/utils.py +6 -5
  13. megadetector/classification/evaluate_model.py +7 -7
  14. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  15. megadetector/classification/json_to_azcopy_list.py +1 -1
  16. megadetector/classification/json_validator.py +29 -32
  17. megadetector/classification/map_classification_categories.py +9 -9
  18. megadetector/classification/merge_classification_detection_output.py +12 -9
  19. megadetector/classification/prepare_classification_script.py +19 -19
  20. megadetector/classification/prepare_classification_script_mc.py +26 -26
  21. megadetector/classification/run_classifier.py +4 -4
  22. megadetector/classification/save_mislabeled.py +6 -6
  23. megadetector/classification/train_classifier.py +1 -1
  24. megadetector/classification/train_classifier_tf.py +9 -9
  25. megadetector/classification/train_utils.py +10 -10
  26. megadetector/data_management/annotations/annotation_constants.py +1 -2
  27. megadetector/data_management/camtrap_dp_to_coco.py +79 -46
  28. megadetector/data_management/cct_json_utils.py +103 -103
  29. megadetector/data_management/cct_to_md.py +49 -49
  30. megadetector/data_management/cct_to_wi.py +33 -33
  31. megadetector/data_management/coco_to_labelme.py +75 -75
  32. megadetector/data_management/coco_to_yolo.py +210 -193
  33. megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
  34. megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
  35. megadetector/data_management/databases/integrity_check_json_db.py +228 -200
  36. megadetector/data_management/databases/subset_json_db.py +33 -33
  37. megadetector/data_management/generate_crops_from_cct.py +88 -39
  38. megadetector/data_management/get_image_sizes.py +54 -49
  39. megadetector/data_management/labelme_to_coco.py +133 -125
  40. megadetector/data_management/labelme_to_yolo.py +159 -73
  41. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  42. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  43. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  44. megadetector/data_management/lila/download_lila_subset.py +21 -24
  45. megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
  46. megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
  47. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  48. megadetector/data_management/lila/lila_common.py +73 -70
  49. megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
  50. megadetector/data_management/mewc_to_md.py +344 -340
  51. megadetector/data_management/ocr_tools.py +262 -255
  52. megadetector/data_management/read_exif.py +249 -227
  53. megadetector/data_management/remap_coco_categories.py +90 -28
  54. megadetector/data_management/remove_exif.py +81 -21
  55. megadetector/data_management/rename_images.py +187 -187
  56. megadetector/data_management/resize_coco_dataset.py +588 -120
  57. megadetector/data_management/speciesnet_to_md.py +41 -41
  58. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  59. megadetector/data_management/yolo_output_to_md_output.py +248 -122
  60. megadetector/data_management/yolo_to_coco.py +333 -191
  61. megadetector/detection/change_detection.py +832 -0
  62. megadetector/detection/process_video.py +340 -337
  63. megadetector/detection/pytorch_detector.py +358 -278
  64. megadetector/detection/run_detector.py +399 -186
  65. megadetector/detection/run_detector_batch.py +404 -377
  66. megadetector/detection/run_inference_with_yolov5_val.py +340 -327
  67. megadetector/detection/run_tiled_inference.py +257 -249
  68. megadetector/detection/tf_detector.py +24 -24
  69. megadetector/detection/video_utils.py +332 -295
  70. megadetector/postprocessing/add_max_conf.py +19 -11
  71. megadetector/postprocessing/categorize_detections_by_size.py +45 -45
  72. megadetector/postprocessing/classification_postprocessing.py +468 -433
  73. megadetector/postprocessing/combine_batch_outputs.py +23 -23
  74. megadetector/postprocessing/compare_batch_results.py +590 -525
  75. megadetector/postprocessing/convert_output_format.py +106 -102
  76. megadetector/postprocessing/create_crop_folder.py +347 -147
  77. megadetector/postprocessing/detector_calibration.py +173 -168
  78. megadetector/postprocessing/generate_csv_report.py +508 -499
  79. megadetector/postprocessing/load_api_results.py +48 -27
  80. megadetector/postprocessing/md_to_coco.py +133 -102
  81. megadetector/postprocessing/md_to_labelme.py +107 -90
  82. megadetector/postprocessing/md_to_wi.py +40 -40
  83. megadetector/postprocessing/merge_detections.py +92 -114
  84. megadetector/postprocessing/postprocess_batch_results.py +319 -301
  85. megadetector/postprocessing/remap_detection_categories.py +91 -38
  86. megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
  87. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  88. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  89. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
  90. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  91. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  92. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  93. megadetector/postprocessing/validate_batch_results.py +70 -70
  94. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  95. megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
  96. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
  97. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
  98. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  99. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  100. megadetector/taxonomy_mapping/species_lookup.py +156 -74
  101. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  102. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  103. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  104. megadetector/utils/ct_utils.py +1049 -211
  105. megadetector/utils/directory_listing.py +21 -77
  106. megadetector/utils/gpu_test.py +22 -22
  107. megadetector/utils/md_tests.py +632 -529
  108. megadetector/utils/path_utils.py +1520 -431
  109. megadetector/utils/process_utils.py +41 -41
  110. megadetector/utils/split_locations_into_train_val.py +62 -62
  111. megadetector/utils/string_utils.py +148 -27
  112. megadetector/utils/url_utils.py +489 -176
  113. megadetector/utils/wi_utils.py +2658 -2526
  114. megadetector/utils/write_html_image_list.py +137 -137
  115. megadetector/visualization/plot_utils.py +34 -30
  116. megadetector/visualization/render_images_with_thumbnails.py +39 -74
  117. megadetector/visualization/visualization_utils.py +487 -435
  118. megadetector/visualization/visualize_db.py +232 -198
  119. megadetector/visualization/visualize_detector_output.py +82 -76
  120. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
  121. megadetector-10.0.0.dist-info/RECORD +139 -0
  122. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
  123. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  124. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  125. megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
  126. megadetector/api/batch_processing/api_core/server.py +0 -294
  127. megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
  128. megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
  129. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  130. megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
  131. megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
  132. megadetector/api/batch_processing/api_core/server_utils.py +0 -88
  133. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  134. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  135. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  136. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  137. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  138. megadetector/api/synchronous/__init__.py +0 -0
  139. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  140. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
  141. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
  142. megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
  143. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  144. megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
  145. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  146. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  147. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  148. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  149. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  150. megadetector/data_management/importers/awc_to_json.py +0 -191
  151. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  152. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  153. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  154. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  155. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  156. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  157. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  158. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  159. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  160. megadetector/data_management/importers/ena24_to_json.py +0 -276
  161. megadetector/data_management/importers/filenames_to_json.py +0 -386
  162. megadetector/data_management/importers/helena_to_cct.py +0 -283
  163. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  164. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  165. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  166. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  167. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  168. megadetector/data_management/importers/missouri_to_json.py +0 -490
  169. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  170. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  171. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  172. megadetector/data_management/importers/pc_to_json.py +0 -365
  173. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  174. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  175. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  176. megadetector/data_management/importers/rspb_to_json.py +0 -356
  177. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  178. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  179. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  180. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  181. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  182. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  183. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  184. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  185. megadetector/data_management/importers/ubc_to_json.py +0 -399
  186. megadetector/data_management/importers/umn_to_json.py +0 -507
  187. megadetector/data_management/importers/wellington_to_json.py +0 -263
  188. megadetector/data_management/importers/wi_to_json.py +0 -442
  189. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  190. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  191. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  192. megadetector/utils/azure_utils.py +0 -178
  193. megadetector/utils/sas_blob_utils.py +0 -509
  194. megadetector-5.0.28.dist-info/RECORD +0 -209
  195. /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
  196. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
  197. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
@@ -11,6 +11,8 @@ of images representing all above-threshold crops from the original folder.
11
11
 
12
12
  import os
13
13
  import json
14
+ import argparse
15
+
14
16
  from tqdm import tqdm
15
17
 
16
18
  from multiprocessing.pool import Pool, ThreadPool
@@ -19,6 +21,7 @@ from functools import partial
19
21
 
20
22
  from megadetector.utils.path_utils import insert_before_extension
21
23
  from megadetector.utils.ct_utils import invert_dictionary
24
+ from megadetector.utils.ct_utils import is_list_sorted
22
25
  from megadetector.visualization.visualization_utils import crop_image
23
26
  from megadetector.visualization.visualization_utils import exif_preserving_save
24
27
 
@@ -29,24 +32,24 @@ class CreateCropFolderOptions:
29
32
  """
30
33
  Options used to parameterize create_crop_folder().
31
34
  """
32
-
35
+
33
36
  def __init__(self):
34
-
37
+
35
38
  #: Confidence threshold determining which detections get written
36
39
  self.confidence_threshold = 0.1
37
-
40
+
38
41
  #: Number of pixels to expand each crop
39
42
  self.expansion = 0
40
-
43
+
41
44
  #: JPEG quality to use for saving crops (None for default)
42
45
  self.quality = 95
43
-
46
+
44
47
  #: Whether to overwrite existing images
45
48
  self.overwrite = True
46
-
49
+
47
50
  #: Number of concurrent workers
48
51
  self.n_workers = 8
49
-
52
+
50
53
  #: Whether to use processes ('process') or threads ('thread') for parallelization
51
54
  self.pool_type = 'thread'
52
55
 
@@ -54,14 +57,15 @@ class CreateCropFolderOptions:
54
57
  #:
55
58
  #: options.category_names_to_include = ['animal']
56
59
  self.category_names_to_include = None
57
-
58
-
60
+
61
+
59
62
  #%% Support functions
60
63
 
61
64
  def _get_crop_filename(image_fn,crop_id):
62
65
  """
63
66
  Generate crop filenames in a consistent way.
64
67
  """
68
+
65
69
  if isinstance(crop_id,int):
66
70
  crop_id = str(crop_id).zfill(3)
67
71
  assert isinstance(crop_id,str)
@@ -74,37 +78,45 @@ def _generate_crops_for_single_image(crops_this_image,
74
78
  options):
75
79
  """
76
80
  Generate all the crops required for a single image.
81
+
82
+ Args:
83
+ crops_this_image (list of dict): list of dicts with at least keys
84
+ 'image_fn_relative', 'crop_id'
85
+ input_folder (str): input folder (whole images)
86
+ output_folder (str): output folder (crops)
87
+ options (CreateCropFolderOptions): cropping options
77
88
  """
89
+
78
90
  if len(crops_this_image) == 0:
79
91
  return
80
-
81
- image_fn_relative = crops_this_image[0]['image_fn_relative']
92
+
93
+ image_fn_relative = crops_this_image[0]['image_fn_relative']
82
94
  input_fn_abs = os.path.join(input_folder,image_fn_relative)
83
95
  assert os.path.isfile(input_fn_abs)
84
-
96
+
85
97
  detections_to_crop = [c['detection'] for c in crops_this_image]
86
-
98
+
87
99
  cropped_images = crop_image(detections_to_crop,
88
100
  input_fn_abs,
89
101
  confidence_threshold=0,
90
102
  expansion=options.expansion)
91
-
103
+
92
104
  assert len(cropped_images) == len(crops_this_image)
93
-
105
+
94
106
  # i_crop = 0; crop_info = crops_this_image[0]
95
107
  for i_crop,crop_info in enumerate(crops_this_image):
96
-
108
+
97
109
  assert crop_info['image_fn_relative'] == image_fn_relative
98
- crop_filename_relative = _get_crop_filename(image_fn_relative, crop_info['crop_id'])
110
+ crop_filename_relative = _get_crop_filename(image_fn_relative, crop_info['crop_id'])
99
111
  crop_filename_abs = os.path.join(output_folder,crop_filename_relative).replace('\\','/')
100
-
112
+
101
113
  if os.path.isfile(crop_filename_abs) and not options.overwrite:
102
114
  continue
103
-
104
- cropped_image = cropped_images[i_crop]
105
- os.makedirs(os.path.dirname(crop_filename_abs),exist_ok=True)
115
+
116
+ cropped_image = cropped_images[i_crop]
117
+ os.makedirs(os.path.dirname(crop_filename_abs),exist_ok=True)
106
118
  exif_preserving_save(cropped_image,crop_filename_abs,quality=options.quality)
107
-
119
+
108
120
  # ...for each crop
109
121
 
110
122
 
@@ -113,119 +125,185 @@ def _generate_crops_for_single_image(crops_this_image,
113
125
  def crop_results_to_image_results(image_results_file_with_crop_ids,
114
126
  crop_results_file,
115
127
  output_file,
116
- delete_crop_information=True):
128
+ delete_crop_information=True,
129
+ require_identical_detection_categories=True,
130
+ restrict_to_top_n=-1,
131
+ crop_results_prefix=None,
132
+ detections_without_classification_handling='error'):
117
133
  """
118
134
  This function is intended to be run after you have:
119
-
135
+
120
136
  1. Run MegaDetector on a folder
121
137
  2. Generated a crop folder using create_crop_folder
122
138
  3. Run a species classifier on those crops
123
-
139
+
124
140
  This function will take the crop-level results and transform them back
125
- to the original images. Classification categories, if available, are taken
141
+ to the original images. Classification categories, if available, are taken
126
142
  from [crop_results_file].
127
-
143
+
128
144
  Args:
129
145
  image_results_file_with_crop_ids (str): results file for the original images,
130
- containing crop IDs, likely generated via create_crop_folder. All
146
+ containing crop IDs, likely generated via create_crop_folder. All
131
147
  non-standard fields in this file will be passed along to [output_file].
132
148
  crop_results_file (str): results file for the crop folder
133
149
  output_file (str): output .json file, containing crop-level classifications
134
150
  mapped back to the image level.
135
151
  delete_crop_information (bool, optional): whether to delete the "crop_id" and
136
152
  "crop_filename_relative" fields from each detection, if present.
153
+ require_identical_detection_categories (bool, optional): if True, error if
154
+ the image-level and crop-level detection categories are different. If False,
155
+ ignore the crop-level detection categories.
156
+ restrict_to_top_n (int, optional): If >0, removes all but the top N classification
157
+ results for each detection.
158
+ crop_results_prefix (str, optional): if not None, removes this prefix from crop
159
+ results filenames. Intended to support the case where the crop results
160
+ use absolute paths.
161
+ detections_without_classification_handling (str, optional): what to do when we
162
+ encounter a crop that doesn't appear in classification results: 'error',
163
+ or 'include' ("include" means "leave the detection alone, without classifications")
137
164
  """
138
-
165
+
139
166
  ##%% Validate inputs
140
-
167
+
141
168
  assert os.path.isfile(image_results_file_with_crop_ids), \
142
169
  'Could not find image-level input file {}'.format(image_results_file_with_crop_ids)
143
170
  assert os.path.isfile(crop_results_file), \
144
171
  'Could not find crop results file {}'.format(crop_results_file)
145
172
  os.makedirs(os.path.dirname(output_file),exist_ok=True)
146
-
147
-
173
+
174
+
148
175
  ##%% Read input files
149
-
176
+
150
177
  print('Reading input...')
151
-
178
+
152
179
  with open(image_results_file_with_crop_ids,'r') as f:
153
180
  image_results_with_crop_ids = json.load(f)
154
181
  with open(crop_results_file,'r') as f:
155
182
  crop_results = json.load(f)
156
183
 
157
184
  # Find all the detection categories that need to be consistent
158
- used_category_ids = set()
185
+ used_detection_category_ids = set()
159
186
  for im in tqdm(image_results_with_crop_ids['images']):
160
187
  if 'detections' not in im or im['detections'] is None:
161
- continue
188
+ continue
162
189
  for det in im['detections']:
163
190
  if 'crop_id' in det:
164
- used_category_ids.add(det['category'])
165
-
166
- # Make sure the categories that matter are consistent across the two files
167
- for category_id in used_category_ids:
168
- category_name = image_results_with_crop_ids['detection_categories'][category_id]
169
- assert category_id in crop_results['detection_categories'] and \
170
- category_name == crop_results['detection_categories'][category_id], \
171
- 'Crop results and detection results use incompatible categories'
172
-
191
+ used_detection_category_ids.add(det['category'])
192
+
193
+ # Make sure the detection categories that matter are consistent across the two files
194
+ if require_identical_detection_categories:
195
+ for category_id in used_detection_category_ids:
196
+ category_name = image_results_with_crop_ids['detection_categories'][category_id]
197
+ assert category_id in crop_results['detection_categories'] and \
198
+ category_name == crop_results['detection_categories'][category_id], \
199
+ 'Crop results and detection results use incompatible categories'
200
+
173
201
  crop_filename_to_results = {}
174
-
202
+
175
203
  # im = crop_results['images'][0]
176
204
  for im in crop_results['images']:
177
- crop_filename_to_results[im['file']] = im
178
-
205
+ fn = im['file']
206
+ # Possibly remove a prefix from each filename
207
+ if (crop_results_prefix is not None) and (crop_results_prefix in fn):
208
+ if fn.startswith(crop_results_prefix):
209
+ fn = fn.replace(crop_results_prefix,'',1)
210
+ im['file'] = fn
211
+ crop_filename_to_results[fn] = im
212
+
179
213
  if 'classification_categories' in crop_results:
180
214
  image_results_with_crop_ids['classification_categories'] = \
181
215
  crop_results['classification_categories']
182
-
216
+
183
217
  if 'classification_category_descriptions' in crop_results:
184
218
  image_results_with_crop_ids['classification_category_descriptions'] = \
185
219
  crop_results['classification_category_descriptions']
186
-
187
-
220
+
221
+
188
222
  ##%% Read classifications from crop results, merge into image-level results
189
-
223
+
224
+ print('Reading classification results...')
225
+
226
+ n_skipped_detections = 0
227
+
228
+ # Loop over the original image-level detections
229
+ #
190
230
  # im = image_results_with_crop_ids['images'][0]
191
- for im in tqdm(image_results_with_crop_ids['images']):
192
-
231
+ for i_image,im in tqdm(enumerate(image_results_with_crop_ids['images']),
232
+ total=len(image_results_with_crop_ids['images'])):
233
+
193
234
  if 'detections' not in im or im['detections'] is None:
194
235
  continue
195
-
236
+
237
+ # i_det = 0; det = im['detections'][i_det]
196
238
  for det in im['detections']:
197
-
239
+
198
240
  if 'classifications' in det:
199
241
  del det['classifications']
200
-
242
+
201
243
  if 'crop_id' in det:
244
+
245
+ # We may be skipping detections with no classification results
246
+ skip_detection = False
247
+
248
+ # Find the corresponding crop in the classification results
202
249
  crop_filename_relative = det['crop_filename_relative']
203
- assert crop_filename_relative in crop_filename_to_results, \
204
- 'Crop lookup error'
205
- crop_results_this_detection = crop_filename_to_results[crop_filename_relative]
206
- assert crop_results_this_detection['file'] == crop_filename_relative
207
- assert len(crop_results_this_detection['detections']) == 1
208
- # Allow a slight confidence difference for the case where output precision was truncated
209
- assert abs(crop_results_this_detection['detections'][0]['conf'] - det['conf']) < 0.01
210
- assert crop_results_this_detection['detections'][0]['category'] == det['category']
211
- assert crop_results_this_detection['detections'][0]['bbox'] == [0,0,1,1]
212
- det['classifications'] = crop_results_this_detection['detections'][0]['classifications']
213
-
250
+ if crop_filename_relative not in crop_filename_to_results:
251
+ if detections_without_classification_handling == 'error':
252
+ raise ValueError('Crop lookup error: {}'.format(crop_filename_relative))
253
+ elif detections_without_classification_handling == 'include':
254
+ # Leave this detection unclassified
255
+ skip_detection = True
256
+ else:
257
+ raise ValueError(
258
+ 'Illegal value for detections_without_classification_handling: {}'.format(
259
+ detections_without_classification_handling
260
+ ))
261
+
262
+ if not skip_detection:
263
+
264
+ crop_results_this_detection = crop_filename_to_results[crop_filename_relative]
265
+
266
+ # Consistency checking
267
+ assert crop_results_this_detection['file'] == crop_filename_relative, \
268
+ 'Crop filename mismatch'
269
+ assert len(crop_results_this_detection['detections']) == 1, \
270
+ 'Multiple crop results for a single detection'
271
+ assert crop_results_this_detection['detections'][0]['bbox'] == [0,0,1,1], \
272
+ 'Invalid crop bounding box'
273
+
274
+ # This check was helpful for the case where crop-level results had already
275
+ # taken detection confidence values from detector output by construction, but this isn't
276
+ # really meaningful for most cases.
277
+ # assert abs(crop_results_this_detection['detections'][0]['conf'] - det['conf']) < 0.01
278
+
279
+ if require_identical_detection_categories:
280
+ assert crop_results_this_detection['detections'][0]['category'] == det['category']
281
+
282
+ # Copy the crop-level classifications
283
+ det['classifications'] = crop_results_this_detection['detections'][0]['classifications']
284
+ confidence_values = [x[1] for x in det['classifications']]
285
+ assert is_list_sorted(confidence_values,reverse=True)
286
+ if restrict_to_top_n > 0:
287
+ det['classifications'] = det['classifications'][0:restrict_to_top_n]
288
+
214
289
  if delete_crop_information:
215
290
  if 'crop_id' in det:
216
291
  del det['crop_id']
217
292
  if 'crop_filename_relative' in det:
218
293
  del det['crop_filename_relative']
219
-
294
+
220
295
  # ...for each detection
221
-
222
- # ...for each image
223
-
224
-
296
+
297
+ # ...for each image
298
+
299
+ if n_skipped_detections > 0:
300
+ print('Skipped {} detections'.format(n_skipped_detections))
301
+
302
+
225
303
  ##%% Write output file
226
-
304
+
227
305
  print('Writing output file...')
228
-
306
+
229
307
  with open(output_file,'w') as f:
230
308
  json.dump(image_results_with_crop_ids,f,indent=1)
231
309
 
@@ -241,9 +319,9 @@ def create_crop_folder(input_file,
241
319
  """
242
320
  Given a MegaDetector .json file and a folder of images, creates a new folder
243
321
  of images representing all above-threshold crops from the original folder.
244
-
322
+
245
323
  Optionally writes a new .json file that attaches unique IDs to each detection.
246
-
324
+
247
325
  Args:
248
326
  input_file (str): MD-formatted .json file to process
249
327
  input_folder (str): Input image folder
@@ -251,11 +329,11 @@ def create_crop_folder(input_file,
251
329
  output_file (str, optional): new .json file that attaches unique IDs to each detection.
252
330
  crops_output_file (str, optional): new .json file that includes whole-image detections
253
331
  for each of the crops, using confidence values from the original results
254
- options (CreateCropFolderOptions, optional): crop parameters
332
+ options (CreateCropFolderOptions, optional): crop parameters
255
333
  """
256
-
334
+
257
335
  ## Validate options, prepare output folders
258
-
336
+
259
337
  if options is None:
260
338
  options = CreateCropFolderOptions()
261
339
 
@@ -264,45 +342,45 @@ def create_crop_folder(input_file,
264
342
  os.makedirs(output_folder,exist_ok=True)
265
343
  if output_file is not None:
266
344
  os.makedirs(os.path.dirname(output_file),exist_ok=True)
267
-
268
-
345
+
346
+
269
347
  ##%% Read input
270
-
271
- print('Reading MD results file...')
348
+
349
+ print('Reading MD results file...')
272
350
  with open(input_file,'r') as f:
273
351
  detection_results = json.load(f)
274
-
352
+
275
353
  category_ids_to_include = None
276
-
277
- if options.category_names_to_include is not None:
354
+
355
+ if options.category_names_to_include is not None:
278
356
  category_id_to_name = detection_results['detection_categories']
279
- category_name_to_id = invert_dictionary(category_id_to_name)
357
+ category_name_to_id = invert_dictionary(category_id_to_name)
280
358
  category_ids_to_include = set()
281
359
  for category_name in options.category_names_to_include:
282
360
  assert category_name in category_name_to_id, \
283
361
  'Unrecognized category name {}'.format(category_name)
284
- category_ids_to_include.add(category_name_to_id[category_name])
362
+ category_ids_to_include.add(category_name_to_id[category_name])
285
363
 
286
364
  ##%% Make a list of crops that we need to create
287
-
365
+
288
366
  # Maps input images to list of dicts, with keys 'crop_id','detection'
289
367
  image_fn_relative_to_crops = defaultdict(list)
290
368
  n_crops = 0
291
-
369
+
292
370
  n_detections_excluded_by_category = 0
293
371
 
294
372
  # im = detection_results['images'][0]
295
373
  for i_image,im in enumerate(detection_results['images']):
296
-
374
+
297
375
  if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
298
376
  continue
299
-
377
+
300
378
  detections_this_image = im['detections']
301
-
379
+
302
380
  image_fn_relative = im['file']
303
-
381
+
304
382
  for i_detection,det in enumerate(detections_this_image):
305
-
383
+
306
384
  if det['conf'] < options.confidence_threshold:
307
385
  continue
308
386
 
@@ -312,87 +390,93 @@ def create_crop_folder(input_file,
312
390
  continue
313
391
 
314
392
  det['crop_id'] = i_detection
315
-
393
+
316
394
  crop_info = {'image_fn_relative':image_fn_relative,
317
- 'crop_id':i_detection,
318
- 'detection':det}
319
-
320
- crop_filename_relative = _get_crop_filename(image_fn_relative,
395
+ 'crop_id':i_detection,
396
+ 'detection':det}
397
+
398
+ crop_filename_relative = _get_crop_filename(image_fn_relative,
321
399
  crop_info['crop_id'])
322
400
  det['crop_filename_relative'] = crop_filename_relative
323
401
 
324
402
  image_fn_relative_to_crops[image_fn_relative].append(crop_info)
325
403
  n_crops += 1
326
-
327
- # ...for each input image
404
+
405
+ # ...for each input image
328
406
 
329
407
  print('Prepared a list of {} crops from {} of {} input images'.format(
330
408
  n_crops,len(image_fn_relative_to_crops),len(detection_results['images'])))
331
-
409
+
332
410
  if n_detections_excluded_by_category > 0:
333
411
  print('Excluded {} detections by category'.format(n_detections_excluded_by_category))
334
-
412
+
335
413
  ##%% Generate crops
336
-
414
+
337
415
  if options.n_workers <= 1:
338
-
416
+
339
417
  # image_fn_relative = next(iter(image_fn_relative_to_crops))
340
418
  for image_fn_relative in tqdm(image_fn_relative_to_crops.keys()):
341
- crops_this_image = image_fn_relative_to_crops[image_fn_relative]
419
+ crops_this_image = image_fn_relative_to_crops[image_fn_relative]
342
420
  _generate_crops_for_single_image(crops_this_image=crops_this_image,
343
421
  input_folder=input_folder,
344
422
  output_folder=output_folder,
345
423
  options=options)
346
-
424
+
347
425
  else:
348
-
426
+
349
427
  print('Creating a {} pool with {} workers'.format(options.pool_type,options.n_workers))
428
+ pool = None
429
+ try:
430
+ if options.pool_type == 'thread':
431
+ pool = ThreadPool(options.n_workers)
432
+ else:
433
+ assert options.pool_type == 'process'
434
+ pool = Pool(options.n_workers)
435
+
436
+ # Each element in this list is the list of crops for a single image
437
+ crop_lists = list(image_fn_relative_to_crops.values())
438
+
439
+ with tqdm(total=len(image_fn_relative_to_crops)) as pbar:
440
+ for i,_ in enumerate(pool.imap_unordered(partial(
441
+ _generate_crops_for_single_image,
442
+ input_folder=input_folder,
443
+ output_folder=output_folder,
444
+ options=options),
445
+ crop_lists)):
446
+ pbar.update()
447
+ finally:
448
+ if pool is not None:
449
+ pool.close()
450
+ pool.join()
451
+ print("Pool closed and joined for crop folder creation")
452
+
453
+ # ...if we're using parallel processing
454
+
350
455
 
351
- if options.pool_type == 'thread':
352
- pool = ThreadPool(options.n_workers)
353
- else:
354
- assert options.pool_type == 'process'
355
- pool = Pool(options.n_workers)
356
-
357
- # Each element in this list is the list of crops for a single image
358
- crop_lists = list(image_fn_relative_to_crops.values())
359
-
360
- with tqdm(total=len(image_fn_relative_to_crops)) as pbar:
361
- for i,_ in enumerate(pool.imap_unordered(partial(
362
- _generate_crops_for_single_image,
363
- input_folder=input_folder,
364
- output_folder=output_folder,
365
- options=options),
366
- crop_lists)):
367
- pbar.update()
368
-
369
- # ...if we're using parallel processing
370
-
371
-
372
456
  ##%% Write output file
373
-
457
+
374
458
  if output_file is not None:
375
459
  with open(output_file,'w') as f:
376
460
  json.dump(detection_results,f,indent=1)
377
-
461
+
378
462
  if crops_output_file is not None:
379
-
463
+
380
464
  original_images = detection_results['images']
381
-
465
+
382
466
  detection_results_cropped = detection_results
383
467
  detection_results_cropped['images'] = []
384
-
468
+
385
469
  # im = original_images[0]
386
470
  for im in original_images:
387
-
471
+
388
472
  if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
389
473
  continue
390
-
391
- detections_this_image = im['detections']
474
+
475
+ detections_this_image = im['detections']
392
476
  image_fn_relative = im['file']
393
-
477
+
394
478
  for i_detection,det in enumerate(detections_this_image):
395
-
479
+
396
480
  if 'crop_id' in det:
397
481
  im_out = {}
398
482
  im_out['file'] = det['crop_filename_relative']
@@ -402,19 +486,135 @@ def create_crop_folder(input_file,
402
486
  det_out['bbox'] = [0, 0, 1, 1]
403
487
  im_out['detections'] = [det_out]
404
488
  detection_results_cropped['images'].append(im_out)
405
-
489
+
406
490
  # ...if we need to include this crop in the new .json file
407
-
491
+
408
492
  # ...for each crop
409
-
493
+
410
494
  # ...for each original image
411
-
495
+
412
496
  with open(crops_output_file,'w') as f:
413
497
  json.dump(detection_results_cropped,f,indent=1)
414
-
498
+
415
499
  # ...def create_crop_folder()
416
500
 
417
501
 
418
502
  #%% Command-line driver
419
503
 
420
- # TODO
504
+ def main():
505
+ """
506
+ Command-line interface for creating a crop folder from MegaDetector results.
507
+ """
508
+
509
+ parser = argparse.ArgumentParser(
510
+ description='Create a folder of crops from MegaDetector results'
511
+ )
512
+ parser.add_argument(
513
+ 'input_file',
514
+ type=str,
515
+ help='Path to the MegaDetector .json results file'
516
+ )
517
+ parser.add_argument(
518
+ 'input_folder',
519
+ type=str,
520
+ help='Path to the folder containing the original images'
521
+ )
522
+ parser.add_argument(
523
+ 'output_folder',
524
+ type=str,
525
+ help='Path to the folder where cropped images will be saved'
526
+ )
527
+ parser.add_argument(
528
+ '--output_file',
529
+ type=str,
530
+ default=None,
531
+ help='Path to save the modified MegaDetector .json file (with crop IDs and filenames)'
532
+ )
533
+ parser.add_argument(
534
+ '--crops_output_file',
535
+ type=str,
536
+ default=None,
537
+ help='Path to save a new .json file for the crops themselves (with full-image detections for each crop)'
538
+ )
539
+ parser.add_argument(
540
+ '--confidence_threshold',
541
+ type=float,
542
+ default=0.1,
543
+ help='Confidence threshold for detections to be cropped (default: 0.1)'
544
+ )
545
+ parser.add_argument(
546
+ '--expansion',
547
+ type=int,
548
+ default=0,
549
+ help='Number of pixels to expand each crop (default: 0)'
550
+ )
551
+ parser.add_argument(
552
+ '--quality',
553
+ type=int,
554
+ default=95,
555
+ help='JPEG quality for saving crops (default: 95)'
556
+ )
557
+ parser.add_argument(
558
+ '--overwrite',
559
+ type=str,
560
+ default='true',
561
+ choices=['true', 'false'],
562
+ help="Overwrite existing crop images (default: 'true')"
563
+ )
564
+ parser.add_argument(
565
+ '--n_workers',
566
+ type=int,
567
+ default=8,
568
+ help='Number of concurrent workers (default: 8)'
569
+ )
570
+ parser.add_argument(
571
+ '--pool_type',
572
+ type=str,
573
+ default='thread',
574
+ choices=['thread', 'process'],
575
+ help="Type of parallelism to use ('thread' or 'process', default: 'thread')"
576
+ )
577
+ parser.add_argument(
578
+ '--category_names',
579
+ type=str,
580
+ default=None,
581
+ help="Comma-separated list of category names to include " + \
582
+ "(e.g., 'animal,person'). If None (default), all categories are included."
583
+ )
584
+
585
+ args = parser.parse_args()
586
+
587
+ options = CreateCropFolderOptions()
588
+ options.confidence_threshold = args.confidence_threshold
589
+ options.expansion = args.expansion
590
+ options.quality = args.quality
591
+ options.overwrite = (args.overwrite.lower() == 'true')
592
+ options.n_workers = args.n_workers
593
+ options.pool_type = args.pool_type
594
+
595
+ if args.category_names:
596
+ options.category_names_to_include = [name.strip() for name in args.category_names.split(',')]
597
+ else:
598
+ options.category_names_to_include = None
599
+
600
+ print('Starting crop folder creation...')
601
+ print('Input MD results: {}'.format(args.input_file))
602
+ print('Input image folder {}'.format(args.input_folder))
603
+ print('Output crop folder: {}'.format(args.output_folder))
604
+
605
+ if args.output_file:
606
+ print('Modified MD results will be saved to {}'.format(args.output_file))
607
+ if args.crops_output_file:
608
+ print('Crops .json output will be saved to {}'.format(args.crops_output_file))
609
+
610
+ create_crop_folder(
611
+ input_file=args.input_file,
612
+ input_folder=args.input_folder,
613
+ output_folder=args.output_folder,
614
+ output_file=args.output_file,
615
+ crops_output_file=args.crops_output_file,
616
+ options=options
617
+ )
618
+
619
+ if __name__ == '__main__':
620
+ main()