megadetector 5.0.9__py3-none-any.whl → 5.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (226)
  1. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/LICENSE +0 -0
  2. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/METADATA +12 -11
  3. megadetector-5.0.11.dist-info/RECORD +5 -0
  4. megadetector-5.0.11.dist-info/top_level.txt +1 -0
  5. api/__init__.py +0 -0
  6. api/batch_processing/__init__.py +0 -0
  7. api/batch_processing/api_core/__init__.py +0 -0
  8. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  9. api/batch_processing/api_core/batch_service/score.py +0 -439
  10. api/batch_processing/api_core/server.py +0 -294
  11. api/batch_processing/api_core/server_api_config.py +0 -98
  12. api/batch_processing/api_core/server_app_config.py +0 -55
  13. api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  14. api/batch_processing/api_core/server_job_status_table.py +0 -152
  15. api/batch_processing/api_core/server_orchestration.py +0 -360
  16. api/batch_processing/api_core/server_utils.py +0 -92
  17. api/batch_processing/api_core_support/__init__.py +0 -0
  18. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  19. api/batch_processing/api_support/__init__.py +0 -0
  20. api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  21. api/batch_processing/data_preparation/__init__.py +0 -0
  22. api/batch_processing/data_preparation/manage_local_batch.py +0 -2391
  23. api/batch_processing/data_preparation/manage_video_batch.py +0 -327
  24. api/batch_processing/integration/digiKam/setup.py +0 -6
  25. api/batch_processing/integration/digiKam/xmp_integration.py +0 -465
  26. api/batch_processing/integration/eMammal/test_scripts/config_template.py +0 -5
  27. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -126
  28. api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +0 -55
  29. api/batch_processing/postprocessing/__init__.py +0 -0
  30. api/batch_processing/postprocessing/add_max_conf.py +0 -64
  31. api/batch_processing/postprocessing/categorize_detections_by_size.py +0 -163
  32. api/batch_processing/postprocessing/combine_api_outputs.py +0 -249
  33. api/batch_processing/postprocessing/compare_batch_results.py +0 -958
  34. api/batch_processing/postprocessing/convert_output_format.py +0 -397
  35. api/batch_processing/postprocessing/load_api_results.py +0 -195
  36. api/batch_processing/postprocessing/md_to_coco.py +0 -310
  37. api/batch_processing/postprocessing/md_to_labelme.py +0 -330
  38. api/batch_processing/postprocessing/merge_detections.py +0 -401
  39. api/batch_processing/postprocessing/postprocess_batch_results.py +0 -1904
  40. api/batch_processing/postprocessing/remap_detection_categories.py +0 -170
  41. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +0 -661
  42. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +0 -211
  43. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +0 -82
  44. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +0 -1631
  45. api/batch_processing/postprocessing/separate_detections_into_folders.py +0 -731
  46. api/batch_processing/postprocessing/subset_json_detector_output.py +0 -696
  47. api/batch_processing/postprocessing/top_folders_to_bottom.py +0 -223
  48. api/synchronous/__init__.py +0 -0
  49. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  50. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -152
  51. api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -266
  52. api/synchronous/api_core/animal_detection_api/config.py +0 -35
  53. api/synchronous/api_core/animal_detection_api/data_management/annotations/annotation_constants.py +0 -47
  54. api/synchronous/api_core/animal_detection_api/detection/detector_training/copy_checkpoints.py +0 -43
  55. api/synchronous/api_core/animal_detection_api/detection/detector_training/model_main_tf2.py +0 -114
  56. api/synchronous/api_core/animal_detection_api/detection/process_video.py +0 -543
  57. api/synchronous/api_core/animal_detection_api/detection/pytorch_detector.py +0 -304
  58. api/synchronous/api_core/animal_detection_api/detection/run_detector.py +0 -627
  59. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +0 -1029
  60. api/synchronous/api_core/animal_detection_api/detection/run_inference_with_yolov5_val.py +0 -581
  61. api/synchronous/api_core/animal_detection_api/detection/run_tiled_inference.py +0 -754
  62. api/synchronous/api_core/animal_detection_api/detection/tf_detector.py +0 -165
  63. api/synchronous/api_core/animal_detection_api/detection/video_utils.py +0 -495
  64. api/synchronous/api_core/animal_detection_api/md_utils/azure_utils.py +0 -174
  65. api/synchronous/api_core/animal_detection_api/md_utils/ct_utils.py +0 -262
  66. api/synchronous/api_core/animal_detection_api/md_utils/directory_listing.py +0 -251
  67. api/synchronous/api_core/animal_detection_api/md_utils/matlab_porting_tools.py +0 -97
  68. api/synchronous/api_core/animal_detection_api/md_utils/path_utils.py +0 -416
  69. api/synchronous/api_core/animal_detection_api/md_utils/process_utils.py +0 -110
  70. api/synchronous/api_core/animal_detection_api/md_utils/sas_blob_utils.py +0 -509
  71. api/synchronous/api_core/animal_detection_api/md_utils/string_utils.py +0 -59
  72. api/synchronous/api_core/animal_detection_api/md_utils/url_utils.py +0 -144
  73. api/synchronous/api_core/animal_detection_api/md_utils/write_html_image_list.py +0 -226
  74. api/synchronous/api_core/animal_detection_api/md_visualization/visualization_utils.py +0 -841
  75. api/synchronous/api_core/tests/__init__.py +0 -0
  76. api/synchronous/api_core/tests/load_test.py +0 -110
  77. classification/__init__.py +0 -0
  78. classification/aggregate_classifier_probs.py +0 -108
  79. classification/analyze_failed_images.py +0 -227
  80. classification/cache_batchapi_outputs.py +0 -198
  81. classification/create_classification_dataset.py +0 -627
  82. classification/crop_detections.py +0 -516
  83. classification/csv_to_json.py +0 -226
  84. classification/detect_and_crop.py +0 -855
  85. classification/efficientnet/__init__.py +0 -9
  86. classification/efficientnet/model.py +0 -415
  87. classification/efficientnet/utils.py +0 -610
  88. classification/evaluate_model.py +0 -520
  89. classification/identify_mislabeled_candidates.py +0 -152
  90. classification/json_to_azcopy_list.py +0 -63
  91. classification/json_validator.py +0 -695
  92. classification/map_classification_categories.py +0 -276
  93. classification/merge_classification_detection_output.py +0 -506
  94. classification/prepare_classification_script.py +0 -194
  95. classification/prepare_classification_script_mc.py +0 -228
  96. classification/run_classifier.py +0 -286
  97. classification/save_mislabeled.py +0 -110
  98. classification/train_classifier.py +0 -825
  99. classification/train_classifier_tf.py +0 -724
  100. classification/train_utils.py +0 -322
  101. data_management/__init__.py +0 -0
  102. data_management/annotations/__init__.py +0 -0
  103. data_management/annotations/annotation_constants.py +0 -34
  104. data_management/camtrap_dp_to_coco.py +0 -238
  105. data_management/cct_json_utils.py +0 -395
  106. data_management/cct_to_md.py +0 -176
  107. data_management/cct_to_wi.py +0 -289
  108. data_management/coco_to_labelme.py +0 -272
  109. data_management/coco_to_yolo.py +0 -662
  110. data_management/databases/__init__.py +0 -0
  111. data_management/databases/add_width_and_height_to_db.py +0 -33
  112. data_management/databases/combine_coco_camera_traps_files.py +0 -206
  113. data_management/databases/integrity_check_json_db.py +0 -477
  114. data_management/databases/subset_json_db.py +0 -115
  115. data_management/generate_crops_from_cct.py +0 -149
  116. data_management/get_image_sizes.py +0 -188
  117. data_management/importers/add_nacti_sizes.py +0 -52
  118. data_management/importers/add_timestamps_to_icct.py +0 -79
  119. data_management/importers/animl_results_to_md_results.py +0 -158
  120. data_management/importers/auckland_doc_test_to_json.py +0 -372
  121. data_management/importers/auckland_doc_to_json.py +0 -200
  122. data_management/importers/awc_to_json.py +0 -189
  123. data_management/importers/bellevue_to_json.py +0 -273
  124. data_management/importers/cacophony-thermal-importer.py +0 -796
  125. data_management/importers/carrizo_shrubfree_2018.py +0 -268
  126. data_management/importers/carrizo_trail_cam_2017.py +0 -287
  127. data_management/importers/cct_field_adjustments.py +0 -57
  128. data_management/importers/channel_islands_to_cct.py +0 -913
  129. data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  130. data_management/importers/eMammal/eMammal_helpers.py +0 -249
  131. data_management/importers/eMammal/make_eMammal_json.py +0 -223
  132. data_management/importers/ena24_to_json.py +0 -275
  133. data_management/importers/filenames_to_json.py +0 -385
  134. data_management/importers/helena_to_cct.py +0 -282
  135. data_management/importers/idaho-camera-traps.py +0 -1407
  136. data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  137. data_management/importers/jb_csv_to_json.py +0 -150
  138. data_management/importers/mcgill_to_json.py +0 -250
  139. data_management/importers/missouri_to_json.py +0 -489
  140. data_management/importers/nacti_fieldname_adjustments.py +0 -79
  141. data_management/importers/noaa_seals_2019.py +0 -181
  142. data_management/importers/pc_to_json.py +0 -365
  143. data_management/importers/plot_wni_giraffes.py +0 -123
  144. data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
  145. data_management/importers/prepare_zsl_imerit.py +0 -131
  146. data_management/importers/rspb_to_json.py +0 -356
  147. data_management/importers/save_the_elephants_survey_A.py +0 -320
  148. data_management/importers/save_the_elephants_survey_B.py +0 -332
  149. data_management/importers/snapshot_safari_importer.py +0 -758
  150. data_management/importers/snapshot_safari_importer_reprise.py +0 -665
  151. data_management/importers/snapshot_serengeti_lila.py +0 -1067
  152. data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  153. data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  154. data_management/importers/sulross_get_exif.py +0 -65
  155. data_management/importers/timelapse_csv_set_to_json.py +0 -490
  156. data_management/importers/ubc_to_json.py +0 -399
  157. data_management/importers/umn_to_json.py +0 -507
  158. data_management/importers/wellington_to_json.py +0 -263
  159. data_management/importers/wi_to_json.py +0 -441
  160. data_management/importers/zamba_results_to_md_results.py +0 -181
  161. data_management/labelme_to_coco.py +0 -548
  162. data_management/labelme_to_yolo.py +0 -272
  163. data_management/lila/__init__.py +0 -0
  164. data_management/lila/add_locations_to_island_camera_traps.py +0 -97
  165. data_management/lila/add_locations_to_nacti.py +0 -147
  166. data_management/lila/create_lila_blank_set.py +0 -557
  167. data_management/lila/create_lila_test_set.py +0 -151
  168. data_management/lila/create_links_to_md_results_files.py +0 -106
  169. data_management/lila/download_lila_subset.py +0 -177
  170. data_management/lila/generate_lila_per_image_labels.py +0 -515
  171. data_management/lila/get_lila_annotation_counts.py +0 -170
  172. data_management/lila/get_lila_image_counts.py +0 -111
  173. data_management/lila/lila_common.py +0 -300
  174. data_management/lila/test_lila_metadata_urls.py +0 -132
  175. data_management/ocr_tools.py +0 -874
  176. data_management/read_exif.py +0 -681
  177. data_management/remap_coco_categories.py +0 -84
  178. data_management/remove_exif.py +0 -66
  179. data_management/resize_coco_dataset.py +0 -189
  180. data_management/wi_download_csv_to_coco.py +0 -246
  181. data_management/yolo_output_to_md_output.py +0 -441
  182. data_management/yolo_to_coco.py +0 -676
  183. detection/__init__.py +0 -0
  184. detection/detector_training/__init__.py +0 -0
  185. detection/detector_training/model_main_tf2.py +0 -114
  186. detection/process_video.py +0 -703
  187. detection/pytorch_detector.py +0 -337
  188. detection/run_detector.py +0 -779
  189. detection/run_detector_batch.py +0 -1219
  190. detection/run_inference_with_yolov5_val.py +0 -917
  191. detection/run_tiled_inference.py +0 -935
  192. detection/tf_detector.py +0 -188
  193. detection/video_utils.py +0 -606
  194. docs/source/conf.py +0 -43
  195. md_utils/__init__.py +0 -0
  196. md_utils/azure_utils.py +0 -174
  197. md_utils/ct_utils.py +0 -612
  198. md_utils/directory_listing.py +0 -246
  199. md_utils/md_tests.py +0 -968
  200. md_utils/path_utils.py +0 -1044
  201. md_utils/process_utils.py +0 -157
  202. md_utils/sas_blob_utils.py +0 -509
  203. md_utils/split_locations_into_train_val.py +0 -228
  204. md_utils/string_utils.py +0 -92
  205. md_utils/url_utils.py +0 -323
  206. md_utils/write_html_image_list.py +0 -225
  207. md_visualization/__init__.py +0 -0
  208. md_visualization/plot_utils.py +0 -293
  209. md_visualization/render_images_with_thumbnails.py +0 -275
  210. md_visualization/visualization_utils.py +0 -1537
  211. md_visualization/visualize_db.py +0 -551
  212. md_visualization/visualize_detector_output.py +0 -406
  213. megadetector-5.0.9.dist-info/RECORD +0 -224
  214. megadetector-5.0.9.dist-info/top_level.txt +0 -8
  215. taxonomy_mapping/__init__.py +0 -0
  216. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +0 -491
  217. taxonomy_mapping/map_new_lila_datasets.py +0 -154
  218. taxonomy_mapping/prepare_lila_taxonomy_release.py +0 -142
  219. taxonomy_mapping/preview_lila_taxonomy.py +0 -591
  220. taxonomy_mapping/retrieve_sample_image.py +0 -71
  221. taxonomy_mapping/simple_image_download.py +0 -218
  222. taxonomy_mapping/species_lookup.py +0 -834
  223. taxonomy_mapping/taxonomy_csv_checker.py +0 -159
  224. taxonomy_mapping/taxonomy_graph.py +0 -346
  225. taxonomy_mapping/validate_lila_category_mappings.py +0 -83
  226. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/WHEEL +0 -0
@@ -1,84 +0,0 @@
1
- """
2
-
3
- remap_coco_categories.py
4
-
5
- Given a COCO-formatted dataset, remap the categories to a new mapping.
6
-
7
- """
8
-
9
- #%% Imports and constants
10
-
11
- import os
12
- import json
13
-
14
- from copy import deepcopy
15
-
16
-
17
- #%% Main function
18
-
19
- def remap_coco_categories(input_data,
20
- output_category_name_to_id,
21
- input_category_name_to_output_category_name,
22
- output_file=None):
23
- """
24
- Given a COCO-formatted dataset, remap the categories to a new category mapping, optionally
25
- writing the results to a new file.
26
-
27
- output_category_name_to_id is a dict mapping strings to ints.
28
-
29
- input_category_name_to_output_category_name is a dict mapping strings to strings.
30
-
31
- [input_data] can be a COCO-formatted dict or a filename. If it's a dict, it will be copied,
32
- not modified in place.
33
- """
34
-
35
- if isinstance(input_data,str):
36
- assert os.path.isfile(input_data), "Can't find file {}".format(input_data)
37
- with open(input_data,'r') as f:
38
- input_data = json.load(f)
39
- assert isinstance(input_data,dict), 'Illegal COCO input data'
40
- else:
41
- assert isinstance(input_data,dict), 'Illegal COCO input data'
42
- input_data = deepcopy(input_data)
43
-
44
- # It's safe to modify in-place now
45
- output_data = input_data
46
-
47
- # Read input name --> ID mapping
48
- input_category_name_to_input_category_id = {}
49
- for c in input_data['categories']:
50
- input_category_name_to_input_category_id[c['name']] = c['id']
51
-
52
- # Map input IDs --> output IDs
53
- input_category_id_to_output_category_id = {}
54
- for input_name in input_category_name_to_output_category_name.keys():
55
- output_name = input_category_name_to_output_category_name[input_name]
56
- assert output_name in output_category_name_to_id, \
57
- 'No output ID for {} --> {}'.format(input_name,output_name)
58
- input_id = input_category_name_to_input_category_id[input_name]
59
- output_id = output_category_name_to_id[output_name]
60
- input_category_id_to_output_category_id[input_id] = output_id
61
-
62
- # Map annotations
63
- for ann in output_data['annotations']:
64
- assert ann['category_id'] in input_category_id_to_output_category_id, \
65
- 'Unrecognized category ID {}'.format(ann['category_id'])
66
- ann['category_id'] = input_category_id_to_output_category_id[ann['category_id']]
67
-
68
- # Update the category list
69
- output_categories = []
70
- for output_name in output_category_name_to_id:
71
- category = {'name':output_name,'id':output_category_name_to_id[output_name]}
72
- output_categories.append(category)
73
- output_data['categories'] = output_categories
74
-
75
- if output_file is not None:
76
- with open(output_file,'w') as f:
77
- json.dump(output_data,f,indent=1)
78
-
79
- return input_data
80
-
81
-
82
- #%% Command-line driver
83
-
84
- # TODO
@@ -1,66 +0,0 @@
1
- """
2
-
3
- remove_exif.py
4
-
5
- Removes all EXIF/IPTC/XMP metadata from a folder of images, without making
6
- backup copies, using pyexiv2.
7
-
8
- TODO: This is a one-off script waiting to be cleaned up for more general use.
9
-
10
- """
11
-
12
- input_base = r'f:\images'
13
-
14
-
15
- #%% Imports and constants
16
-
17
- import os
18
- import glob
19
-
20
- def main():
21
-
22
- assert os.path.isdir(input_base)
23
-
24
- ##%% List files
25
-
26
- all_files = [f for f in glob.glob(input_base + "*/**", recursive=True)]
27
- image_files = [s for s in all_files if (s.lower().endswith('.jpg'))]
28
-
29
-
30
- ##%% Remove EXIF data (support)
31
-
32
- import pyexiv2
33
-
34
- # PYEXIV2 IS NOT THREAD SAFE; DO NOT CALL THIS IN PARALLEL FROM A SINGLE PROCESS
35
- def remove_exif(fn):
36
-
37
- try:
38
- img = pyexiv2.Image(fn)
39
- # data = img.read_exif(); print(data)
40
- img.clear_exif()
41
- img.clear_iptc()
42
- img.clear_xmp()
43
- img.close()
44
- except Exception as e:
45
- print('EXIF error on {}: {}'.format(fn,str(e)))
46
-
47
-
48
- ##%% Remove EXIF data (execution)
49
-
50
- from joblib import Parallel, delayed
51
-
52
- n_exif_threads = 50
53
-
54
- if n_exif_threads == 1:
55
-
56
- # fn = image_files[0]
57
- for fn in image_files:
58
- remove_exif(fn)
59
-
60
- else:
61
- # joblib.Parallel defaults to a process-based backend, but let's be sure
62
- # results = Parallel(n_jobs=n_exif_threads,verbose=2,prefer='processes')(delayed(remove_exif)(fn) for fn in image_files[0:10])
63
- _ = Parallel(n_jobs=n_exif_threads,verbose=2,prefer='processes')(delayed(remove_exif)(fn) for fn in image_files)
64
-
65
- if __name__ == '__main__':
66
- main()
@@ -1,189 +0,0 @@
1
- """
2
-
3
- resize_coco_dataset.py
4
-
5
- Given a COCO-formatted dataset, resizes all the images to a target size,
6
- scaling bounding boxes accordingly.
7
-
8
- """
9
-
10
- #%% Imports and constants
11
-
12
- import os
13
- import json
14
- import shutil
15
-
16
- from tqdm import tqdm
17
- from collections import defaultdict
18
-
19
- from md_utils.path_utils import insert_before_extension
20
- from md_visualization.visualization_utils import \
21
- open_image, resize_image, exif_preserving_save
22
-
23
-
24
- #%% Functions
25
-
26
- def resize_coco_dataset(input_folder,input_filename,
27
- output_folder,output_filename,
28
- target_size=(-1,-1),
29
- correct_size_image_handling='copy'):
30
- """
31
- Given a COCO-formatted dataset (images in input_folder, data in input_filename), resizes
32
- all the images to a target size (in output_folder) and scales bounding boxes accordingly.
33
-
34
- Args:
35
- input_folder (str): the folder where images live; filenames in [input_filename] should
36
- be relative to [input_folder]
37
- input_filename (str): the (input) COCO-formatted .json file containing annotations
38
- output_folder (str): the folder to which we should write resized images; can be the
39
- same as [input_folder], in which case images are over-written
40
- output_filename (str): the COCO-formatted .json file we should generate that refers to
41
- the resized images
42
- target_size (list or tuple of ints): this should be a tuple/list of ints, with length 2 (w,h).
43
- If either dimension is -1, aspect ratio will be preserved. If both dimensions are -1, this means
44
- "keep the original size". If both dimensions are -1 and correct_size_image_handling is copy, this
45
- function is basically a no-op.
46
- correct_size_image_handling (str): can be 'copy' (in which case the original image is just copied
47
- to the output folder) or 'rewrite' (in which case the image is opened via PIL and re-written,
48
- attempting to preserve the same quality). The only reason to use 'rewrite' is the case where
49
- you're superstitious about biases coming from images in a training set being written by different
50
- image encoders.
51
-
52
- Returns:
53
- dict: the COCO database with resized images, identical to the content of [output_filename]
54
- """
55
-
56
- # Read input data
57
- with open(input_filename,'r') as f:
58
- d = json.load(f)
59
-
60
- # Map image IDs to annotations
61
- image_id_to_annotations = defaultdict(list)
62
- for ann in d['annotations']:
63
- image_id_to_annotations[ann['image_id']].append(ann)
64
-
65
- # For each image
66
-
67
- # TODO: this is trivially parallelizable
68
- #
69
- # im = d['images'][0]
70
- for im in tqdm(d['images']):
71
-
72
- input_fn_relative = im['file_name']
73
- input_fn_abs = os.path.join(input_folder,input_fn_relative)
74
- assert os.path.isfile(input_fn_abs), "Can't find image file {}".format(input_fn_abs)
75
-
76
- output_fn_abs = os.path.join(output_folder,input_fn_relative)
77
- os.makedirs(os.path.dirname(output_fn_abs),exist_ok=True)
78
-
79
- pil_im = open_image(input_fn_abs)
80
- input_w = pil_im.width
81
- input_h = pil_im.height
82
-
83
- image_is_already_target_size = \
84
- (input_w == target_size[0]) and (input_h == target_size[1])
85
- preserve_original_size = \
86
- (target_size[0] == -1) and (target_size[1] == -1)
87
-
88
- # If the image is already the right size...
89
- if (image_is_already_target_size or preserve_original_size):
90
- output_w = input_w
91
- output_h = input_h
92
- if correct_size_image_handling == 'copy':
93
- shutil.copyfile(input_fn_abs,output_fn_abs)
94
- elif correct_size_image_handling == 'rewrite':
95
- exif_preserving_save(pil_im,output_fn_abs)
96
- else:
97
- raise ValueError('Unrecognized value {} for correct_size_image_handling'.format(
98
- correct_size_image_handling))
99
- else:
100
- pil_im = resize_image(pil_im, target_size[0], target_size[1])
101
- output_w = pil_im.width
102
- output_h = pil_im.height
103
- exif_preserving_save(pil_im,output_fn_abs)
104
-
105
- im['width'] = output_w
106
- im['height'] = output_h
107
-
108
- # For each box
109
- annotations_this_image = image_id_to_annotations[im['id']]
110
-
111
- # ann = annotations_this_image[0]
112
- for ann in annotations_this_image:
113
-
114
- if 'bbox' in ann:
115
-
116
- # boxes are [x,y,w,h]
117
- bbox = ann['bbox']
118
-
119
- # Do we need to scale this box?
120
- if (output_w != input_w) or (output_h != input_h):
121
- width_scale = output_w/input_w
122
- height_scale = output_h/input_h
123
- bbox = \
124
- [bbox[0] * width_scale,
125
- bbox[1] * height_scale,
126
- bbox[2] * width_scale,
127
- bbox[3] * height_scale]
128
-
129
- ann['bbox'] = bbox
130
-
131
- # ...if this annotation has a box
132
-
133
- # ...for each annotation
134
-
135
- # ...for each image
136
-
137
- # Write output file
138
- with open(output_filename,'w') as f:
139
- json.dump(d,f,indent=1)
140
-
141
- return d
142
-
143
- # ...def resize_coco_dataset(...)
144
-
145
-
146
- #%% Interactive driver
147
-
148
- if False:
149
-
150
- pass
151
-
152
- #%% Test resizing
153
-
154
- input_folder = os.path.expanduser('~/data/usgs-tegus/usgs-kissel-training')
155
- input_filename = os.path.expanduser('~/data/usgs-tegus/usgs-kissel-training.json')
156
- target_size = (1600,-1)
157
-
158
- output_filename = insert_before_extension(input_filename,'resized-test')
159
- output_folder = input_folder + '-resized-test'
160
-
161
- correct_size_image_handling = 'rewrite'
162
-
163
- resize_coco_dataset(input_folder,input_filename,
164
- output_folder,output_filename,
165
- target_size=target_size,
166
- correct_size_image_handling=correct_size_image_handling)
167
-
168
-
169
- #%% Preview
170
-
171
- from md_visualization import visualize_db
172
- options = visualize_db.DbVizOptions()
173
- options.parallelize_rendering = True
174
- options.viz_size = (900, -1)
175
- options.num_to_visualize = 5000
176
-
177
- html_file,_ = visualize_db.visualize_db(output_filename,
178
- os.path.expanduser('~/tmp/resize_coco_preview'),
179
- output_folder,options)
180
-
181
-
182
- from md_utils import path_utils # noqa
183
- path_utils.open_file(html_file)
184
-
185
-
186
- #%% Command-line driver
187
-
188
- # TODO
189
-
@@ -1,246 +0,0 @@
1
- """
2
-
3
- wi_download_csv_to_coco.py
4
-
5
- Converts a .csv file from a Wildlife Insights project export to a COCO camera traps .json file.
6
-
7
- Currently assumes that common names are unique identifiers, which is convenient but unreliable.
8
-
9
- """
10
-
11
- #%% Imports and constants
12
-
13
- import os
14
- import json
15
- import pandas as pd
16
-
17
- from tqdm import tqdm
18
- from collections import defaultdict
19
-
20
- from md_visualization import visualization_utils as vis_utils
21
- from md_utils.ct_utils import isnan
22
-
23
- wi_extra_annotation_columns = \
24
- ('is_blank','identified_by','wi_taxon_id','class','order','family','genus','species','uncertainty',
25
- 'number_of_objects','age','sex','animal_recognizable','individual_id','individual_animal_notes',
26
- 'behavior','highlighted','markings')
27
-
28
- wi_extra_image_columns = ('project_id','deployment_id')
29
-
30
- def _make_location_id(project_id,deployment_id):
31
- return 'project_' + str(project_id) + '_deployment_' + deployment_id
32
-
33
- default_category_remappings = {
34
- 'Homo Species':'Human',
35
- 'Human-Camera Trapper':'Human',
36
- 'No CV Result':'Unknown'
37
- }
38
-
39
-
40
- #%% Main function
41
-
42
- def wi_download_csv_to_coco(csv_file_in,
43
- coco_file_out=None,
44
- image_folder=None,
45
- validate_images=False,
46
- gs_prefix=None,
47
- verbose=True,
48
- category_remappings=default_category_remappings):
49
- """
50
- Converts a .csv file from a Wildlife Insights project export to a COCO
51
- Camera Traps .json file.
52
-
53
- Args:
54
- csv_file_in (str): the downloaded .csv file we should convert to COCO
55
- coco_file_out (str, optional): the .json file we should write; if [coco_file_out] is None,
56
- uses [csv_file_in].json
57
- image_folder (str, optional): the folder where images live, only relevant if
58
- [validate_images] is True
59
- validate_images (bool, optional): whether to check images for corruption and load
60
- image sizes; if this is True, [image_folder] must be a valid folder
61
- gs_prefix (str, optional): a string to remove from GS URLs to convert to path names...
62
- for example, if your gs:// URLs look like:
63
-
64
- `gs://11234134_xyz/deployment/55554/dfadfasdfs.jpg`
65
-
66
- ...and you specify gs_prefix='11234134_xyz/deployment/', the filenames in
67
- the .json file will look like:
68
-
69
- `55554/dfadfasdfs.jpg`
70
- verbose (bool, optional): enable additional debug console output
71
- category_remappings (dict, optional): str --> str dict that maps any number of
72
- WI category names to output category names; for example defaults to mapping
73
- "Homo Species" to "Human", but leaves 99.99% of categories unchanged.
74
-
75
- Returns:
76
- dict: COCO-formatted data, identical to what's written to [coco_file_out]
77
- """
78
-
79
- ##%% Create COCO dictionaries
80
-
81
- category_name_to_id = {}
82
- category_name_to_id['empty'] = 0
83
-
84
- df = pd.read_csv(csv_file_in)
85
-
86
- print('Read {} rows from {}'.format(len(df),csv_file_in))
87
-
88
- image_id_to_image = {}
89
- image_id_to_annotations = defaultdict(list)
90
-
91
- # i_row = 0; row = df.iloc[i_row]
92
- for i_row,row in df.iterrows():
93
-
94
- image_id = row['image_id']
95
-
96
- if image_id not in image_id_to_image:
97
-
98
- im = {}
99
- image_id_to_image[image_id] = im
100
-
101
- im['id'] = image_id
102
-
103
- gs_url = row['location']
104
- assert gs_url.startswith('gs://')
105
-
106
- file_name = gs_url.replace('gs://','')
107
- if gs_prefix is not None:
108
- file_name = file_name.replace(gs_prefix,'')
109
-
110
- location_id = _make_location_id(row['project_id'],row['deployment_id'])
111
- im['file_name'] = file_name
112
- im['location'] = location_id
113
- im['datetime'] = row['timestamp']
114
-
115
- im['wi_image_info'] = {}
116
- for s in wi_extra_image_columns:
117
- im['wi_image_info'][s] = str(row[s])
118
-
119
- else:
120
-
121
- im = image_id_to_image[image_id]
122
- assert im['datetime'] == row['timestamp']
123
- location_id = _make_location_id(row['project_id'],row['deployment_id'])
124
- assert im['location'] == location_id
125
-
126
- category_name = row['common_name']
127
- if category_remappings is not None and category_name in category_remappings:
128
- category_name = category_remappings[category_name]
129
-
130
- if category_name == 'Blank':
131
- category_name = 'empty'
132
- assert row['is_blank'] == 1
133
- else:
134
- assert row['is_blank'] == 0
135
- assert isinstance(category_name,str)
136
- if category_name in category_name_to_id:
137
- category_id = category_name_to_id[category_name]
138
- else:
139
- category_id = len(category_name_to_id)
140
- category_name_to_id[category_name] = category_id
141
-
142
- ann = {}
143
- ann['image_id'] = image_id
144
- annotations_this_image = image_id_to_annotations[image_id]
145
- annotation_number = len(annotations_this_image)
146
- ann['id'] = image_id + '_' + str(annotation_number).zfill(2)
147
- ann['category_id'] = category_id
148
- annotations_this_image.append(ann)
149
-
150
- extra_info = {}
151
- for s in wi_extra_annotation_columns:
152
- v = row[s]
153
- if not isnan(v):
154
- extra_info[s] = v
155
- ann['wi_extra_info'] = extra_info
156
-
157
- # ...for each row
158
-
159
- images = list(image_id_to_image.values())
160
- categories = []
161
- for category_name in category_name_to_id:
162
- category_id = category_name_to_id[category_name]
163
- categories.append({'id':category_id,'name':category_name})
164
- annotations = []
165
- for image_id in image_id_to_annotations:
166
- annotations_this_image = image_id_to_annotations[image_id]
167
- for ann in annotations_this_image:
168
- annotations.append(ann)
169
- info = {'version':'1.00','description':'converted from WI export'}
170
- info['source_file'] = csv_file_in
171
- coco_data = {}
172
- coco_data['info'] = info
173
- coco_data['images'] = images
174
- coco_data['annotations'] = annotations
175
- coco_data['categories'] = categories
176
-
177
-
178
- ##%% Validate images, add sizes
179
-
180
- if validate_images:
181
-
182
- print('Validating images')
183
- # TODO: trivially parallelizable
184
-
185
- assert os.path.isdir(image_folder), \
186
- 'Must specify a valid image folder if you specify validate_images=True'
187
-
188
- # im = images[0]
189
- for im in tqdm(images):
190
- file_name_relative = im['file_name']
191
- file_name_abs = os.path.join(image_folder,file_name_relative)
192
- assert os.path.isfile(file_name_abs)
193
-
194
- im['corrupt'] = False
195
- try:
196
- pil_im = vis_utils.load_image(file_name_abs)
197
- except Exception:
198
- im['corrupt'] = True
199
- if not im['corrupt']:
200
- im['width'] = pil_im.width
201
- im['height'] = pil_im.height
202
-
203
-
204
- ##%% Write output json
205
-
206
- if coco_file_out is None:
207
- coco_file_out = csv_file_in + '.json'
208
-
209
- with open(coco_file_out,'w') as f:
210
- json.dump(coco_data,f,indent=1)
211
-
212
-
213
- ##%% Validate output
214
-
215
- from data_management.databases.integrity_check_json_db import \
216
- IntegrityCheckOptions,integrity_check_json_db
217
- options = IntegrityCheckOptions()
218
- options.baseDir = image_folder
219
- options.bCheckImageExistence = True
220
- options.verbose = verbose
221
- _ = integrity_check_json_db(coco_file_out,options)
222
-
223
- return coco_data
224
-
225
- # ...def wi_download_csv_to_coco(...)
226
-
227
-
228
- #%% Interactive driver
229
-
230
- if False:
231
-
232
- #%%
233
-
234
- base_folder = r'a/b/c'
235
- csv_file_in = os.path.join(base_folder,'images.csv')
236
- coco_file_out = None
237
- gs_prefix = 'a_b_c_main/'
238
- image_folder = os.path.join(base_folder,'images')
239
- validate_images = False
240
- verbose = True
241
- category_remappings = default_category_remappings
242
-
243
-
244
- #%% Command-line driver
245
-
246
- # TODO