megadetector 5.0.9__py3-none-any.whl → 5.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (226)
  1. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/LICENSE +0 -0
  2. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/METADATA +12 -11
  3. megadetector-5.0.11.dist-info/RECORD +5 -0
  4. megadetector-5.0.11.dist-info/top_level.txt +1 -0
  5. api/__init__.py +0 -0
  6. api/batch_processing/__init__.py +0 -0
  7. api/batch_processing/api_core/__init__.py +0 -0
  8. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  9. api/batch_processing/api_core/batch_service/score.py +0 -439
  10. api/batch_processing/api_core/server.py +0 -294
  11. api/batch_processing/api_core/server_api_config.py +0 -98
  12. api/batch_processing/api_core/server_app_config.py +0 -55
  13. api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  14. api/batch_processing/api_core/server_job_status_table.py +0 -152
  15. api/batch_processing/api_core/server_orchestration.py +0 -360
  16. api/batch_processing/api_core/server_utils.py +0 -92
  17. api/batch_processing/api_core_support/__init__.py +0 -0
  18. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  19. api/batch_processing/api_support/__init__.py +0 -0
  20. api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  21. api/batch_processing/data_preparation/__init__.py +0 -0
  22. api/batch_processing/data_preparation/manage_local_batch.py +0 -2391
  23. api/batch_processing/data_preparation/manage_video_batch.py +0 -327
  24. api/batch_processing/integration/digiKam/setup.py +0 -6
  25. api/batch_processing/integration/digiKam/xmp_integration.py +0 -465
  26. api/batch_processing/integration/eMammal/test_scripts/config_template.py +0 -5
  27. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -126
  28. api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +0 -55
  29. api/batch_processing/postprocessing/__init__.py +0 -0
  30. api/batch_processing/postprocessing/add_max_conf.py +0 -64
  31. api/batch_processing/postprocessing/categorize_detections_by_size.py +0 -163
  32. api/batch_processing/postprocessing/combine_api_outputs.py +0 -249
  33. api/batch_processing/postprocessing/compare_batch_results.py +0 -958
  34. api/batch_processing/postprocessing/convert_output_format.py +0 -397
  35. api/batch_processing/postprocessing/load_api_results.py +0 -195
  36. api/batch_processing/postprocessing/md_to_coco.py +0 -310
  37. api/batch_processing/postprocessing/md_to_labelme.py +0 -330
  38. api/batch_processing/postprocessing/merge_detections.py +0 -401
  39. api/batch_processing/postprocessing/postprocess_batch_results.py +0 -1904
  40. api/batch_processing/postprocessing/remap_detection_categories.py +0 -170
  41. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +0 -661
  42. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +0 -211
  43. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +0 -82
  44. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +0 -1631
  45. api/batch_processing/postprocessing/separate_detections_into_folders.py +0 -731
  46. api/batch_processing/postprocessing/subset_json_detector_output.py +0 -696
  47. api/batch_processing/postprocessing/top_folders_to_bottom.py +0 -223
  48. api/synchronous/__init__.py +0 -0
  49. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  50. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -152
  51. api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -266
  52. api/synchronous/api_core/animal_detection_api/config.py +0 -35
  53. api/synchronous/api_core/animal_detection_api/data_management/annotations/annotation_constants.py +0 -47
  54. api/synchronous/api_core/animal_detection_api/detection/detector_training/copy_checkpoints.py +0 -43
  55. api/synchronous/api_core/animal_detection_api/detection/detector_training/model_main_tf2.py +0 -114
  56. api/synchronous/api_core/animal_detection_api/detection/process_video.py +0 -543
  57. api/synchronous/api_core/animal_detection_api/detection/pytorch_detector.py +0 -304
  58. api/synchronous/api_core/animal_detection_api/detection/run_detector.py +0 -627
  59. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +0 -1029
  60. api/synchronous/api_core/animal_detection_api/detection/run_inference_with_yolov5_val.py +0 -581
  61. api/synchronous/api_core/animal_detection_api/detection/run_tiled_inference.py +0 -754
  62. api/synchronous/api_core/animal_detection_api/detection/tf_detector.py +0 -165
  63. api/synchronous/api_core/animal_detection_api/detection/video_utils.py +0 -495
  64. api/synchronous/api_core/animal_detection_api/md_utils/azure_utils.py +0 -174
  65. api/synchronous/api_core/animal_detection_api/md_utils/ct_utils.py +0 -262
  66. api/synchronous/api_core/animal_detection_api/md_utils/directory_listing.py +0 -251
  67. api/synchronous/api_core/animal_detection_api/md_utils/matlab_porting_tools.py +0 -97
  68. api/synchronous/api_core/animal_detection_api/md_utils/path_utils.py +0 -416
  69. api/synchronous/api_core/animal_detection_api/md_utils/process_utils.py +0 -110
  70. api/synchronous/api_core/animal_detection_api/md_utils/sas_blob_utils.py +0 -509
  71. api/synchronous/api_core/animal_detection_api/md_utils/string_utils.py +0 -59
  72. api/synchronous/api_core/animal_detection_api/md_utils/url_utils.py +0 -144
  73. api/synchronous/api_core/animal_detection_api/md_utils/write_html_image_list.py +0 -226
  74. api/synchronous/api_core/animal_detection_api/md_visualization/visualization_utils.py +0 -841
  75. api/synchronous/api_core/tests/__init__.py +0 -0
  76. api/synchronous/api_core/tests/load_test.py +0 -110
  77. classification/__init__.py +0 -0
  78. classification/aggregate_classifier_probs.py +0 -108
  79. classification/analyze_failed_images.py +0 -227
  80. classification/cache_batchapi_outputs.py +0 -198
  81. classification/create_classification_dataset.py +0 -627
  82. classification/crop_detections.py +0 -516
  83. classification/csv_to_json.py +0 -226
  84. classification/detect_and_crop.py +0 -855
  85. classification/efficientnet/__init__.py +0 -9
  86. classification/efficientnet/model.py +0 -415
  87. classification/efficientnet/utils.py +0 -610
  88. classification/evaluate_model.py +0 -520
  89. classification/identify_mislabeled_candidates.py +0 -152
  90. classification/json_to_azcopy_list.py +0 -63
  91. classification/json_validator.py +0 -695
  92. classification/map_classification_categories.py +0 -276
  93. classification/merge_classification_detection_output.py +0 -506
  94. classification/prepare_classification_script.py +0 -194
  95. classification/prepare_classification_script_mc.py +0 -228
  96. classification/run_classifier.py +0 -286
  97. classification/save_mislabeled.py +0 -110
  98. classification/train_classifier.py +0 -825
  99. classification/train_classifier_tf.py +0 -724
  100. classification/train_utils.py +0 -322
  101. data_management/__init__.py +0 -0
  102. data_management/annotations/__init__.py +0 -0
  103. data_management/annotations/annotation_constants.py +0 -34
  104. data_management/camtrap_dp_to_coco.py +0 -238
  105. data_management/cct_json_utils.py +0 -395
  106. data_management/cct_to_md.py +0 -176
  107. data_management/cct_to_wi.py +0 -289
  108. data_management/coco_to_labelme.py +0 -272
  109. data_management/coco_to_yolo.py +0 -662
  110. data_management/databases/__init__.py +0 -0
  111. data_management/databases/add_width_and_height_to_db.py +0 -33
  112. data_management/databases/combine_coco_camera_traps_files.py +0 -206
  113. data_management/databases/integrity_check_json_db.py +0 -477
  114. data_management/databases/subset_json_db.py +0 -115
  115. data_management/generate_crops_from_cct.py +0 -149
  116. data_management/get_image_sizes.py +0 -188
  117. data_management/importers/add_nacti_sizes.py +0 -52
  118. data_management/importers/add_timestamps_to_icct.py +0 -79
  119. data_management/importers/animl_results_to_md_results.py +0 -158
  120. data_management/importers/auckland_doc_test_to_json.py +0 -372
  121. data_management/importers/auckland_doc_to_json.py +0 -200
  122. data_management/importers/awc_to_json.py +0 -189
  123. data_management/importers/bellevue_to_json.py +0 -273
  124. data_management/importers/cacophony-thermal-importer.py +0 -796
  125. data_management/importers/carrizo_shrubfree_2018.py +0 -268
  126. data_management/importers/carrizo_trail_cam_2017.py +0 -287
  127. data_management/importers/cct_field_adjustments.py +0 -57
  128. data_management/importers/channel_islands_to_cct.py +0 -913
  129. data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  130. data_management/importers/eMammal/eMammal_helpers.py +0 -249
  131. data_management/importers/eMammal/make_eMammal_json.py +0 -223
  132. data_management/importers/ena24_to_json.py +0 -275
  133. data_management/importers/filenames_to_json.py +0 -385
  134. data_management/importers/helena_to_cct.py +0 -282
  135. data_management/importers/idaho-camera-traps.py +0 -1407
  136. data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  137. data_management/importers/jb_csv_to_json.py +0 -150
  138. data_management/importers/mcgill_to_json.py +0 -250
  139. data_management/importers/missouri_to_json.py +0 -489
  140. data_management/importers/nacti_fieldname_adjustments.py +0 -79
  141. data_management/importers/noaa_seals_2019.py +0 -181
  142. data_management/importers/pc_to_json.py +0 -365
  143. data_management/importers/plot_wni_giraffes.py +0 -123
  144. data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
  145. data_management/importers/prepare_zsl_imerit.py +0 -131
  146. data_management/importers/rspb_to_json.py +0 -356
  147. data_management/importers/save_the_elephants_survey_A.py +0 -320
  148. data_management/importers/save_the_elephants_survey_B.py +0 -332
  149. data_management/importers/snapshot_safari_importer.py +0 -758
  150. data_management/importers/snapshot_safari_importer_reprise.py +0 -665
  151. data_management/importers/snapshot_serengeti_lila.py +0 -1067
  152. data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  153. data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  154. data_management/importers/sulross_get_exif.py +0 -65
  155. data_management/importers/timelapse_csv_set_to_json.py +0 -490
  156. data_management/importers/ubc_to_json.py +0 -399
  157. data_management/importers/umn_to_json.py +0 -507
  158. data_management/importers/wellington_to_json.py +0 -263
  159. data_management/importers/wi_to_json.py +0 -441
  160. data_management/importers/zamba_results_to_md_results.py +0 -181
  161. data_management/labelme_to_coco.py +0 -548
  162. data_management/labelme_to_yolo.py +0 -272
  163. data_management/lila/__init__.py +0 -0
  164. data_management/lila/add_locations_to_island_camera_traps.py +0 -97
  165. data_management/lila/add_locations_to_nacti.py +0 -147
  166. data_management/lila/create_lila_blank_set.py +0 -557
  167. data_management/lila/create_lila_test_set.py +0 -151
  168. data_management/lila/create_links_to_md_results_files.py +0 -106
  169. data_management/lila/download_lila_subset.py +0 -177
  170. data_management/lila/generate_lila_per_image_labels.py +0 -515
  171. data_management/lila/get_lila_annotation_counts.py +0 -170
  172. data_management/lila/get_lila_image_counts.py +0 -111
  173. data_management/lila/lila_common.py +0 -300
  174. data_management/lila/test_lila_metadata_urls.py +0 -132
  175. data_management/ocr_tools.py +0 -874
  176. data_management/read_exif.py +0 -681
  177. data_management/remap_coco_categories.py +0 -84
  178. data_management/remove_exif.py +0 -66
  179. data_management/resize_coco_dataset.py +0 -189
  180. data_management/wi_download_csv_to_coco.py +0 -246
  181. data_management/yolo_output_to_md_output.py +0 -441
  182. data_management/yolo_to_coco.py +0 -676
  183. detection/__init__.py +0 -0
  184. detection/detector_training/__init__.py +0 -0
  185. detection/detector_training/model_main_tf2.py +0 -114
  186. detection/process_video.py +0 -703
  187. detection/pytorch_detector.py +0 -337
  188. detection/run_detector.py +0 -779
  189. detection/run_detector_batch.py +0 -1219
  190. detection/run_inference_with_yolov5_val.py +0 -917
  191. detection/run_tiled_inference.py +0 -935
  192. detection/tf_detector.py +0 -188
  193. detection/video_utils.py +0 -606
  194. docs/source/conf.py +0 -43
  195. md_utils/__init__.py +0 -0
  196. md_utils/azure_utils.py +0 -174
  197. md_utils/ct_utils.py +0 -612
  198. md_utils/directory_listing.py +0 -246
  199. md_utils/md_tests.py +0 -968
  200. md_utils/path_utils.py +0 -1044
  201. md_utils/process_utils.py +0 -157
  202. md_utils/sas_blob_utils.py +0 -509
  203. md_utils/split_locations_into_train_val.py +0 -228
  204. md_utils/string_utils.py +0 -92
  205. md_utils/url_utils.py +0 -323
  206. md_utils/write_html_image_list.py +0 -225
  207. md_visualization/__init__.py +0 -0
  208. md_visualization/plot_utils.py +0 -293
  209. md_visualization/render_images_with_thumbnails.py +0 -275
  210. md_visualization/visualization_utils.py +0 -1537
  211. md_visualization/visualize_db.py +0 -551
  212. md_visualization/visualize_detector_output.py +0 -406
  213. megadetector-5.0.9.dist-info/RECORD +0 -224
  214. megadetector-5.0.9.dist-info/top_level.txt +0 -8
  215. taxonomy_mapping/__init__.py +0 -0
  216. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +0 -491
  217. taxonomy_mapping/map_new_lila_datasets.py +0 -154
  218. taxonomy_mapping/prepare_lila_taxonomy_release.py +0 -142
  219. taxonomy_mapping/preview_lila_taxonomy.py +0 -591
  220. taxonomy_mapping/retrieve_sample_image.py +0 -71
  221. taxonomy_mapping/simple_image_download.py +0 -218
  222. taxonomy_mapping/species_lookup.py +0 -834
  223. taxonomy_mapping/taxonomy_csv_checker.py +0 -159
  224. taxonomy_mapping/taxonomy_graph.py +0 -346
  225. taxonomy_mapping/validate_lila_category_mappings.py +0 -83
  226. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/WHEEL +0 -0
data_management/importers/rspb_to_json.py
@@ -1,356 +0,0 @@
- """
-
- rspb_to_json.py
-
- Convert the .csv file provided for the RSPB data set to a
- COCO-camera-traps .json file
-
- """
-
- #%% Constants and environment
-
- import pandas as pd
- import os
- import glob
- import json
- import re
- import uuid
- import tqdm
- import time
- import ntpath
- import humanfriendly
- import PIL
-
- from data_management.databases import integrity_check_json_db
- from md_visualization import visualize_db
-
- # [location] is an obfuscation
- baseDir = r'e:\wildlife_data\rspb_gola_data'
- metadataFile = os.path.join(baseDir,'gola_camtrapr_master_renaming_table_2019-01-31.csv')
- outputFile = os.path.join(baseDir,'rspb_gola_labeled.json')
- imageBaseDir = os.path.join(baseDir,'gola_camtrapr_data')
- imageFlatDir = os.path.join(baseDir,'gola_camtrapr_data_flat')
- unmatchedImagesFile = os.path.join(baseDir,'unmatchedImages.txt')
- assert(os.path.isdir(imageBaseDir))
-
-
- #%% Create info struct
-
- info = {}
- info['year'] = 2019
- info['version'] = 1
- info['description'] = 'COCO style database for RSPB gola data'
- info['secondary_contributor'] = 'Converted to COCO .json by Dan Morris'
- info['contributor'] = 'RSPB'
-
-
- #%% Read source data
-
- metadataTable = pd.read_csv(metadataFile)
-
- print('Read {} columns and {} rows from metadata file'.format(len(metadataTable.columns),
-     len(metadataTable)))
-
- # metadataTable.columns.values
- #
- # array(['Project', 'inDir', 'FileName', 'Station', 'Camera',
- #        'StationCameraFileName', 'DateTimeOriginal', 'DateReadable',
- #        'outDir', 'filename_new', 'fileExistsAlready', 'CopyStatus',
- #        'Species'], dtype=object)
-
- metadataTable[['Species']] = metadataTable[['Species']].fillna(value='unlabeled')
-
- # We'll populate these later
- metadataTable['sequenceID'] = ''
- metadataTable['frameNumber'] = ''
- metadataTable['filePath'] = ''
-
- failedCopies = metadataTable[~metadataTable.CopyStatus]
- print('Removing {} rows that were failed copies'.format(len(failedCopies)))
-
- metadataTable = metadataTable[metadataTable.CopyStatus]
-
- species = list(metadataTable.Species)
- uniqueSpecies = set(species)
-
- print('Read {} unique species in {} rows'.format(len(uniqueSpecies),len(metadataTable)))
-
- speciesMappings = {}
-
- # keys should be lowercase
- speciesMappings['blank'] = 'empty'
- speciesMappings[''] = 'unlabeled'
-
-
- #%% Enumerate images, confirm filename uniqueness
-
- imageFullPaths = glob.glob(os.path.join(imageBaseDir,r'**\*.JPG'),recursive=True)
-
- print('Counted {} images'.format(len(imageFullPaths)))
-
- filenamesOnly = set()
-
- for p in imageFullPaths:
-
-     fn = ntpath.basename(p)
-     assert fn not in filenamesOnly
-     filenamesOnly.add(fn)
-
- print('Finished uniqueness checking')
-
-
- #%% Update metadata filenames to include site and camera folders, check existence
- #
- # Takes ~1min
-
- filenamesToRows = {}
-
- startTime = time.time()
-
- newRows = []
- matchFailures = []
-
- # iRow = 0; row = metadataTable.iloc[iRow]
- for iRow,row in tqdm.tqdm(metadataTable.iterrows(), total=metadataTable.shape[0]):
-
-     baseFn = row['filename_new']
-     station = row['Station']
-
-     filenamesToRows[baseFn] = iRow
-
-     # There's a bug in the metadata; the 'camera' column isn't correct.
-     # camera = row['Camera']
-     # These appear as, e.g., '3.22e12'
-     # camera = str(int(float(camera)))
-
-     # Let's pull this out of the file name instead
-     #
-     # Filenames look like one of the following:
-     #
-     # A1__03224850850507__2015-11-28__10-45-04(1).JPG
-     # Bayama2PH__C05__NA(NA).JPG
-     pat = '^(?P<station>.+?)__(?P<camera>.+?)__((?P<date>.+?)__)?(?P<time>[^_\()]+?)\((?P<frame>.+?)\)\.JPG'
-     match = re.match(pat,baseFn)
-     if match is None:
-         raise ValueError('Regex failure at row {}: {}'.format(iRow,baseFn))
-     assert(station == match.group('station'))
-     camera = match.group('camera')
-     row['Camera'] = camera
-
-     assert match.group('station') is not None
-     assert match.group('camera') is not None
-     assert match.group('frame') is not None
-
-     if match.group('date') is None:
-         imgDate = ''
-     else:
-         imgDate = match.group('date')
-
-     if match.group('time') is None:
-         imgTime = ''
-     else:
-         imgTime = match.group('time')
-
-     frame = -1
-     try:
-         frame = int(match.group('frame'))
-     except:
-         pass
-     row['frameNumber'] = frame
-
-     fn = os.path.join(station,camera,baseFn)
-     fullPath = os.path.join(imageBaseDir,fn)
-     row['filePath'] = fn
-     # assert(os.path.isfile(fullPath))
-     if not os.path.isfile(fullPath):
-         print('Failed to match image {}'.format(fullPath))
-         matchFailures.append(fullPath)
-         continue
-
-     # metadataTable.iloc[iRow] = row
-     newRows.append(row)
-
- elapsed = time.time() - startTime
-
- # Re-assemble into an updated table
- metadataTable = pd.DataFrame(newRows)
-
- print('Finished checking file existence, extracting metadata in {}, couldn''t find {} images'.format(
-     humanfriendly.format_timespan(elapsed),len(matchFailures)))
-
-
- #%% Check for images that aren't included in the metadata file
-
- imagesNotInMetadata = []
-
- # Enumerate all images
- for iImage,imagePath in enumerate(imageFullPaths):
-
-     fn = ntpath.basename(imagePath)
-     if(fn not in filenamesToRows):
-         imagesNotInMetadata.append(imagePath)
-
- print('Finished matching {} images, failed to match {}'.format(
-     len(imageFullPaths),len(imagesNotInMetadata)))
-
- # Write to a text file
- with open(unmatchedImagesFile, 'w') as f:
-     for fn in imagesNotInMetadata:
-         f.write('{}\n'.format(fn))
-
-
- #%% Create CCT dictionaries
-
- # Also gets image sizes, so this takes ~6 minutes
- #
- # Implicitly checks images for overt corruptness, i.e. by not crashing.
-
- images = []
- annotations = []
-
- # Map categories to integer IDs (that's what COCO likes)
- nextCategoryID = 1
- categoriesToCategoryId = {'empty':0}
- categoriesToCounts = {'empty':0}
-
- # For each image
- #
- # Because in practice images are 1:1 with annotations in this data set,
- # this is also a loop over annotations.
-
- startTime = time.time()
-
- # iRow = 0; row = metadataTable.iloc[iRow]
- for iRow,row in tqdm.tqdm(metadataTable.iterrows(), total=metadataTable.shape[0]):
-
-     im = {}
-
-     # A1__03224850850507__2015-11-28__10-45-04(1).JPG
-     fn = row['filename_new']
-     assert '.JPG' in fn
-     fn = fn.replace('.JPG','')
-     im['id'] = fn
-
-     # 'A1\\03224850850507\\A1__03224850850507__2015-11-28__10-45-04(1).JPG'
-     im['file_name'] = row['filePath']
-
-     # Not currently populated
-     im['seq_id'] = row['sequenceID']
-
-     # Often -1, sometimes a semi-meaningful int
-     im['frame_num'] = row['frameNumber']
-
-     # A1
-     im['site']= row['Station']
-
-     # 03224850850507
-     im['camera'] = row['Camera']
-
-     # In variable form, but sometimes '28/11/2015 10:45'
-     im['datetime'] = row['DateTimeOriginal']
-
-     images.append(im)
-
-     # Check image height and width
-     imagePath = os.path.join(imageBaseDir,im['file_name'])
-     assert(os.path.isfile(imagePath))
-     pilImage = PIL.Image.open(imagePath)
-     width, height = pilImage.size
-     im['width'] = width
-     im['height'] = height
-
-     category = row['Species'].lower()
-     if category in speciesMappings:
-         category = speciesMappings[category]
-
-     # Have we seen this category before?
-     if category in categoriesToCategoryId:
-         categoryID = categoriesToCategoryId[category]
-         categoriesToCounts[category] += 1
-     else:
-         categoryID = nextCategoryID
-         categoriesToCategoryId[category] = categoryID
-         categoriesToCounts[category] = 0
-         nextCategoryID += 1
-
-     # Create an annotation
-     ann = {}
-
-     # The Internet tells me this guarantees uniqueness to a reasonable extent, even
-     # beyond the sheer improbability of collisions.
-     ann['id'] = str(uuid.uuid1())
-     ann['image_id'] = im['id']
-     ann['category_id'] = categoryID
-
-     annotations.append(ann)
-
- # ...for each image
-
- # Convert categories to a CCT-style dictionary
-
- categories = []
-
- for category in categoriesToCounts:
-
-     print('Category {}, count {}'.format(category,categoriesToCounts[category]))
-     categoryID = categoriesToCategoryId[category]
-     cat = {}
-     cat['name'] = category
-     cat['id'] = categoryID
-     categories.append(cat)
-
- elapsed = time.time() - startTime
-
- print('Finished creating CCT dictionaries in {}'.format(
-     humanfriendly.format_timespan(elapsed)))
-
-
- #%% Write output
-
- json_data = {}
- json_data['images'] = images
- json_data['annotations'] = annotations
- json_data['categories'] = categories
- json_data['info'] = info
- json.dump(json_data,open(outputFile,'w'),indent=4)
-
- print('Finished writing .json file with {} images, {} annotations, and {} categories'.format(
-     len(images),len(annotations),len(categories)))
-
-
- #%% Check database integrity
-
- options = integrity_check_json_db.IntegrityCheckOptions()
- options.baseDir = imageBaseDir
- options.bCheckImageSizes = False
- options.bFindUnusedImages = False
- integrity_check_json_db.integrity_check_json_db(outputFile, options)
-
-
- #%% Preview a few images to make sure labels were passed along sensibly
-
- db_path = outputFile
- output_dir = os.path.join(baseDir,'label_preview')
- image_base_dir = imageBaseDir
- options = visualize_db.DbVizOptions()
- options.num_to_visualize = 100
- htmlOutputFile = visualize_db.visualize_db(db_path,output_dir,image_base_dir,options)
-
-
- #%% One-time processing step: copy images to a flat directory for annotation
-
- if False:
-
-     #%%
-
-     from shutil import copyfile
-     os.makedirs(imageFlatDir,exist_ok=True)
-
-     for sourcePath in tqdm.tqdm(imageFullPaths):
-         fn = ntpath.basename(sourcePath)
-         targetPath = os.path.join(imageFlatDir,fn)
-         assert not os.path.isfile(targetPath)
-         copyfile(sourcePath,targetPath)
-
-     print('Copied {} files'.format(len(imageFullPaths)))
-
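
For reference, the filename parsing in the removed rspb_to_json.py above hinges on a single regular expression. A minimal standalone sketch (not part of the package; it only reuses the pattern from the diff, written here as a raw string, against the two example filenames quoted in the script's own comments) shows what each named group captures:

import re

# Pattern from the removed script; groups: station, camera, optional date, time, frame
pat = r'^(?P<station>.+?)__(?P<camera>.+?)__((?P<date>.+?)__)?(?P<time>[^_\()]+?)\((?P<frame>.+?)\)\.JPG'

for fn in ['A1__03224850850507__2015-11-28__10-45-04(1).JPG',
           'Bayama2PH__C05__NA(NA).JPG']:
    m = re.match(pat, fn)
    assert m is not None
    # For the second filename style, 'date' is None and 'frame' is non-numeric,
    # which is why the script falls back to '' and -1 for those fields.
    print(m.group('station'), m.group('camera'), m.group('date'),
          m.group('time'), m.group('frame'))
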
data_management/importers/save_the_elephants_survey_A.py
@@ -1,320 +0,0 @@
- """
-
- save_the_elephants_survey_A.py
-
- Convert the .csv file provided for the Save the Elephants Survey A data set to a
- COCO-camera-traps .json file
-
- """
-
- #%% Constants and environment
-
- import pandas as pd
- import os
- import json
- import uuid
- import time
- import humanfriendly
- import numpy as np
- from tqdm import tqdm
-
- from md_utils.path_utils import find_images
-
- input_base = r'z:/ste_2019_08_drop'
- input_metadata_file = os.path.join(input_base,'SURVEY_A.xlsx')
-
- output_base = r'f:/save_the_elephants/survey_a'
- output_json_file = os.path.join(output_base,'ste_survey_a.json')
- image_directory = os.path.join(input_base,'SURVEY A with False Triggers')
-
- os.makedirs(output_base,exist_ok=True)
- assert(os.path.isdir(image_directory))
- assert(os.path.isfile(input_metadata_file))
-
- # Handle all unstructured fields in the source data as extra fields in the annotations
- mapped_fields = {'No. of Animals in Photo':'num_animals',
-     'No. of new indiviauls (first sighting of new individual)':'num_new_individuals',
-     'Number Adult Males (first sighting of new individual)':'num_adult_males',
-     'Number Adult Females (first sighting of new individual)':'num_adult_females',
-     'Number Adult Unknown (first sighting of new individual)':'num_adult_unknown',
-     'Number Sub-adult Males (first sighting of new individual)':'num_subadult_males',
-     'Number Sub-adult Females (first sighting of new individual)':'num_subadult_females',
-     'Number Sub-adult Unknown (first sighting of new individual)':'num_subadult_unknown',
-     'Number Juvenile (first sighting of new individual)':'num_juvenile',
-     'Number Newborn (first sighting of new individual)':'num_newborn',
-     'Activity':'activity',
-     'Animal ID':'animal_id',
-     'Specific Notes':'notes'}
-
- # photo_type really should be an image property, but there are a few conflicts
- # that forced me to handle it as an annotation property
- mapped_fields['Photo Type '] = 'photo_type'
-
-
- #%% Read source data
-
- input_metadata = pd.read_excel(input_metadata_file, sheet_name='9. CT Image')
- input_metadata = input_metadata.iloc[2:]
-
- print('Read {} columns and {} rows from metadata file'.format(len(input_metadata.columns),
-     len(input_metadata)))
-
-
- #%% Map filenames to rows, verify image existence
-
- start_time = time.time()
-
- # Maps relative paths to row indices in input_metadata
- filenames_to_rows = {}
- filenames_with_multiple_annotations = []
- missing_images = []
-
- # Build up a map from filenames to a list of rows, checking image existence as we go
- for i_row, fn in tqdm(enumerate(input_metadata['Image Name']),total=len(input_metadata)):
-
-     # Ignore directories
-     if not fn.endswith('.JPG'):
-         continue
-
-     if fn in filenames_to_rows:
-         filenames_with_multiple_annotations.append(fn)
-         filenames_to_rows[fn].append(i_row)
-     else:
-         filenames_to_rows[fn] = [i_row]
-         image_path = os.path.join(image_directory, fn)
-         if not os.path.isfile(image_path):
-             missing_images.append(image_path)
-
- elapsed = time.time() - start_time
-
- print('Finished verifying image existence for {} files in {}, found {} filenames with multiple labels, {} missing images'.format(
-     len(filenames_to_rows), humanfriendly.format_timespan(elapsed),
-     len(filenames_with_multiple_annotations),len(missing_images)))
-
-
- #%% Make sure the multiple-annotation cases make sense
-
- if False:
-
-     #%%
-
-     fn = filenames_with_multiple_annotations[1000]
-     rows = filenames_to_rows[fn]
-     assert(len(rows) > 1)
-     for i_row in rows:
-         print(input_metadata.iloc[i_row]['Species'])
-
-
- #%% Check for images that aren't included in the metadata file
-
- # Enumerate all images
- image_full_paths = find_images(image_directory, bRecursive=True)
-
- unannotated_images = []
-
- for iImage, image_path in tqdm(enumerate(image_full_paths),total=len(image_full_paths)):
-     relative_path = os.path.relpath(image_path,image_directory)
-     if relative_path not in filenames_to_rows:
-         unannotated_images.append(relative_path)
-
- print('Finished checking {} images to make sure they\'re in the metadata, found {} unannotated images'.format(
-     len(image_full_paths),len(unannotated_images)))
-
-
- #%% Create CCT dictionaries
-
- images = []
- annotations = []
- categories = []
-
- image_ids_to_images = {}
-
- category_name_to_category = {}
-
- # Force the empty category to be ID 0
- empty_category = {}
- empty_category['name'] = 'empty'
- empty_category['id'] = 0
- category_name_to_category['empty'] = empty_category
- categories.append(empty_category)
- next_category_id = 1
-
- start_time = time.time()
-
- # i_image = 0; image_name = list(filenames_to_rows.keys())[i_image]
- for image_name in tqdm(list(filenames_to_rows.keys())):
-
-     # Example filename:
-     #
-     # 'Site 1_Oloisukut_1\Oloisukut_A11_UP\Service_2\100EK113\EK001382.JPG'
-     # 'Site 1_Oloisukut_1\Oloisukut_A11_UP\Service_2.1\100EK113\EK001382.JPG'
-     img_id = image_name.replace('\\','/').replace('/','_').replace(' ','_')
-
-     row_indices = filenames_to_rows[image_name]
-
-     # i_row = row_indices[0]
-     for i_row in row_indices:
-
-         row = input_metadata.iloc[i_row]
-         assert(row['Image Name'] == image_name)
-
-         timestamp = row['Date'].strftime("%d/%m/%Y")
-         station_label = row['Camera Trap Station Label']
-         photo_type = row['Photo Type ']
-         if isinstance(photo_type,float):
-             photo_type = ''
-         photo_type = photo_type.strip().lower()
-
-         if img_id in image_ids_to_images:
-
-             im = image_ids_to_images[img_id]
-             assert im['file_name'] == image_name
-             assert im['station_label'] == station_label
-
-             # There are a small handful of datetime mismatches across annotations
-             # for the same image
-             # assert im['datetime'] == timestamp
-             if im['datetime'] != timestamp:
-                 print('Warning: timestamp conflict for image {}: {},{}'.format(
-                     image_name,im['datetime'],timestamp))
-
-         else:
-
-             im = {}
-             im['id'] = img_id
-             im['file_name'] = image_name
-             im['datetime'] = timestamp
-             im['station_label'] = station_label
-             im['photo_type'] = photo_type
-
-             image_ids_to_images[img_id] = im
-             images.append(im)
-
-         species = row['Species']
-
-         if (isinstance(species,float) or \
-             (isinstance(species,str) and (len(species) == 0))):
-             category_name = 'empty'
-         else:
-             category_name = species
-
-         # Special cases based on the 'photo type' field
-         if 'vehicle' in photo_type:
-             category_name = 'vehicle'
-         # Various spellings of 'community'
-         elif 'comm' in photo_type:
-             category_name = 'human'
-         elif 'camera' in photo_type or 'researcher' in photo_type:
-             category_name = 'human'
-         elif 'livestock' in photo_type:
-             category_name = 'livestock'
-         elif 'blank' in photo_type:
-             category_name = 'empty'
-         elif 'plant movement' in photo_type:
-             category_name = 'empty'
-
-         category_name = category_name.strip().lower()
-
-         # Have we seen this category before?
-         if category_name in category_name_to_category:
-             category_id = category_name_to_category[category_name]['id']
-         else:
-             category_id = next_category_id
-             category = {}
-             category['id'] = category_id
-             category['name'] = category_name
-             category_name_to_category[category_name] = category
-             categories.append(category)
-             next_category_id += 1
-
-         # Create an annotation
-         ann = {}
-         ann['id'] = str(uuid.uuid1())
-         ann['image_id'] = im['id']
-         ann['category_id'] = category_id
-
-         # fieldname = list(mapped_fields.keys())[0]
-         for fieldname in mapped_fields:
-             target_field = mapped_fields[fieldname]
-             val = row[fieldname]
-             if isinstance(val,float) and np.isnan(val):
-                 val = ''
-             else:
-                 val = str(val).strip()
-             ann[target_field] = val
-
-         annotations.append(ann)
-
-     # ...for each row
-
- # ...for each image
-
- print('Finished creating CCT dictionaries in {}'.format(
-     humanfriendly.format_timespan(elapsed)))
-
-
- #%% Create info struct
-
- info = {}
- info['year'] = 2019
- info['version'] = 1
- info['description'] = 'Save the Elephants Survey A'
- info['contributor'] = 'Save the Elephants'
-
-
- #%% Write output
-
- json_data = {}
- json_data['images'] = images
- json_data['annotations'] = annotations
- json_data['categories'] = categories
- json_data['info'] = info
- json.dump(json_data, open(output_json_file, 'w'), indent=2)
-
- print('Finished writing .json file with {} images, {} annotations, and {} categories'.format(
-     len(images),len(annotations),len(categories)))
-
-
- #%% Validate output
-
- from data_management.databases import integrity_check_json_db
-
- options = integrity_check_json_db.IntegrityCheckOptions()
- options.baseDir = image_directory
- options.bCheckImageSizes = False
- options.bCheckImageExistence = False
- options.bFindUnusedImages = False
-
- sortedCategories, data = integrity_check_json_db.integrity_check_json_db(output_json_file,options)
-
-
- #%% Preview labels
-
- from md_visualization import visualize_db
- from data_management.databases import integrity_check_json_db
-
- viz_options = visualize_db.DbVizOptions()
- viz_options.num_to_visualize = 1000
- viz_options.trim_to_images_with_bboxes = False
- viz_options.add_search_links = True
- viz_options.sort_by_filename = False
- viz_options.parallelize_rendering = True
- html_output_file,image_db = visualize_db.visualize_db(db_path=output_json_file,
-     output_dir=os.path.join(output_base,'preview'),
-     image_base_dir=image_directory,
-     options=viz_options)
- os.startfile(html_output_file)
-
-
- #%% Scrap
-
- if False:
-
-     pass
-
-     #%% Find unique photo types
-
-     annotations = image_db['annotations']
-     photo_types = set()
-     for ann in tqdm(annotations):
-         photo_types.add(ann['photo_type'])
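
For reference, the category assignment in the removed save_the_elephants_survey_A.py above combines the 'Species' column with overrides driven by the free-text 'Photo Type ' column. A minimal standalone restatement of that branch logic (the helper function below is ours, not part of the package) reads as follows; for instance, a photo type containing 'researcher' maps to 'human' regardless of the species label:

def category_from_photo_type(photo_type, species_category):
    # Restates the photo-type special cases from the removed script:
    # certain photo types override the species-derived category name.
    pt = photo_type.strip().lower()
    if 'vehicle' in pt:
        return 'vehicle'
    # 'comm' covers various spellings of 'community'; camera-check and
    # researcher photos are also labeled as 'human'
    if 'comm' in pt or 'camera' in pt or 'researcher' in pt:
        return 'human'
    if 'livestock' in pt:
        return 'livestock'
    # Blank frames and plant movement both map to 'empty'
    if 'blank' in pt or 'plant movement' in pt:
        return 'empty'
    return species_category
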