megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (197)
  1. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  2. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  3. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  4. megadetector/classification/aggregate_classifier_probs.py +3 -3
  5. megadetector/classification/analyze_failed_images.py +5 -5
  6. megadetector/classification/cache_batchapi_outputs.py +5 -5
  7. megadetector/classification/create_classification_dataset.py +11 -12
  8. megadetector/classification/crop_detections.py +10 -10
  9. megadetector/classification/csv_to_json.py +8 -8
  10. megadetector/classification/detect_and_crop.py +13 -15
  11. megadetector/classification/efficientnet/model.py +8 -8
  12. megadetector/classification/efficientnet/utils.py +6 -5
  13. megadetector/classification/evaluate_model.py +7 -7
  14. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  15. megadetector/classification/json_to_azcopy_list.py +1 -1
  16. megadetector/classification/json_validator.py +29 -32
  17. megadetector/classification/map_classification_categories.py +9 -9
  18. megadetector/classification/merge_classification_detection_output.py +12 -9
  19. megadetector/classification/prepare_classification_script.py +19 -19
  20. megadetector/classification/prepare_classification_script_mc.py +26 -26
  21. megadetector/classification/run_classifier.py +4 -4
  22. megadetector/classification/save_mislabeled.py +6 -6
  23. megadetector/classification/train_classifier.py +1 -1
  24. megadetector/classification/train_classifier_tf.py +9 -9
  25. megadetector/classification/train_utils.py +10 -10
  26. megadetector/data_management/annotations/annotation_constants.py +1 -2
  27. megadetector/data_management/camtrap_dp_to_coco.py +79 -46
  28. megadetector/data_management/cct_json_utils.py +103 -103
  29. megadetector/data_management/cct_to_md.py +49 -49
  30. megadetector/data_management/cct_to_wi.py +33 -33
  31. megadetector/data_management/coco_to_labelme.py +75 -75
  32. megadetector/data_management/coco_to_yolo.py +210 -193
  33. megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
  34. megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
  35. megadetector/data_management/databases/integrity_check_json_db.py +228 -200
  36. megadetector/data_management/databases/subset_json_db.py +33 -33
  37. megadetector/data_management/generate_crops_from_cct.py +88 -39
  38. megadetector/data_management/get_image_sizes.py +54 -49
  39. megadetector/data_management/labelme_to_coco.py +133 -125
  40. megadetector/data_management/labelme_to_yolo.py +159 -73
  41. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  42. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  43. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  44. megadetector/data_management/lila/download_lila_subset.py +21 -24
  45. megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
  46. megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
  47. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  48. megadetector/data_management/lila/lila_common.py +73 -70
  49. megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
  50. megadetector/data_management/mewc_to_md.py +344 -340
  51. megadetector/data_management/ocr_tools.py +262 -255
  52. megadetector/data_management/read_exif.py +249 -227
  53. megadetector/data_management/remap_coco_categories.py +90 -28
  54. megadetector/data_management/remove_exif.py +81 -21
  55. megadetector/data_management/rename_images.py +187 -187
  56. megadetector/data_management/resize_coco_dataset.py +588 -120
  57. megadetector/data_management/speciesnet_to_md.py +41 -41
  58. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  59. megadetector/data_management/yolo_output_to_md_output.py +248 -122
  60. megadetector/data_management/yolo_to_coco.py +333 -191
  61. megadetector/detection/change_detection.py +832 -0
  62. megadetector/detection/process_video.py +340 -337
  63. megadetector/detection/pytorch_detector.py +358 -278
  64. megadetector/detection/run_detector.py +399 -186
  65. megadetector/detection/run_detector_batch.py +404 -377
  66. megadetector/detection/run_inference_with_yolov5_val.py +340 -327
  67. megadetector/detection/run_tiled_inference.py +257 -249
  68. megadetector/detection/tf_detector.py +24 -24
  69. megadetector/detection/video_utils.py +332 -295
  70. megadetector/postprocessing/add_max_conf.py +19 -11
  71. megadetector/postprocessing/categorize_detections_by_size.py +45 -45
  72. megadetector/postprocessing/classification_postprocessing.py +468 -433
  73. megadetector/postprocessing/combine_batch_outputs.py +23 -23
  74. megadetector/postprocessing/compare_batch_results.py +590 -525
  75. megadetector/postprocessing/convert_output_format.py +106 -102
  76. megadetector/postprocessing/create_crop_folder.py +347 -147
  77. megadetector/postprocessing/detector_calibration.py +173 -168
  78. megadetector/postprocessing/generate_csv_report.py +508 -499
  79. megadetector/postprocessing/load_api_results.py +48 -27
  80. megadetector/postprocessing/md_to_coco.py +133 -102
  81. megadetector/postprocessing/md_to_labelme.py +107 -90
  82. megadetector/postprocessing/md_to_wi.py +40 -40
  83. megadetector/postprocessing/merge_detections.py +92 -114
  84. megadetector/postprocessing/postprocess_batch_results.py +319 -301
  85. megadetector/postprocessing/remap_detection_categories.py +91 -38
  86. megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
  87. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  88. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  89. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
  90. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  91. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  92. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  93. megadetector/postprocessing/validate_batch_results.py +70 -70
  94. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  95. megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
  96. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
  97. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
  98. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  99. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  100. megadetector/taxonomy_mapping/species_lookup.py +156 -74
  101. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  102. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  103. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  104. megadetector/utils/ct_utils.py +1049 -211
  105. megadetector/utils/directory_listing.py +21 -77
  106. megadetector/utils/gpu_test.py +22 -22
  107. megadetector/utils/md_tests.py +632 -529
  108. megadetector/utils/path_utils.py +1520 -431
  109. megadetector/utils/process_utils.py +41 -41
  110. megadetector/utils/split_locations_into_train_val.py +62 -62
  111. megadetector/utils/string_utils.py +148 -27
  112. megadetector/utils/url_utils.py +489 -176
  113. megadetector/utils/wi_utils.py +2658 -2526
  114. megadetector/utils/write_html_image_list.py +137 -137
  115. megadetector/visualization/plot_utils.py +34 -30
  116. megadetector/visualization/render_images_with_thumbnails.py +39 -74
  117. megadetector/visualization/visualization_utils.py +487 -435
  118. megadetector/visualization/visualize_db.py +232 -198
  119. megadetector/visualization/visualize_detector_output.py +82 -76
  120. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
  121. megadetector-10.0.0.dist-info/RECORD +139 -0
  122. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
  123. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  124. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  125. megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
  126. megadetector/api/batch_processing/api_core/server.py +0 -294
  127. megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
  128. megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
  129. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  130. megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
  131. megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
  132. megadetector/api/batch_processing/api_core/server_utils.py +0 -88
  133. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  134. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  135. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  136. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  137. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  138. megadetector/api/synchronous/__init__.py +0 -0
  139. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  140. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
  141. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
  142. megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
  143. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  144. megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
  145. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  146. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  147. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  148. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  149. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  150. megadetector/data_management/importers/awc_to_json.py +0 -191
  151. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  152. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  153. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  154. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  155. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  156. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  157. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  158. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  159. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  160. megadetector/data_management/importers/ena24_to_json.py +0 -276
  161. megadetector/data_management/importers/filenames_to_json.py +0 -386
  162. megadetector/data_management/importers/helena_to_cct.py +0 -283
  163. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  164. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  165. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  166. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  167. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  168. megadetector/data_management/importers/missouri_to_json.py +0 -490
  169. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  170. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  171. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  172. megadetector/data_management/importers/pc_to_json.py +0 -365
  173. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  174. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  175. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  176. megadetector/data_management/importers/rspb_to_json.py +0 -356
  177. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  178. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  179. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  180. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  181. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  182. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  183. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  184. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  185. megadetector/data_management/importers/ubc_to_json.py +0 -399
  186. megadetector/data_management/importers/umn_to_json.py +0 -507
  187. megadetector/data_management/importers/wellington_to_json.py +0 -263
  188. megadetector/data_management/importers/wi_to_json.py +0 -442
  189. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  190. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  191. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  192. megadetector/utils/azure_utils.py +0 -178
  193. megadetector/utils/sas_blob_utils.py +0 -509
  194. megadetector-5.0.28.dist-info/RECORD +0 -209
  195. /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
  196. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
  197. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
@@ -1,356 +0,0 @@
1
- """
2
-
3
- rspb_to_json.py
4
-
5
- Convert the .csv file provided for the RSPB data set to a
6
- COCO-camera-traps .json file
7
-
8
- """
9
-
10
- #%% Constants and environment
11
-
12
- import pandas as pd
13
- import os
14
- import glob
15
- import json
16
- import re
17
- import uuid
18
- import tqdm
19
- import time
20
- import ntpath
21
- import humanfriendly
22
- import PIL
23
-
24
- from megadetector.data_management.databases import integrity_check_json_db
25
- from megadetector.visualization import visualize_db
26
-
27
- # [location] is an obfuscation
28
- baseDir = r'e:\wildlife_data\rspb_gola_data'
29
- metadataFile = os.path.join(baseDir,'gola_camtrapr_master_renaming_table_2019-01-31.csv')
30
- outputFile = os.path.join(baseDir,'rspb_gola_labeled.json')
31
- imageBaseDir = os.path.join(baseDir,'gola_camtrapr_data')
32
- imageFlatDir = os.path.join(baseDir,'gola_camtrapr_data_flat')
33
- unmatchedImagesFile = os.path.join(baseDir,'unmatchedImages.txt')
34
- assert(os.path.isdir(imageBaseDir))
35
-
36
-
37
- #%% Create info struct
38
-
39
- info = {}
40
- info['year'] = 2019
41
- info['version'] = 1
42
- info['description'] = 'COCO style database for RSPB gola data'
43
- info['secondary_contributor'] = 'Converted to COCO .json by Dan Morris'
44
- info['contributor'] = 'RSPB'
45
-
46
-
47
- #%% Read source data
48
-
49
- metadataTable = pd.read_csv(metadataFile)
50
-
51
- print('Read {} columns and {} rows from metadata file'.format(len(metadataTable.columns),
52
- len(metadataTable)))
53
-
54
- # metadataTable.columns.values
55
- #
56
- # array(['Project', 'inDir', 'FileName', 'Station', 'Camera',
57
- # 'StationCameraFileName', 'DateTimeOriginal', 'DateReadable',
58
- # 'outDir', 'filename_new', 'fileExistsAlready', 'CopyStatus',
59
- # 'Species'], dtype=object)
60
-
61
- metadataTable[['Species']] = metadataTable[['Species']].fillna(value='unlabeled')
62
-
63
- # We'll populate these later
64
- metadataTable['sequenceID'] = ''
65
- metadataTable['frameNumber'] = ''
66
- metadataTable['filePath'] = ''
67
-
68
- failedCopies = metadataTable[~metadataTable.CopyStatus]
69
- print('Removing {} rows that were failed copies'.format(len(failedCopies)))
70
-
71
- metadataTable = metadataTable[metadataTable.CopyStatus]
72
-
73
- species = list(metadataTable.Species)
74
- uniqueSpecies = set(species)
75
-
76
- print('Read {} unique species in {} rows'.format(len(uniqueSpecies),len(metadataTable)))
77
-
78
- speciesMappings = {}
79
-
80
- # keys should be lowercase
81
- speciesMappings['blank'] = 'empty'
82
- speciesMappings[''] = 'unlabeled'
83
-
84
-
85
- #%% Enumerate images, confirm filename uniqueness
86
-
87
- imageFullPaths = glob.glob(os.path.join(imageBaseDir,r'**\*.JPG'),recursive=True)
88
-
89
- print('Counted {} images'.format(len(imageFullPaths)))
90
-
91
- filenamesOnly = set()
92
-
93
- for p in imageFullPaths:
94
-
95
- fn = ntpath.basename(p)
96
- assert fn not in filenamesOnly
97
- filenamesOnly.add(fn)
98
-
99
- print('Finished uniqueness checking')
100
-
101
-
102
- #%% Update metadata filenames to include site and camera folders, check existence
103
- #
104
- # Takes ~1min
105
-
106
- filenamesToRows = {}
107
-
108
- startTime = time.time()
109
-
110
- newRows = []
111
- matchFailures = []
112
-
113
- # iRow = 0; row = metadataTable.iloc[iRow]
114
- for iRow,row in tqdm.tqdm(metadataTable.iterrows(), total=metadataTable.shape[0]):
115
-
116
- baseFn = row['filename_new']
117
- station = row['Station']
118
-
119
- filenamesToRows[baseFn] = iRow
120
-
121
- # There's a bug in the metadata; the 'camera' column isn't correct.
122
- # camera = row['Camera']
123
- # These appear as, e.g., '3.22e12'
124
- # camera = str(int(float(camera)))
125
-
126
- # Let's pull this out of the file name instead
127
- #
128
- # Filenames look like one of the following:
129
- #
130
- # A1__03224850850507__2015-11-28__10-45-04(1).JPG
131
- # Bayama2PH__C05__NA(NA).JPG
132
- pat = '^(?P<station>.+?)__(?P<camera>.+?)__((?P<date>.+?)__)?(?P<time>[^_\()]+?)\((?P<frame>.+?)\)\.JPG'
133
- match = re.match(pat,baseFn)
134
- if match is None:
135
- raise ValueError('Regex failure at row {}: {}'.format(iRow,baseFn))
136
- assert(station == match.group('station'))
137
- camera = match.group('camera')
138
- row['Camera'] = camera
139
-
140
- assert match.group('station') is not None
141
- assert match.group('camera') is not None
142
- assert match.group('frame') is not None
143
-
144
- if match.group('date') is None:
145
- imgDate = ''
146
- else:
147
- imgDate = match.group('date')
148
-
149
- if match.group('time') is None:
150
- imgTime = ''
151
- else:
152
- imgTime = match.group('time')
153
-
154
- frame = -1
155
- try:
156
- frame = int(match.group['frame'])
157
- except:
158
- pass
159
- row['frameNumber'] = frame
160
-
161
- fn = os.path.join(station,camera,baseFn)
162
- fullPath = os.path.join(imageBaseDir,fn)
163
- row['filePath'] = fn
164
- # assert(os.path.isfile(fullPath))
165
- if not os.path.isfile(fullPath):
166
- print('Failed to match image {}'.format(fullPath))
167
- matchFailures.append(fullPath)
168
- continue
169
-
170
- # metadataTable.iloc[iRow] = row
171
- newRows.append(row)
172
-
173
- elapsed = time.time() - startTime
174
-
175
- # Re-assemble into an updated table
176
- metadataTable = pd.DataFrame(newRows)
177
-
178
- print('Finished checking file existence, extracting metadata in {}, couldn''t find {} images'.format(
179
- humanfriendly.format_timespan(elapsed),len(matchFailures)))
180
-
181
-
182
- #%% Check for images that aren't included in the metadata file
183
-
184
- imagesNotInMetadata = []
185
-
186
- # Enumerate all images
187
- for iImage,imagePath in enumerate(imageFullPaths):
188
-
189
- fn = ntpath.basename(imagePath)
190
- if(fn not in filenamesToRows):
191
- imagesNotInMetadata.append(imagePath)
192
-
193
- print('Finished matching {} images, failed to match {}'.format(
194
- len(imageFullPaths),len(imagesNotInMetadata)))
195
-
196
- # Write to a text file
197
- with open(unmatchedImagesFile, 'w') as f:
198
- for fn in imagesNotInMetadata:
199
- f.write('{}\n'.format(fn))
200
-
201
-
202
- #%% Create CCT dictionaries
203
-
204
- # Also gets image sizes, so this takes ~6 minutes
205
- #
206
- # Implicitly checks images for overt corruptness, i.e. by not crashing.
207
-
208
- images = []
209
- annotations = []
210
-
211
- # Map categories to integer IDs (that's what COCO likes)
212
- nextCategoryID = 1
213
- categoriesToCategoryId = {'empty':0}
214
- categoriesToCounts = {'empty':0}
215
-
216
- # For each image
217
- #
218
- # Because in practice images are 1:1 with annotations in this data set,
219
- # this is also a loop over annotations.
220
-
221
- startTime = time.time()
222
-
223
- # iRow = 0; row = metadataTable.iloc[iRow]
224
- for iRow,row in tqdm.tqdm(metadataTable.iterrows(), total=metadataTable.shape[0]):
225
-
226
- im = {}
227
-
228
- # A1__03224850850507__2015-11-28__10-45-04(1).JPG
229
- fn = row['filename_new']
230
- assert '.JPG' in fn
231
- fn = fn.replace('.JPG','')
232
- im['id'] = fn
233
-
234
- # 'A1\\03224850850507\\A1__03224850850507__2015-11-28__10-45-04(1).JPG'
235
- im['file_name'] = row['filePath']
236
-
237
- # Not currently populated
238
- im['seq_id'] = row['sequenceID']
239
-
240
- # Often -1, sometimes a semi-meaningful int
241
- im['frame_num'] = row['frameNumber']
242
-
243
- # A1
244
- im['site']= row['Station']
245
-
246
- # 03224850850507
247
- im['camera'] = row['Camera']
248
-
249
- # In variable form, but sometimes '28/11/2015 10:45'
250
- im['datetime'] = row['DateTimeOriginal']
251
-
252
- images.append(im)
253
-
254
- # Check image height and width
255
- imagePath = os.path.join(imageBaseDir,im['file_name'])
256
- assert(os.path.isfile(imagePath))
257
- pilImage = PIL.Image.open(imagePath)
258
- width, height = pilImage.size
259
- im['width'] = width
260
- im['height'] = height
261
-
262
- category = row['Species'].lower()
263
- if category in speciesMappings:
264
- category = speciesMappings[category]
265
-
266
- # Have we seen this category before?
267
- if category in categoriesToCategoryId:
268
- categoryID = categoriesToCategoryId[category]
269
- categoriesToCounts[category] += 1
270
- else:
271
- categoryID = nextCategoryID
272
- categoriesToCategoryId[category] = categoryID
273
- categoriesToCounts[category] = 0
274
- nextCategoryID += 1
275
-
276
- # Create an annotation
277
- ann = {}
278
-
279
- # The Internet tells me this guarantees uniqueness to a reasonable extent, even
280
- # beyond the sheer improbability of collisions.
281
- ann['id'] = str(uuid.uuid1())
282
- ann['image_id'] = im['id']
283
- ann['category_id'] = categoryID
284
-
285
- annotations.append(ann)
286
-
287
- # ...for each image
288
-
289
- # Convert categories to a CCT-style dictionary
290
-
291
- categories = []
292
-
293
- for category in categoriesToCounts:
294
-
295
- print('Category {}, count {}'.format(category,categoriesToCounts[category]))
296
- categoryID = categoriesToCategoryId[category]
297
- cat = {}
298
- cat['name'] = category
299
- cat['id'] = categoryID
300
- categories.append(cat)
301
-
302
- elapsed = time.time() - startTime
303
-
304
- print('Finished creating CCT dictionaries in {}'.format(
305
- humanfriendly.format_timespan(elapsed)))
306
-
307
-
308
- #%% Write output
309
-
310
- json_data = {}
311
- json_data['images'] = images
312
- json_data['annotations'] = annotations
313
- json_data['categories'] = categories
314
- json_data['info'] = info
315
- json.dump(json_data,open(outputFile,'w'),indent=4)
316
-
317
- print('Finished writing .json file with {} images, {} annotations, and {} categories'.format(
318
- len(images),len(annotations),len(categories)))
319
-
320
-
321
- #%% Check database integrity
322
-
323
- options = integrity_check_json_db.IntegrityCheckOptions()
324
- options.baseDir = imageBaseDir
325
- options.bCheckImageSizes = False
326
- options.bFindUnusedImages = False
327
- integrity_check_json_db.integrity_check_json_db(outputFile, options)
328
-
329
-
330
- #%% Preview a few images to make sure labels were passed along sensibly
331
-
332
- db_path = outputFile
333
- output_dir = os.path.join(baseDir,'label_preview')
334
- image_base_dir = imageBaseDir
335
- options = visualize_db.DbVizOptions()
336
- options.num_to_visualize = 100
337
- htmlOutputFile = visualize_db.visualize_db(db_path,output_dir,image_base_dir,options)
338
-
339
-
340
- #%% One-time processing step: copy images to a flat directory for annotation
341
-
342
- if False:
343
-
344
- #%%
345
-
346
- from shutil import copyfile
347
- os.makedirs(imageFlatDir,exist_ok=True)
348
-
349
- for sourcePath in tqdm.tqdm(imageFullPaths):
350
- fn = ntpath.basename(sourcePath)
351
- targetPath = os.path.join(imageFlatDir,fn)
352
- assert not os.path.isfile(targetPath)
353
- copyfile(sourcePath,targetPath)
354
-
355
- print('Copied {} files'.format(len(imageFullPaths)))
356
-
@@ -1,320 +0,0 @@
1
- """
2
-
3
- save_the_elephants_survey_A.py
4
-
5
- Convert the .csv file provided for the Save the Elephants Survey A data set to a
6
- COCO-camera-traps .json file
7
-
8
- """
9
-
10
- #%% Constants and environment
11
-
12
- import pandas as pd
13
- import os
14
- import json
15
- import uuid
16
- import time
17
- import humanfriendly
18
- import numpy as np
19
- from tqdm import tqdm
20
-
21
- from megadetector.utils.path_utils import find_images
22
-
23
- input_base = r'z:/ste_2019_08_drop'
24
- input_metadata_file = os.path.join(input_base,'SURVEY_A.xlsx')
25
-
26
- output_base = r'f:/save_the_elephants/survey_a'
27
- output_json_file = os.path.join(output_base,'ste_survey_a.json')
28
- image_directory = os.path.join(input_base,'SURVEY A with False Triggers')
29
-
30
- os.makedirs(output_base,exist_ok=True)
31
- assert(os.path.isdir(image_directory))
32
- assert(os.path.isfile(input_metadata_file))
33
-
34
- # Handle all unstructured fields in the source data as extra fields in the annotations
35
- mapped_fields = {'No. of Animals in Photo':'num_animals',
36
- 'No. of new indiviauls (first sighting of new individual)':'num_new_individuals',
37
- 'Number Adult Males (first sighting of new individual)':'num_adult_males',
38
- 'Number Adult Females (first sighting of new individual)':'num_adult_females',
39
- 'Number Adult Unknown (first sighting of new individual)':'num_adult_unknown',
40
- 'Number Sub-adult Males (first sighting of new individual)':'num_subadult_males',
41
- 'Number Sub-adult Females (first sighting of new individual)':'num_subadult_females',
42
- 'Number Sub-adult Unknown (first sighting of new individual)':'num_subadult_unknown',
43
- 'Number Juvenile (first sighting of new individual)':'num_juvenile',
44
- 'Number Newborn (first sighting of new individual)':'num_newborn',
45
- 'Activity':'activity',
46
- 'Animal ID':'animal_id',
47
- 'Specific Notes':'notes'}
48
-
49
- # photo_type really should be an image property, but there are a few conflicts
50
- # that forced me to handle it as an annotation property
51
- mapped_fields['Photo Type '] = 'photo_type'
52
-
53
-
54
- #%% Read source data
55
-
56
- input_metadata = pd.read_excel(input_metadata_file, sheet_name='9. CT Image')
57
- input_metadata = input_metadata.iloc[2:]
58
-
59
- print('Read {} columns and {} rows from metadata file'.format(len(input_metadata.columns),
60
- len(input_metadata)))
61
-
62
-
63
- #%% Map filenames to rows, verify image existence
64
-
65
- start_time = time.time()
66
-
67
- # Maps relative paths to row indices in input_metadata
68
- filenames_to_rows = {}
69
- filenames_with_multiple_annotations = []
70
- missing_images = []
71
-
72
- # Build up a map from filenames to a list of rows, checking image existence as we go
73
- for i_row, fn in tqdm(enumerate(input_metadata['Image Name']),total=len(input_metadata)):
74
-
75
- # Ignore directories
76
- if not fn.endswith('.JPG'):
77
- continue
78
-
79
- if fn in filenames_to_rows:
80
- filenames_with_multiple_annotations.append(fn)
81
- filenames_to_rows[fn].append(i_row)
82
- else:
83
- filenames_to_rows[fn] = [i_row]
84
- image_path = os.path.join(image_directory, fn)
85
- if not os.path.isfile(image_path):
86
- missing_images.append(image_path)
87
-
88
- elapsed = time.time() - start_time
89
-
90
- print('Finished verifying image existence for {} files in {}, found {} filenames with multiple labels, {} missing images'.format(
91
- len(filenames_to_rows), humanfriendly.format_timespan(elapsed),
92
- len(filenames_with_multiple_annotations),len(missing_images)))
93
-
94
-
95
- #%% Make sure the multiple-annotation cases make sense
96
-
97
- if False:
98
-
99
- #%%
100
-
101
- fn = filenames_with_multiple_annotations[1000]
102
- rows = filenames_to_rows[fn]
103
- assert(len(rows) > 1)
104
- for i_row in rows:
105
- print(input_metadata.iloc[i_row]['Species'])
106
-
107
-
108
- #%% Check for images that aren't included in the metadata file
109
-
110
- # Enumerate all images
111
- image_full_paths = find_images(image_directory, bRecursive=True)
112
-
113
- unannotated_images = []
114
-
115
- for iImage, image_path in tqdm(enumerate(image_full_paths),total=len(image_full_paths)):
116
- relative_path = os.path.relpath(image_path,image_directory)
117
- if relative_path not in filenames_to_rows:
118
- unannotated_images.append(relative_path)
119
-
120
- print('Finished checking {} images to make sure they\'re in the metadata, found {} unannotated images'.format(
121
- len(image_full_paths),len(unannotated_images)))
122
-
123
-
124
- #%% Create CCT dictionaries
125
-
126
- images = []
127
- annotations = []
128
- categories = []
129
-
130
- image_ids_to_images = {}
131
-
132
- category_name_to_category = {}
133
-
134
- # Force the empty category to be ID 0
135
- empty_category = {}
136
- empty_category['name'] = 'empty'
137
- empty_category['id'] = 0
138
- category_name_to_category['empty'] = empty_category
139
- categories.append(empty_category)
140
- next_category_id = 1
141
-
142
- start_time = time.time()
143
-
144
- # i_image = 0; image_name = list(filenames_to_rows.keys())[i_image]
145
- for image_name in tqdm(list(filenames_to_rows.keys())):
146
-
147
- # Example filename:
148
- #
149
- # 'Site 1_Oloisukut_1\Oloisukut_A11_UP\Service_2\100EK113\EK001382.JPG'
150
- # 'Site 1_Oloisukut_1\Oloisukut_A11_UP\Service_2.1\100EK113\EK001382.JPG'
151
- img_id = image_name.replace('\\','/').replace('/','_').replace(' ','_')
152
-
153
- row_indices = filenames_to_rows[image_name]
154
-
155
- # i_row = row_indices[0]
156
- for i_row in row_indices:
157
-
158
- row = input_metadata.iloc[i_row]
159
- assert(row['Image Name'] == image_name)
160
-
161
- timestamp = row['Date'].strftime("%d/%m/%Y")
162
- station_label = row['Camera Trap Station Label']
163
- photo_type = row['Photo Type ']
164
- if isinstance(photo_type,float):
165
- photo_type = ''
166
- photo_type = photo_type.strip().lower()
167
-
168
- if img_id in image_ids_to_images:
169
-
170
- im = image_ids_to_images[img_id]
171
- assert im['file_name'] == image_name
172
- assert im['station_label'] == station_label
173
-
174
- # There are a small handful of datetime mismatches across annotations
175
- # for the same image
176
- # assert im['datetime'] == timestamp
177
- if im['datetime'] != timestamp:
178
- print('Warning: timestamp conflict for image {}: {},{}'.format(
179
- image_name,im['datetime'],timestamp))
180
-
181
- else:
182
-
183
- im = {}
184
- im['id'] = img_id
185
- im['file_name'] = image_name
186
- im['datetime'] = timestamp
187
- im['station_label'] = station_label
188
- im['photo_type'] = photo_type
189
-
190
- image_ids_to_images[img_id] = im
191
- images.append(im)
192
-
193
- species = row['Species']
194
-
195
- if (isinstance(species,float) or \
196
- (isinstance(species,str) and (len(species) == 0))):
197
- category_name = 'empty'
198
- else:
199
- category_name = species
200
-
201
- # Special cases based on the 'photo type' field
202
- if 'vehicle' in photo_type:
203
- category_name = 'vehicle'
204
- # Various spellings of 'community'
205
- elif 'comm' in photo_type:
206
- category_name = 'human'
207
- elif 'camera' in photo_type or 'researcher' in photo_type:
208
- category_name = 'human'
209
- elif 'livestock' in photo_type:
210
- category_name = 'livestock'
211
- elif 'blank' in photo_type:
212
- category_name = 'empty'
213
- elif 'plant movement' in photo_type:
214
- category_name = 'empty'
215
-
216
- category_name = category_name.strip().lower()
217
-
218
- # Have we seen this category before?
219
- if category_name in category_name_to_category:
220
- category_id = category_name_to_category[category_name]['id']
221
- else:
222
- category_id = next_category_id
223
- category = {}
224
- category['id'] = category_id
225
- category['name'] = category_name
226
- category_name_to_category[category_name] = category
227
- categories.append(category)
228
- next_category_id += 1
229
-
230
- # Create an annotation
231
- ann = {}
232
- ann['id'] = str(uuid.uuid1())
233
- ann['image_id'] = im['id']
234
- ann['category_id'] = category_id
235
-
236
- # fieldname = list(mapped_fields.keys())[0]
237
- for fieldname in mapped_fields:
238
- target_field = mapped_fields[fieldname]
239
- val = row[fieldname]
240
- if isinstance(val,float) and np.isnan(val):
241
- val = ''
242
- else:
243
- val = str(val).strip()
244
- ann[target_field] = val
245
-
246
- annotations.append(ann)
247
-
248
- # ...for each row
249
-
250
- # ...for each image
251
-
252
- print('Finished creating CCT dictionaries in {}'.format(
253
- humanfriendly.format_timespan(elapsed)))
254
-
255
-
256
- #%% Create info struct
257
-
258
- info = {}
259
- info['year'] = 2019
260
- info['version'] = 1
261
- info['description'] = 'Save the Elephants Survey A'
262
- info['contributor'] = 'Save the Elephants'
263
-
264
-
265
- #%% Write output
266
-
267
- json_data = {}
268
- json_data['images'] = images
269
- json_data['annotations'] = annotations
270
- json_data['categories'] = categories
271
- json_data['info'] = info
272
- json.dump(json_data, open(output_json_file, 'w'), indent=2)
273
-
274
- print('Finished writing .json file with {} images, {} annotations, and {} categories'.format(
275
- len(images),len(annotations),len(categories)))
276
-
277
-
278
- #%% Validate output
279
-
280
- from megadetector.data_management.databases import integrity_check_json_db
281
-
282
- options = integrity_check_json_db.IntegrityCheckOptions()
283
- options.baseDir = image_directory
284
- options.bCheckImageSizes = False
285
- options.bCheckImageExistence = False
286
- options.bFindUnusedImages = False
287
-
288
- sortedCategories, data = integrity_check_json_db.integrity_check_json_db(output_json_file,options)
289
-
290
-
291
- #%% Preview labels
292
-
293
- from megadetector.visualization import visualize_db
294
- from megadetector.data_management.databases import integrity_check_json_db
295
-
296
- viz_options = visualize_db.DbVizOptions()
297
- viz_options.num_to_visualize = 1000
298
- viz_options.trim_to_images_with_bboxes = False
299
- viz_options.add_search_links = True
300
- viz_options.sort_by_filename = False
301
- viz_options.parallelize_rendering = True
302
- html_output_file,image_db = visualize_db.visualize_db(db_path=output_json_file,
303
- output_dir=os.path.join(output_base,'preview'),
304
- image_base_dir=image_directory,
305
- options=viz_options)
306
- os.startfile(html_output_file)
307
-
308
-
309
- #%% Scrap
310
-
311
- if False:
312
-
313
- pass
314
-
315
- #%% Find unique photo types
316
-
317
- annotations = image_db['annotations']
318
- photo_types = set()
319
- for ann in tqdm(annotations):
320
- photo_types.add(ann['photo_type'])