megadetector 5.0.10__py3-none-any.whl → 5.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (226) hide show
  1. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/LICENSE +0 -0
  2. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/METADATA +12 -11
  3. megadetector-5.0.11.dist-info/RECORD +5 -0
  4. megadetector-5.0.11.dist-info/top_level.txt +1 -0
  5. api/__init__.py +0 -0
  6. api/batch_processing/__init__.py +0 -0
  7. api/batch_processing/api_core/__init__.py +0 -0
  8. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  9. api/batch_processing/api_core/batch_service/score.py +0 -439
  10. api/batch_processing/api_core/server.py +0 -294
  11. api/batch_processing/api_core/server_api_config.py +0 -98
  12. api/batch_processing/api_core/server_app_config.py +0 -55
  13. api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  14. api/batch_processing/api_core/server_job_status_table.py +0 -152
  15. api/batch_processing/api_core/server_orchestration.py +0 -360
  16. api/batch_processing/api_core/server_utils.py +0 -92
  17. api/batch_processing/api_core_support/__init__.py +0 -0
  18. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  19. api/batch_processing/api_support/__init__.py +0 -0
  20. api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  21. api/batch_processing/data_preparation/__init__.py +0 -0
  22. api/batch_processing/data_preparation/manage_local_batch.py +0 -2391
  23. api/batch_processing/data_preparation/manage_video_batch.py +0 -327
  24. api/batch_processing/integration/digiKam/setup.py +0 -6
  25. api/batch_processing/integration/digiKam/xmp_integration.py +0 -465
  26. api/batch_processing/integration/eMammal/test_scripts/config_template.py +0 -5
  27. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -126
  28. api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +0 -55
  29. api/batch_processing/postprocessing/__init__.py +0 -0
  30. api/batch_processing/postprocessing/add_max_conf.py +0 -64
  31. api/batch_processing/postprocessing/categorize_detections_by_size.py +0 -163
  32. api/batch_processing/postprocessing/combine_api_outputs.py +0 -249
  33. api/batch_processing/postprocessing/compare_batch_results.py +0 -958
  34. api/batch_processing/postprocessing/convert_output_format.py +0 -397
  35. api/batch_processing/postprocessing/load_api_results.py +0 -195
  36. api/batch_processing/postprocessing/md_to_coco.py +0 -310
  37. api/batch_processing/postprocessing/md_to_labelme.py +0 -330
  38. api/batch_processing/postprocessing/merge_detections.py +0 -401
  39. api/batch_processing/postprocessing/postprocess_batch_results.py +0 -1904
  40. api/batch_processing/postprocessing/remap_detection_categories.py +0 -170
  41. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +0 -661
  42. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +0 -211
  43. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +0 -82
  44. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +0 -1631
  45. api/batch_processing/postprocessing/separate_detections_into_folders.py +0 -731
  46. api/batch_processing/postprocessing/subset_json_detector_output.py +0 -696
  47. api/batch_processing/postprocessing/top_folders_to_bottom.py +0 -223
  48. api/synchronous/__init__.py +0 -0
  49. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  50. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -152
  51. api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -266
  52. api/synchronous/api_core/animal_detection_api/config.py +0 -35
  53. api/synchronous/api_core/animal_detection_api/data_management/annotations/annotation_constants.py +0 -47
  54. api/synchronous/api_core/animal_detection_api/detection/detector_training/copy_checkpoints.py +0 -43
  55. api/synchronous/api_core/animal_detection_api/detection/detector_training/model_main_tf2.py +0 -114
  56. api/synchronous/api_core/animal_detection_api/detection/process_video.py +0 -543
  57. api/synchronous/api_core/animal_detection_api/detection/pytorch_detector.py +0 -304
  58. api/synchronous/api_core/animal_detection_api/detection/run_detector.py +0 -627
  59. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +0 -1029
  60. api/synchronous/api_core/animal_detection_api/detection/run_inference_with_yolov5_val.py +0 -581
  61. api/synchronous/api_core/animal_detection_api/detection/run_tiled_inference.py +0 -754
  62. api/synchronous/api_core/animal_detection_api/detection/tf_detector.py +0 -165
  63. api/synchronous/api_core/animal_detection_api/detection/video_utils.py +0 -495
  64. api/synchronous/api_core/animal_detection_api/md_utils/azure_utils.py +0 -174
  65. api/synchronous/api_core/animal_detection_api/md_utils/ct_utils.py +0 -262
  66. api/synchronous/api_core/animal_detection_api/md_utils/directory_listing.py +0 -251
  67. api/synchronous/api_core/animal_detection_api/md_utils/matlab_porting_tools.py +0 -97
  68. api/synchronous/api_core/animal_detection_api/md_utils/path_utils.py +0 -416
  69. api/synchronous/api_core/animal_detection_api/md_utils/process_utils.py +0 -110
  70. api/synchronous/api_core/animal_detection_api/md_utils/sas_blob_utils.py +0 -509
  71. api/synchronous/api_core/animal_detection_api/md_utils/string_utils.py +0 -59
  72. api/synchronous/api_core/animal_detection_api/md_utils/url_utils.py +0 -144
  73. api/synchronous/api_core/animal_detection_api/md_utils/write_html_image_list.py +0 -226
  74. api/synchronous/api_core/animal_detection_api/md_visualization/visualization_utils.py +0 -841
  75. api/synchronous/api_core/tests/__init__.py +0 -0
  76. api/synchronous/api_core/tests/load_test.py +0 -110
  77. classification/__init__.py +0 -0
  78. classification/aggregate_classifier_probs.py +0 -108
  79. classification/analyze_failed_images.py +0 -227
  80. classification/cache_batchapi_outputs.py +0 -198
  81. classification/create_classification_dataset.py +0 -627
  82. classification/crop_detections.py +0 -516
  83. classification/csv_to_json.py +0 -226
  84. classification/detect_and_crop.py +0 -855
  85. classification/efficientnet/__init__.py +0 -9
  86. classification/efficientnet/model.py +0 -415
  87. classification/efficientnet/utils.py +0 -610
  88. classification/evaluate_model.py +0 -520
  89. classification/identify_mislabeled_candidates.py +0 -152
  90. classification/json_to_azcopy_list.py +0 -63
  91. classification/json_validator.py +0 -695
  92. classification/map_classification_categories.py +0 -276
  93. classification/merge_classification_detection_output.py +0 -506
  94. classification/prepare_classification_script.py +0 -194
  95. classification/prepare_classification_script_mc.py +0 -228
  96. classification/run_classifier.py +0 -286
  97. classification/save_mislabeled.py +0 -110
  98. classification/train_classifier.py +0 -825
  99. classification/train_classifier_tf.py +0 -724
  100. classification/train_utils.py +0 -322
  101. data_management/__init__.py +0 -0
  102. data_management/annotations/__init__.py +0 -0
  103. data_management/annotations/annotation_constants.py +0 -34
  104. data_management/camtrap_dp_to_coco.py +0 -238
  105. data_management/cct_json_utils.py +0 -395
  106. data_management/cct_to_md.py +0 -176
  107. data_management/cct_to_wi.py +0 -289
  108. data_management/coco_to_labelme.py +0 -272
  109. data_management/coco_to_yolo.py +0 -662
  110. data_management/databases/__init__.py +0 -0
  111. data_management/databases/add_width_and_height_to_db.py +0 -33
  112. data_management/databases/combine_coco_camera_traps_files.py +0 -206
  113. data_management/databases/integrity_check_json_db.py +0 -477
  114. data_management/databases/subset_json_db.py +0 -115
  115. data_management/generate_crops_from_cct.py +0 -149
  116. data_management/get_image_sizes.py +0 -188
  117. data_management/importers/add_nacti_sizes.py +0 -52
  118. data_management/importers/add_timestamps_to_icct.py +0 -79
  119. data_management/importers/animl_results_to_md_results.py +0 -158
  120. data_management/importers/auckland_doc_test_to_json.py +0 -372
  121. data_management/importers/auckland_doc_to_json.py +0 -200
  122. data_management/importers/awc_to_json.py +0 -189
  123. data_management/importers/bellevue_to_json.py +0 -273
  124. data_management/importers/cacophony-thermal-importer.py +0 -796
  125. data_management/importers/carrizo_shrubfree_2018.py +0 -268
  126. data_management/importers/carrizo_trail_cam_2017.py +0 -287
  127. data_management/importers/cct_field_adjustments.py +0 -57
  128. data_management/importers/channel_islands_to_cct.py +0 -913
  129. data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  130. data_management/importers/eMammal/eMammal_helpers.py +0 -249
  131. data_management/importers/eMammal/make_eMammal_json.py +0 -223
  132. data_management/importers/ena24_to_json.py +0 -275
  133. data_management/importers/filenames_to_json.py +0 -385
  134. data_management/importers/helena_to_cct.py +0 -282
  135. data_management/importers/idaho-camera-traps.py +0 -1407
  136. data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  137. data_management/importers/jb_csv_to_json.py +0 -150
  138. data_management/importers/mcgill_to_json.py +0 -250
  139. data_management/importers/missouri_to_json.py +0 -489
  140. data_management/importers/nacti_fieldname_adjustments.py +0 -79
  141. data_management/importers/noaa_seals_2019.py +0 -181
  142. data_management/importers/pc_to_json.py +0 -365
  143. data_management/importers/plot_wni_giraffes.py +0 -123
  144. data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
  145. data_management/importers/prepare_zsl_imerit.py +0 -131
  146. data_management/importers/rspb_to_json.py +0 -356
  147. data_management/importers/save_the_elephants_survey_A.py +0 -320
  148. data_management/importers/save_the_elephants_survey_B.py +0 -332
  149. data_management/importers/snapshot_safari_importer.py +0 -758
  150. data_management/importers/snapshot_safari_importer_reprise.py +0 -665
  151. data_management/importers/snapshot_serengeti_lila.py +0 -1067
  152. data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  153. data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  154. data_management/importers/sulross_get_exif.py +0 -65
  155. data_management/importers/timelapse_csv_set_to_json.py +0 -490
  156. data_management/importers/ubc_to_json.py +0 -399
  157. data_management/importers/umn_to_json.py +0 -507
  158. data_management/importers/wellington_to_json.py +0 -263
  159. data_management/importers/wi_to_json.py +0 -441
  160. data_management/importers/zamba_results_to_md_results.py +0 -181
  161. data_management/labelme_to_coco.py +0 -548
  162. data_management/labelme_to_yolo.py +0 -272
  163. data_management/lila/__init__.py +0 -0
  164. data_management/lila/add_locations_to_island_camera_traps.py +0 -97
  165. data_management/lila/add_locations_to_nacti.py +0 -147
  166. data_management/lila/create_lila_blank_set.py +0 -557
  167. data_management/lila/create_lila_test_set.py +0 -151
  168. data_management/lila/create_links_to_md_results_files.py +0 -106
  169. data_management/lila/download_lila_subset.py +0 -177
  170. data_management/lila/generate_lila_per_image_labels.py +0 -515
  171. data_management/lila/get_lila_annotation_counts.py +0 -170
  172. data_management/lila/get_lila_image_counts.py +0 -111
  173. data_management/lila/lila_common.py +0 -300
  174. data_management/lila/test_lila_metadata_urls.py +0 -132
  175. data_management/ocr_tools.py +0 -874
  176. data_management/read_exif.py +0 -681
  177. data_management/remap_coco_categories.py +0 -84
  178. data_management/remove_exif.py +0 -66
  179. data_management/resize_coco_dataset.py +0 -189
  180. data_management/wi_download_csv_to_coco.py +0 -246
  181. data_management/yolo_output_to_md_output.py +0 -441
  182. data_management/yolo_to_coco.py +0 -676
  183. detection/__init__.py +0 -0
  184. detection/detector_training/__init__.py +0 -0
  185. detection/detector_training/model_main_tf2.py +0 -114
  186. detection/process_video.py +0 -703
  187. detection/pytorch_detector.py +0 -337
  188. detection/run_detector.py +0 -779
  189. detection/run_detector_batch.py +0 -1219
  190. detection/run_inference_with_yolov5_val.py +0 -917
  191. detection/run_tiled_inference.py +0 -935
  192. detection/tf_detector.py +0 -188
  193. detection/video_utils.py +0 -606
  194. docs/source/conf.py +0 -43
  195. md_utils/__init__.py +0 -0
  196. md_utils/azure_utils.py +0 -174
  197. md_utils/ct_utils.py +0 -612
  198. md_utils/directory_listing.py +0 -246
  199. md_utils/md_tests.py +0 -968
  200. md_utils/path_utils.py +0 -1044
  201. md_utils/process_utils.py +0 -157
  202. md_utils/sas_blob_utils.py +0 -509
  203. md_utils/split_locations_into_train_val.py +0 -228
  204. md_utils/string_utils.py +0 -92
  205. md_utils/url_utils.py +0 -323
  206. md_utils/write_html_image_list.py +0 -225
  207. md_visualization/__init__.py +0 -0
  208. md_visualization/plot_utils.py +0 -293
  209. md_visualization/render_images_with_thumbnails.py +0 -275
  210. md_visualization/visualization_utils.py +0 -1537
  211. md_visualization/visualize_db.py +0 -551
  212. md_visualization/visualize_detector_output.py +0 -406
  213. megadetector-5.0.10.dist-info/RECORD +0 -224
  214. megadetector-5.0.10.dist-info/top_level.txt +0 -8
  215. taxonomy_mapping/__init__.py +0 -0
  216. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +0 -491
  217. taxonomy_mapping/map_new_lila_datasets.py +0 -154
  218. taxonomy_mapping/prepare_lila_taxonomy_release.py +0 -142
  219. taxonomy_mapping/preview_lila_taxonomy.py +0 -591
  220. taxonomy_mapping/retrieve_sample_image.py +0 -71
  221. taxonomy_mapping/simple_image_download.py +0 -218
  222. taxonomy_mapping/species_lookup.py +0 -834
  223. taxonomy_mapping/taxonomy_csv_checker.py +0 -159
  224. taxonomy_mapping/taxonomy_graph.py +0 -346
  225. taxonomy_mapping/validate_lila_category_mappings.py +0 -83
  226. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/WHEEL +0 -0
@@ -1,489 +0,0 @@
1
- """
2
-
3
- missouri_to_json.py
4
-
5
- Create .json files from the original source files for the Missouri Camera Traps
6
- data set. Metadata was provided here in two formats:
7
-
8
- 1) In one subset of the data, folder names indicated species names. In Set 1,
9
- there are no empty sequences. Set 1 has a metadata file to indicate image-level
10
- bounding boxes.
11
-
12
- 2) A subset of the data (overlapping with (1)) was annotated with bounding
13
- boxes, specified in a whitespace-delimited text file. In set 2, there are
14
- some sequences omitted from the metadata file, which implied emptiness.
15
-
16
- In the end, set 2 labels were not reliable enough to publish, so LILA includes only set 1.
17
-
18
- """
19
-
20
- #%% Constants and imports
21
-
22
- import json
23
- import os
24
- import uuid
25
- import time
26
- import humanfriendly
27
- import warnings
28
- import ntpath
29
- import datetime
30
- from PIL import Image
31
-
32
- # ignoring all "PIL cannot read EXIF metainfo for the images" warnings
33
- warnings.filterwarnings("ignore", "(Possibly )?corrupt EXIF data", UserWarning)
34
- # Metadata Warning, tag 256 had too many entries: 42, expected 1
35
- warnings.filterwarnings("ignore", "Metadata warning", UserWarning)
36
-
37
- baseDir = os.path.expanduser('~/tmp/mct')
38
- imageDir = os.path.join(baseDir,'images')
39
-
40
- outputJsonFilenameSet1 = os.path.join(baseDir,'missouri_camera_traps_set1.json')
41
- outputEncoding = 'utf-8'
42
- fileListJsonFilename = os.path.join(baseDir,'images.json')
43
-
44
- # This will be a list of filenames that need re-annotation due to redundant boxes
45
- set1RedundantBoxListFilename = os.path.join(baseDir,'mct_images_with_redundant_boxes.txt')
46
-
47
- set1BaseDir = os.path.join(imageDir,'Set1')
48
-
49
- metadataFilenameSet1 = os.path.join(set1BaseDir,'labels.txt')
50
-
51
- assert(os.path.isdir(baseDir))
52
- assert(os.path.isfile(metadataFilenameSet1))
53
-
54
- info = {}
55
- info['year'] = 2019
56
- info['version'] = '1.21'
57
- info['description'] = 'Missouri Camera Traps (set 1)'
58
- info['contributor'] = ''
59
- info['date_created'] = str(datetime.date.today())
60
- infoSet1 = info
61
-
62
- maxFiles = -1
63
- emptyCategoryId = 0
64
- emptyCategoryName = 'empty'
65
-
66
-
67
- #%% Enumerate files, read image sizes
68
-
69
- # Takes a few minutes, since we're reading image sizes.
70
-
71
- # Each element will be a list of relative path/full path/width/height
72
- fileInfo = []
73
- nonImages = []
74
- nFiles = 0
75
-
76
- relPathToIm = {}
77
- imageIdToImage = {}
78
-
79
- set1ImageIDs = []
80
-
81
- sequenceIDtoCount = {}
82
-
83
- print('Enumerating files from {} to {}'.format(imageDir,fileListJsonFilename))
84
-
85
- startTime = time.time()
86
-
87
- for root, subdirs, files in os.walk(imageDir):
88
-
89
- if root == imageDir:
90
- continue
91
-
92
- bn = ntpath.basename(root)
93
-
94
- # Only process leaf nodes corresponding to sequences, which look like:
95
- #
96
- # Set1/1.02-Agouti/SEQ75583
97
- #
98
- if ('Set1' in root and 'SEQ' in bn):
99
- sequenceID = bn
100
- assert sequenceID not in sequenceIDtoCount
101
- sequenceIDtoCount[sequenceID] = 0
102
- else:
103
- print('Skipping folder {}:{}'.format(root,bn))
104
- continue
105
- # assert len(files) <= 2
106
-
107
- for fname in files:
108
-
109
- nFiles = nFiles + 1
110
- if maxFiles >= 0 and nFiles > maxFiles:
111
- print('Warning: early break at {} files'.format(maxFiles))
112
- break
113
-
114
- fullPath = os.path.join(root,fname)
115
- relativePath = os.path.relpath(fullPath,imageDir)
116
-
117
- if maxFiles >= 0:
118
- print(relativePath)
119
-
120
- h = -1
121
- w = -1
122
-
123
- # Read the image
124
- try:
125
-
126
- im = Image.open(fullPath)
127
- h = im.height
128
- w = im.width
129
-
130
- except:
131
-
132
- # Not an image...
133
- continue
134
-
135
- # Store file info
136
- im = {}
137
- im['id'] = str(uuid.uuid1())
138
- im['file_name'] = relativePath
139
- im['height'] = h
140
- im['width'] = w
141
- im['location'] = 'missouri_camera_traps'
142
-
143
- im['seq_id'] = sequenceID
144
- im['seq_num_frames'] = -1
145
-
146
- frame_number = sequenceIDtoCount[sequenceID]
147
- im['frame_num'] = frame_number
148
- sequenceIDtoCount[sequenceID] = sequenceIDtoCount[sequenceID] + 1
149
-
150
- imageIdToImage[im['id']] = im
151
- relPathToIm[relativePath] = im
152
-
153
- if 'Set1' in relativePath:
154
- set1ImageIDs.append(im['id'])
155
- else:
156
- raise Exception('Oops, can''t assign this image to a set')
157
-
158
- # ...if we didn't hit the max file limit, keep going
159
-
160
- else:
161
-
162
- continue
163
-
164
- break
165
-
166
- # ...for each file
167
-
168
- elapsed = time.time() - startTime
169
- print('Finished file enumeration in {}'.format(
170
- humanfriendly.format_timespan(elapsed)))
171
-
172
-
173
- #%% Add sequence lengths
174
-
175
- for imageID in imageIdToImage:
176
-
177
- im = imageIdToImage[imageID]
178
- sequenceID = im['seq_id']
179
- seq_num_frames = sequenceIDtoCount[sequenceID]
180
- assert(im['seq_num_frames'] == -1)
181
- im['seq_num_frames'] = seq_num_frames
182
-
183
-
184
- #%% Load the metadata (.txt) file
185
-
186
- with open(metadataFilenameSet1) as f:
187
- metadataSet1Lines = f.readlines()
188
-
189
- metadataSet1Lines = [x.strip() for x in metadataSet1Lines]
190
-
191
-
192
- #%% Map relative paths to metadata
193
-
194
- # List of lists, length varies according to number of bounding boxes
195
- #
196
- # Preserves original ordering
197
- missingFilesSet1 = []
198
- correctedFiles = []
199
-
200
- relPathToMetadataSet1 = {}
201
-
202
- # iLine = 0; line = metadataSet1Lines[0]
203
- for iLine,line in enumerate(metadataSet1Lines):
204
-
205
- tokens = line.split()
206
- nTokens = len(tokens)
207
-
208
- # Lines should be filename, number of bounding boxes, boxes (four values per box)
209
- assert ((nTokens - 2) % 4) == 0
210
- relPath = tokens[0].replace('/',os.sep).replace('\\',os.sep)
211
- relPath = os.path.join('Set1',relPath)
212
- absPath = os.path.join(imageDir,relPath)
213
-
214
- originalAbsPath = absPath
215
- originalRelPath = relPath
216
-
217
- if not os.path.isfile(absPath):
218
-
219
- absPath = originalAbsPath.replace('IMG','IMG_')
220
- relPath = originalRelPath.replace('IMG','IMG_')
221
- if os.path.isfile(absPath):
222
- correctedFiles.append([relPath,originalRelPath,absPath,originalAbsPath])
223
-
224
- if not os.path.isfile(absPath):
225
-
226
- absPath = originalAbsPath.replace('Red_Deer','Red_Brocket_Deer').replace('IMG','IMG_')
227
- relPath = originalRelPath.replace('Red_Deer','Red_Brocket_Deer').replace('IMG','IMG_')
228
- if os.path.isfile(absPath):
229
- correctedFiles.append([relPath,originalRelPath,absPath,originalAbsPath])
230
-
231
- if not os.path.isfile(absPath):
232
-
233
- missingFilesSet1.append([originalRelPath,originalAbsPath])
234
-
235
- else:
236
-
237
- relPathToMetadataSet1[relPath] = tokens
238
-
239
- # Make sure we have image info for this image
240
- assert relPath in relPathToIm
241
-
242
- print('Corrected {} paths, missing {} images of {}'.format(len(correctedFiles),
243
- len(missingFilesSet1),len(metadataSet1Lines)))
244
-
245
-
246
- #%% Print missing files from Set 1 metadata
247
-
248
- # The only missing file (and it's really just missing):
249
- #
250
- # Set1/1.58-Roe_Deer/SEQ75631/SEQ75631_IMG_0011.JPG
251
-
252
- print('Missing files in Set 1:\n')
253
- for iFile,fInfo in enumerate(missingFilesSet1):
254
- print(fInfo[0])
255
-
256
-
257
- #%% Create categories and annotations for set 1
258
-
259
- imagesSet1 = []
260
- categoriesSet1 = []
261
- annotationsSet1 = []
262
-
263
- categoryNameToId = {}
264
- idToCategory = {}
265
-
266
- # Though we have no empty sequences, we do have empty images in this set
267
- emptyCat = {}
268
- emptyCat['id'] = emptyCategoryId
269
- emptyCat['name'] = emptyCategoryName
270
- emptyCat['count'] = 0
271
- categoriesSet1.append(emptyCat)
272
-
273
- nextCategoryId = emptyCategoryId + 1
274
-
275
- nFoundMetadata = 0
276
- nTotalBoxes = 0
277
- nImageLevelEmpties = 0
278
- nSequenceLevelAnnotations = 0
279
- nRedundantBoxes = 0
280
-
281
- imageIDsWithRedundantBoxes = set()
282
-
283
- # For each image
284
- #
285
- # iImage = 0; imageID = set1ImageIDs[iImage]
286
- for iImage,imageID in enumerate(set1ImageIDs):
287
-
288
- im = imageIdToImage[imageID]
289
- imagesSet1.append(im)
290
-
291
- # E.g. Set1\\1.80-Coiban_Agouti\\SEQ83155\\SEQ83155_IMG_0010.JPG
292
- relPath = im['file_name']
293
-
294
- # Find the species name
295
- tokens = os.path.normpath(relPath).split(os.sep)
296
- speciesTag = tokens[1]
297
- tokens = speciesTag.split('-',1)
298
- assert(len(tokens) == 2)
299
- categoryName = tokens[1].lower()
300
-
301
- category = None
302
- categoryId = None
303
-
304
- if categoryName not in categoryNameToId:
305
-
306
- categoryId = nextCategoryId
307
- nextCategoryId += 1
308
- categoryNameToId[categoryName] = categoryId
309
- newCat = {}
310
- newCat['id'] = categoryNameToId[categoryName]
311
- newCat['name'] = categoryName
312
- newCat['count'] = 0
313
- categoriesSet1.append(newCat)
314
- idToCategory[categoryId] = newCat
315
- category = newCat
316
-
317
- else:
318
-
319
- categoryId = categoryNameToId[categoryName]
320
- category = idToCategory[categoryId]
321
-
322
- # This image may still be empty...
323
- # category['count'] = category['count'] + 1
324
-
325
- # If we have bounding boxes, create image-level annotations
326
- if relPath in relPathToMetadataSet1:
327
-
328
- nFoundMetadata += 1
329
-
330
- # This tuple is:
331
- #
332
- # filename (possibly no longer correct)
333
- # number of bounding boxes
334
- # [...boxes (four values per box)]
335
- imageMetadata = relPathToMetadataSet1[relPath]
336
-
337
- nBoxes = int(imageMetadata[1])
338
- im['n_boxes'] = nBoxes
339
-
340
- if nBoxes == 0:
341
-
342
- ann = {}
343
- ann['id'] = str(uuid.uuid1())
344
- ann['image_id'] = im['id']
345
- ann['category_id'] = emptyCategoryId
346
- ann['sequence_level_annotation'] = False
347
- annotationsSet1.append(ann)
348
- emptyCat['count'] = emptyCat['count'] + 1
349
- nImageLevelEmpties += 1
350
-
351
- else:
352
-
353
- # This image is non-empty
354
- category['count'] = category['count'] + 1
355
-
356
- for iBox in range(0,nBoxes):
357
-
358
- boxCoords = imageMetadata[2+(iBox*4):6+(iBox*4)]
359
- boxCoords = list(map(int, boxCoords))
360
-
361
- # Some redundant bounding boxes crept in, don't add them twice
362
- bRedundantBox = False
363
-
364
- # Check this bbox against previous bboxes
365
- #
366
- # Inefficient? Yes. In an important way? No.
367
- for iBoxComparison in range(0,iBox):
368
- assert iBox != iBoxComparison
369
- boxCoordsComparison = imageMetadata[2+(iBoxComparison*4):6+(iBoxComparison*4)]
370
- boxCoordsComparison = list(map(int, boxCoordsComparison))
371
- if boxCoordsComparison == boxCoords:
372
- # print('Warning: redundant box on image {}'.format(relPath))
373
- bRedundantBox = True
374
- nRedundantBoxes += 1
375
- break
376
-
377
- if bRedundantBox:
378
- imageIDsWithRedundantBoxes.add(im['id'])
379
- continue
380
-
381
- # Bounding box values are in absolute coordinates, with the origin
382
- # at the upper-left of the image, as [xmin1 ymin1 xmax1 ymax1].
383
- #
384
- # Convert to floats and to x/y/w/h, as per CCT standard
385
- bboxW = boxCoords[2] - boxCoords[0]
386
- bboxH = boxCoords[3] - boxCoords[1]
387
-
388
- box = [boxCoords[0], boxCoords[1], bboxW, bboxH]
389
- box = list(map(float, box))
390
-
391
- ann = {}
392
- ann['id'] = str(uuid.uuid1())
393
- ann['image_id'] = im['id']
394
- ann['category_id'] = categoryId
395
- ann['sequence_level_annotation'] = False
396
- ann['bbox'] = box
397
- annotationsSet1.append(ann)
398
- nTotalBoxes += 1
399
-
400
- # ...for each box
401
-
402
- # if we do/don't have boxes for this image
403
-
404
- # Else create a sequence-level annotation
405
- else:
406
-
407
- ann = {}
408
- ann['id'] = str(uuid.uuid1())
409
- ann['image_id'] = im['id']
410
- ann['category_id'] = categoryId
411
- ann['sequence_level_annotation'] = True
412
- annotationsSet1.append(ann)
413
- nSequenceLevelAnnotations += 1
414
-
415
- # ...for each image
416
-
417
- print('Finished processing set 1, found metadata for {} of {} images'.format(nFoundMetadata,len(set1ImageIDs)))
418
- print('Created {} annotations and {} boxes in {} categories'.format(
419
- len(annotationsSet1),nTotalBoxes,len(categoriesSet1)))
420
- print('Found {} redundant annotations'.format(nRedundantBoxes))
421
-
422
- assert len(annotationsSet1) == nSequenceLevelAnnotations + nTotalBoxes + nImageLevelEmpties
423
- assert len(set1ImageIDs) == nSequenceLevelAnnotations + nFoundMetadata
424
-
425
- print('Found {} images with redundant boxes'.format(len(imageIDsWithRedundantBoxes)))
426
-
427
-
428
- #%% Write out the list of images with redundant boxes
429
-
430
- imageFileNamesWithRedundantBoxes = []
431
- for image_id in imageIDsWithRedundantBoxes:
432
- im = imageIdToImage[image_id]
433
- imageFileNamesWithRedundantBoxes.append(im['file_name'])
434
- imageFileNamesWithRedundantBoxes.sort()
435
-
436
- with open(set1RedundantBoxListFilename,'w') as f:
437
- for fn in imageFileNamesWithRedundantBoxes:
438
- f.write(fn + '\n')
439
-
440
-
441
- #%% The 'count' field isn't really meaningful, delete it
442
-
443
- # It's really the count of image-level annotations, not total images assigned to a class
444
- for d in categoriesSet1:
445
- del d['count']
446
-
447
-
448
- #%% Write output .json files
449
-
450
- data = {}
451
- data['info'] = infoSet1
452
- data['images'] = imagesSet1
453
- data['annotations'] = annotationsSet1
454
- data['categories'] = categoriesSet1
455
- json.dump(data, open(outputJsonFilenameSet1,'w'), indent=4)
456
- print('Finished writing json to {}'.format(outputJsonFilenameSet1))
457
-
458
-
459
- #%% Consistency-check final set 1 .json file
460
-
461
- from data_management.databases import integrity_check_json_db
462
- options = integrity_check_json_db.IntegrityCheckOptions()
463
- options.baseDir = imageDir
464
- options.bCheckImageSizes = True
465
- options.bCheckImageExistence = True
466
- options.bFindUnusedImages = True
467
- options.bRequireLocation = False
468
- options.nThreads = 10
469
- sortedCategories,data,_ = integrity_check_json_db.integrity_check_json_db(outputJsonFilenameSet1, options)
470
- sortedCategories
471
-
472
-
473
- #%% Generate previews
474
-
475
- from md_visualization import visualize_db
476
-
477
- output_dir = os.path.join(baseDir,'preview')
478
-
479
- options = visualize_db.DbVizOptions()
480
- options.num_to_visualize = 5000
481
- options.sort_by_filename = False
482
- options.classes_to_exclude = None
483
- options.trim_to_images_with_bboxes = False
484
- options.parallelize_rendering = True
485
-
486
- htmlOutputFile,_ = visualize_db.visualize_db(outputJsonFilenameSet1,output_dir,imageDir,options)
487
-
488
- from md_utils.path_utils import open_file
489
- open_file(htmlOutputFile)
@@ -1,79 +0,0 @@
1
- """
2
-
3
- nacti_fieldname_adjustments.py
4
-
5
- NACTI metadata was posted with "filename" in images instead of "file_name", and
6
- used string (rather than int) category IDs (in categories, but not in annotations).
7
-
8
- This script fixes those issues and rev's the version number.
9
-
10
- """
11
-
12
- #%% Constants and environment
13
-
14
- import json
15
- import os
16
-
17
- inputJsonFile = r'/datadrive1/nacti_metadata_orig.json'
18
- outputJsonFile = r'/datadrive1/nacti_metadata.json'
19
-
20
- assert os.path.isfile(inputJsonFile)
21
-
22
-
23
- #%% Read .json file
24
-
25
- with open(inputJsonFile,'r') as f:
26
- data = json.load(f)
27
-
28
- images = data['images']
29
- annotations = data['annotations']
30
- categories = data['categories']
31
- info = data['info']
32
-
33
- print('Finished reading input .json')
34
-
35
-
36
- #%% Rev version number, update field names and types
37
-
38
- assert(info['version'] == 1.0)
39
- info['version'] = 1.1
40
- nFilenameConversions = 0
41
- nCatConversions = 0
42
- nAnnConversions = 0
43
-
44
- for image in images:
45
-
46
- assert 'path' in image and isinstance(image['path'],str)
47
- image['file_name'] = image['path']
48
- del image['path']
49
- nFilenameConversions += 1
50
- assert 'seq_no' in image
51
- del image['seq_no']
52
- assert 'width' in image and isinstance(image['width'],str)
53
- assert 'height' in image and isinstance(image['height'],str)
54
- image['width'] = int(image['width'])
55
- image['height'] = int(image['height'])
56
-
57
- for cat in categories:
58
-
59
- assert 'id' in cat and isinstance(cat['id'],str)
60
- cat['id'] = int(cat['id'])
61
- nCatConversions += 1
62
-
63
- for ann in annotations:
64
-
65
- assert 'id' in ann and isinstance(ann['id'],str)
66
- assert 'category_id' in ann and isinstance(ann['category_id'],str)
67
- ann['category_id'] = int(ann['category_id'])
68
- nAnnConversions += 1
69
-
70
- print('Finished checking data, converted {} filename fields, {} category IDs, {} annotation category IDs'.format(
71
- nFilenameConversions,nCatConversions,nAnnConversions))
72
-
73
-
74
- #%% Write json file
75
-
76
- json.dump(data, open(outputJsonFile, 'w'), indent=4)
77
-
78
- print('Finished writing output .json to {}'.format(outputJsonFile))
79
-