megadetector-5.0.10-py3-none-any.whl → megadetector-5.0.11-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
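Wheels are ordinary zip archives, so a file-level diff like this one can be reproduced locally with nothing but the standard library. A minimal sketch, assuming both wheels have already been downloaded from PyPI into the current directory (the filenames below follow the standard wheel naming convention):

    # Hypothetical local reproduction of this diff; not part of the diff service.
    # Wheels are zip archives, so zipfile from the stdlib is sufficient.
    import zipfile

    old = set(zipfile.ZipFile('megadetector-5.0.10-py3-none-any.whl').namelist())
    new = set(zipfile.ZipFile('megadetector-5.0.11-py3-none-any.whl').namelist())

    print(len(old - new), 'files removed;', len(new - old), 'files added')
    for path in sorted(old - new):
        print('-', path)   # present in 5.0.10, gone in 5.0.11
    for path in sorted(new - old):
        print('+', path)   # new in 5.0.11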

Potentially problematic release: this version of megadetector might be problematic.

Files changed (226)
  1. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/LICENSE +0 -0
  2. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/METADATA +12 -11
  3. megadetector-5.0.11.dist-info/RECORD +5 -0
  4. megadetector-5.0.11.dist-info/top_level.txt +1 -0
  5. api/__init__.py +0 -0
  6. api/batch_processing/__init__.py +0 -0
  7. api/batch_processing/api_core/__init__.py +0 -0
  8. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  9. api/batch_processing/api_core/batch_service/score.py +0 -439
  10. api/batch_processing/api_core/server.py +0 -294
  11. api/batch_processing/api_core/server_api_config.py +0 -98
  12. api/batch_processing/api_core/server_app_config.py +0 -55
  13. api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  14. api/batch_processing/api_core/server_job_status_table.py +0 -152
  15. api/batch_processing/api_core/server_orchestration.py +0 -360
  16. api/batch_processing/api_core/server_utils.py +0 -92
  17. api/batch_processing/api_core_support/__init__.py +0 -0
  18. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  19. api/batch_processing/api_support/__init__.py +0 -0
  20. api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  21. api/batch_processing/data_preparation/__init__.py +0 -0
  22. api/batch_processing/data_preparation/manage_local_batch.py +0 -2391
  23. api/batch_processing/data_preparation/manage_video_batch.py +0 -327
  24. api/batch_processing/integration/digiKam/setup.py +0 -6
  25. api/batch_processing/integration/digiKam/xmp_integration.py +0 -465
  26. api/batch_processing/integration/eMammal/test_scripts/config_template.py +0 -5
  27. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -126
  28. api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +0 -55
  29. api/batch_processing/postprocessing/__init__.py +0 -0
  30. api/batch_processing/postprocessing/add_max_conf.py +0 -64
  31. api/batch_processing/postprocessing/categorize_detections_by_size.py +0 -163
  32. api/batch_processing/postprocessing/combine_api_outputs.py +0 -249
  33. api/batch_processing/postprocessing/compare_batch_results.py +0 -958
  34. api/batch_processing/postprocessing/convert_output_format.py +0 -397
  35. api/batch_processing/postprocessing/load_api_results.py +0 -195
  36. api/batch_processing/postprocessing/md_to_coco.py +0 -310
  37. api/batch_processing/postprocessing/md_to_labelme.py +0 -330
  38. api/batch_processing/postprocessing/merge_detections.py +0 -401
  39. api/batch_processing/postprocessing/postprocess_batch_results.py +0 -1904
  40. api/batch_processing/postprocessing/remap_detection_categories.py +0 -170
  41. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +0 -661
  42. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +0 -211
  43. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +0 -82
  44. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +0 -1631
  45. api/batch_processing/postprocessing/separate_detections_into_folders.py +0 -731
  46. api/batch_processing/postprocessing/subset_json_detector_output.py +0 -696
  47. api/batch_processing/postprocessing/top_folders_to_bottom.py +0 -223
  48. api/synchronous/__init__.py +0 -0
  49. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  50. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -152
  51. api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -266
  52. api/synchronous/api_core/animal_detection_api/config.py +0 -35
  53. api/synchronous/api_core/animal_detection_api/data_management/annotations/annotation_constants.py +0 -47
  54. api/synchronous/api_core/animal_detection_api/detection/detector_training/copy_checkpoints.py +0 -43
  55. api/synchronous/api_core/animal_detection_api/detection/detector_training/model_main_tf2.py +0 -114
  56. api/synchronous/api_core/animal_detection_api/detection/process_video.py +0 -543
  57. api/synchronous/api_core/animal_detection_api/detection/pytorch_detector.py +0 -304
  58. api/synchronous/api_core/animal_detection_api/detection/run_detector.py +0 -627
  59. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +0 -1029
  60. api/synchronous/api_core/animal_detection_api/detection/run_inference_with_yolov5_val.py +0 -581
  61. api/synchronous/api_core/animal_detection_api/detection/run_tiled_inference.py +0 -754
  62. api/synchronous/api_core/animal_detection_api/detection/tf_detector.py +0 -165
  63. api/synchronous/api_core/animal_detection_api/detection/video_utils.py +0 -495
  64. api/synchronous/api_core/animal_detection_api/md_utils/azure_utils.py +0 -174
  65. api/synchronous/api_core/animal_detection_api/md_utils/ct_utils.py +0 -262
  66. api/synchronous/api_core/animal_detection_api/md_utils/directory_listing.py +0 -251
  67. api/synchronous/api_core/animal_detection_api/md_utils/matlab_porting_tools.py +0 -97
  68. api/synchronous/api_core/animal_detection_api/md_utils/path_utils.py +0 -416
  69. api/synchronous/api_core/animal_detection_api/md_utils/process_utils.py +0 -110
  70. api/synchronous/api_core/animal_detection_api/md_utils/sas_blob_utils.py +0 -509
  71. api/synchronous/api_core/animal_detection_api/md_utils/string_utils.py +0 -59
  72. api/synchronous/api_core/animal_detection_api/md_utils/url_utils.py +0 -144
  73. api/synchronous/api_core/animal_detection_api/md_utils/write_html_image_list.py +0 -226
  74. api/synchronous/api_core/animal_detection_api/md_visualization/visualization_utils.py +0 -841
  75. api/synchronous/api_core/tests/__init__.py +0 -0
  76. api/synchronous/api_core/tests/load_test.py +0 -110
  77. classification/__init__.py +0 -0
  78. classification/aggregate_classifier_probs.py +0 -108
  79. classification/analyze_failed_images.py +0 -227
  80. classification/cache_batchapi_outputs.py +0 -198
  81. classification/create_classification_dataset.py +0 -627
  82. classification/crop_detections.py +0 -516
  83. classification/csv_to_json.py +0 -226
  84. classification/detect_and_crop.py +0 -855
  85. classification/efficientnet/__init__.py +0 -9
  86. classification/efficientnet/model.py +0 -415
  87. classification/efficientnet/utils.py +0 -610
  88. classification/evaluate_model.py +0 -520
  89. classification/identify_mislabeled_candidates.py +0 -152
  90. classification/json_to_azcopy_list.py +0 -63
  91. classification/json_validator.py +0 -695
  92. classification/map_classification_categories.py +0 -276
  93. classification/merge_classification_detection_output.py +0 -506
  94. classification/prepare_classification_script.py +0 -194
  95. classification/prepare_classification_script_mc.py +0 -228
  96. classification/run_classifier.py +0 -286
  97. classification/save_mislabeled.py +0 -110
  98. classification/train_classifier.py +0 -825
  99. classification/train_classifier_tf.py +0 -724
  100. classification/train_utils.py +0 -322
  101. data_management/__init__.py +0 -0
  102. data_management/annotations/__init__.py +0 -0
  103. data_management/annotations/annotation_constants.py +0 -34
  104. data_management/camtrap_dp_to_coco.py +0 -238
  105. data_management/cct_json_utils.py +0 -395
  106. data_management/cct_to_md.py +0 -176
  107. data_management/cct_to_wi.py +0 -289
  108. data_management/coco_to_labelme.py +0 -272
  109. data_management/coco_to_yolo.py +0 -662
  110. data_management/databases/__init__.py +0 -0
  111. data_management/databases/add_width_and_height_to_db.py +0 -33
  112. data_management/databases/combine_coco_camera_traps_files.py +0 -206
  113. data_management/databases/integrity_check_json_db.py +0 -477
  114. data_management/databases/subset_json_db.py +0 -115
  115. data_management/generate_crops_from_cct.py +0 -149
  116. data_management/get_image_sizes.py +0 -188
  117. data_management/importers/add_nacti_sizes.py +0 -52
  118. data_management/importers/add_timestamps_to_icct.py +0 -79
  119. data_management/importers/animl_results_to_md_results.py +0 -158
  120. data_management/importers/auckland_doc_test_to_json.py +0 -372
  121. data_management/importers/auckland_doc_to_json.py +0 -200
  122. data_management/importers/awc_to_json.py +0 -189
  123. data_management/importers/bellevue_to_json.py +0 -273
  124. data_management/importers/cacophony-thermal-importer.py +0 -796
  125. data_management/importers/carrizo_shrubfree_2018.py +0 -268
  126. data_management/importers/carrizo_trail_cam_2017.py +0 -287
  127. data_management/importers/cct_field_adjustments.py +0 -57
  128. data_management/importers/channel_islands_to_cct.py +0 -913
  129. data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  130. data_management/importers/eMammal/eMammal_helpers.py +0 -249
  131. data_management/importers/eMammal/make_eMammal_json.py +0 -223
  132. data_management/importers/ena24_to_json.py +0 -275
  133. data_management/importers/filenames_to_json.py +0 -385
  134. data_management/importers/helena_to_cct.py +0 -282
  135. data_management/importers/idaho-camera-traps.py +0 -1407
  136. data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  137. data_management/importers/jb_csv_to_json.py +0 -150
  138. data_management/importers/mcgill_to_json.py +0 -250
  139. data_management/importers/missouri_to_json.py +0 -489
  140. data_management/importers/nacti_fieldname_adjustments.py +0 -79
  141. data_management/importers/noaa_seals_2019.py +0 -181
  142. data_management/importers/pc_to_json.py +0 -365
  143. data_management/importers/plot_wni_giraffes.py +0 -123
  144. data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
  145. data_management/importers/prepare_zsl_imerit.py +0 -131
  146. data_management/importers/rspb_to_json.py +0 -356
  147. data_management/importers/save_the_elephants_survey_A.py +0 -320
  148. data_management/importers/save_the_elephants_survey_B.py +0 -332
  149. data_management/importers/snapshot_safari_importer.py +0 -758
  150. data_management/importers/snapshot_safari_importer_reprise.py +0 -665
  151. data_management/importers/snapshot_serengeti_lila.py +0 -1067
  152. data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  153. data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  154. data_management/importers/sulross_get_exif.py +0 -65
  155. data_management/importers/timelapse_csv_set_to_json.py +0 -490
  156. data_management/importers/ubc_to_json.py +0 -399
  157. data_management/importers/umn_to_json.py +0 -507
  158. data_management/importers/wellington_to_json.py +0 -263
  159. data_management/importers/wi_to_json.py +0 -441
  160. data_management/importers/zamba_results_to_md_results.py +0 -181
  161. data_management/labelme_to_coco.py +0 -548
  162. data_management/labelme_to_yolo.py +0 -272
  163. data_management/lila/__init__.py +0 -0
  164. data_management/lila/add_locations_to_island_camera_traps.py +0 -97
  165. data_management/lila/add_locations_to_nacti.py +0 -147
  166. data_management/lila/create_lila_blank_set.py +0 -557
  167. data_management/lila/create_lila_test_set.py +0 -151
  168. data_management/lila/create_links_to_md_results_files.py +0 -106
  169. data_management/lila/download_lila_subset.py +0 -177
  170. data_management/lila/generate_lila_per_image_labels.py +0 -515
  171. data_management/lila/get_lila_annotation_counts.py +0 -170
  172. data_management/lila/get_lila_image_counts.py +0 -111
  173. data_management/lila/lila_common.py +0 -300
  174. data_management/lila/test_lila_metadata_urls.py +0 -132
  175. data_management/ocr_tools.py +0 -874
  176. data_management/read_exif.py +0 -681
  177. data_management/remap_coco_categories.py +0 -84
  178. data_management/remove_exif.py +0 -66
  179. data_management/resize_coco_dataset.py +0 -189
  180. data_management/wi_download_csv_to_coco.py +0 -246
  181. data_management/yolo_output_to_md_output.py +0 -441
  182. data_management/yolo_to_coco.py +0 -676
  183. detection/__init__.py +0 -0
  184. detection/detector_training/__init__.py +0 -0
  185. detection/detector_training/model_main_tf2.py +0 -114
  186. detection/process_video.py +0 -703
  187. detection/pytorch_detector.py +0 -337
  188. detection/run_detector.py +0 -779
  189. detection/run_detector_batch.py +0 -1219
  190. detection/run_inference_with_yolov5_val.py +0 -917
  191. detection/run_tiled_inference.py +0 -935
  192. detection/tf_detector.py +0 -188
  193. detection/video_utils.py +0 -606
  194. docs/source/conf.py +0 -43
  195. md_utils/__init__.py +0 -0
  196. md_utils/azure_utils.py +0 -174
  197. md_utils/ct_utils.py +0 -612
  198. md_utils/directory_listing.py +0 -246
  199. md_utils/md_tests.py +0 -968
  200. md_utils/path_utils.py +0 -1044
  201. md_utils/process_utils.py +0 -157
  202. md_utils/sas_blob_utils.py +0 -509
  203. md_utils/split_locations_into_train_val.py +0 -228
  204. md_utils/string_utils.py +0 -92
  205. md_utils/url_utils.py +0 -323
  206. md_utils/write_html_image_list.py +0 -225
  207. md_visualization/__init__.py +0 -0
  208. md_visualization/plot_utils.py +0 -293
  209. md_visualization/render_images_with_thumbnails.py +0 -275
  210. md_visualization/visualization_utils.py +0 -1537
  211. md_visualization/visualize_db.py +0 -551
  212. md_visualization/visualize_detector_output.py +0 -406
  213. megadetector-5.0.10.dist-info/RECORD +0 -224
  214. megadetector-5.0.10.dist-info/top_level.txt +0 -8
  215. taxonomy_mapping/__init__.py +0 -0
  216. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +0 -491
  217. taxonomy_mapping/map_new_lila_datasets.py +0 -154
  218. taxonomy_mapping/prepare_lila_taxonomy_release.py +0 -142
  219. taxonomy_mapping/preview_lila_taxonomy.py +0 -591
  220. taxonomy_mapping/retrieve_sample_image.py +0 -71
  221. taxonomy_mapping/simple_image_download.py +0 -218
  222. taxonomy_mapping/species_lookup.py +0 -834
  223. taxonomy_mapping/taxonomy_csv_checker.py +0 -159
  224. taxonomy_mapping/taxonomy_graph.py +0 -346
  225. taxonomy_mapping/validate_lila_category_mappings.py +0 -83
  226. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/WHEEL +0 -0
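The three hunks shown below delete one-off importer scripts that share the same skeleton: read a .csv of annotations, map rows to image files on disk, and write a COCO Camera Traps (CCT) .json file. As a reading aid, here is a minimal sketch of the structure those scripts build; the field names are taken from the code below, but the values are illustrative only:

    # Shape of the CCT .json written by the deleted importers; values are made up.
    cct = {
        'images': [{'id': 'B1_IMG_0001', 'file_name': 'B1\\IMG_0001.JPG',
                    'datetime': '...', 'location': 'region_site_microsite',
                    'width': -1, 'height': -1}],    # -1 when sizes are not loaded
        'annotations': [{'id': '<uuid1 string>', 'image_id': 'B1_IMG_0001',
                         'category_id': 1}],        # plus fields copied from the .csv
        'categories': [{'name': 'empty', 'id': 0}], # id 0 is reserved for 'empty'
        'info': {'year': 2018, 'version': 1, 'description': '...',
                 'contributor': '...'},
    }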
data_management/importers/carrizo_shrubfree_2018.py
@@ -1,268 +0,0 @@
- """
-
- carrizo_shrubfree_2018.py
-
- Convert the .csv file provided for the Carrizo Mojave data set to a
- COCO-camera-traps .json file
-
- """
-
- #%% Constants and environment
-
- import pandas as pd
- import os
- import json
- import uuid
- import time
- import humanfriendly
-
- from tqdm import tqdm
- from PIL import Image
- import numpy as np
-
- from md_utils.path_utils import find_images
-
- input_base = r'Z:\Shrub-free zone Carrizo 2018'
- input_metadata_file = os.path.join(input_base,'shrub-free 2018.csv')
-
- output_base = r'G:\carrizo-mojave'
- output_json_file = os.path.join(output_base,'carrizo shrub-free 2018.json')
-
- image_directory = input_base
-
- load_width_and_height = False
-
- assert(os.path.isdir(image_directory))
-
- category_replacements = {'ammosphermophilus nelsoni':'ammospermophilus nelsoni'}
-
- annotation_fields_to_copy = ['rep','photo.rep','timeblock','night.day','observations']
-
-
- #%% Read source data
-
- input_metadata = pd.read_csv(input_metadata_file)
-
- # Original .csv file had superfluous spaces in column names
- input_metadata = input_metadata.rename(columns=lambda x: x.strip())
-
- print('Read {} columns and {} rows from metadata file'.format(len(input_metadata.columns),
-       len(input_metadata)))
-
- input_metadata['file'] = 1 + input_metadata.groupby('rep').cumcount()
- input_metadata['file'] = input_metadata[['file', 'rep']].apply(lambda x: "{0}\IMG_{1}.JPG".format(x[1], str(x[0]).zfill(4)), axis=1)
-
-
- #%% Map filenames to rows, verify image existence
-
- start_time = time.time()
- filenames_to_rows = {}
- image_filenames = input_metadata.file
-
- missing_files = []
- duplicate_rows = []
-
- # Build up a map from filenames to a list of rows, checking image existence as we go
- for iFile, fn in tqdm(enumerate(image_filenames),total=len(image_filenames)):
-     if (fn in filenames_to_rows):
-         duplicate_rows.append(iFile)
-         filenames_to_rows[fn].append(iFile)
-     else:
-         filenames_to_rows[fn] = [iFile]
-         image_path = os.path.join(image_directory, fn)
-         if not os.path.isfile(image_path):
-             missing_files.append(fn)
-
- elapsed = time.time() - start_time
-
- print('Finished verifying image existence in {}, found {} missing files (of {})'.format(
-     humanfriendly.format_timespan(elapsed),
-     len(missing_files),len(image_filenames)))
-
- assert len(duplicate_rows) == 0
-
- # 58 missing files (of 17652)
-
-
- #%% Check for images that aren't included in the metadata file
-
- image_full_paths = find_images(image_directory, bRecursive=True)
- images_missing_from_metadata = []
-
- for iImage, image_path in tqdm(enumerate(image_full_paths),total=len(image_full_paths)):
-
-     relative_path = os.path.relpath(image_path,input_base)
-     if relative_path not in filenames_to_rows:
-         images_missing_from_metadata.append(relative_path)
-
- print('{} of {} files are not in metadata'.format(len(images_missing_from_metadata),len(image_full_paths)))
-
- # 3012 of 20606 files are not in metadata
-
-
- #%% Create CCT dictionaries
-
- images = []
- annotations = []
-
- # Map categories to integer IDs
- #
- # The category '0' is reserved for 'empty'
-
- categories_to_category_id = {}
- categories_to_counts = {}
- categories_to_category_id['empty'] = 0
- categories_to_counts['empty'] = 0
-
- next_category_id = 1
-
- # For each image
- #
- # Because in practice images are 1:1 with annotations in this data set,
- # this is also a loop over annotations.
-
- start_time = time.time()
-
- for image_name in image_filenames:
-
-     rows = filenames_to_rows[image_name]
-
-     # Each filename should just match one row
-     assert(len(rows) == 1)
-
-     iRow = rows[0]
-     row = input_metadata.iloc[iRow]
-     im = {}
-     im['id'] = image_name.split('.')[0]
-     im['file_name'] = image_name
-     im['datetime'] = row['date']
-     im['location'] = "{0}_{1}_{2}".format(row['region'], row['site'], row['microsite'])
-
-     image_path = os.path.join(image_directory, image_name)
-
-     # Don't include images that don't exist on disk
-     if not os.path.isfile(image_path):
-         continue
-
-     if load_width_and_height:
-         pilImage = Image.open(image_path)
-         width, height = pilImage.size
-         im['width'] = width
-         im['height'] = height
-     else:
-         im['width'] = -1
-         im['height'] = -1
-
-     images.append(im)
-
-     is_image = row['animal.capture']
-
-     if (is_image == 0):
-         category = 'empty'
-     else:
-         if row['latin.bionomial'] is np.nan:
-             category = 'unidentifiable'
-         else:
-             category = row['latin.bionomial'].replace('  ',' ').lower().strip()
-
-     if category in category_replacements:
-         category = category_replacements[category]
-
-     # Have we seen this category before?
-     if category in categories_to_category_id:
-         categoryID = categories_to_category_id[category]
-         categories_to_counts[category] += 1
-     else:
-         categoryID = next_category_id
-         categories_to_category_id[category] = categoryID
-         categories_to_counts[category] = 1
-         next_category_id += 1
-
-     # Create an annotation
-     ann = {}
-
-     # The Internet tells me this guarantees uniqueness to a reasonable extent, even
-     # beyond the sheer improbability of collisions.
-     ann['id'] = str(uuid.uuid1())
-     ann['image_id'] = im['id']
-     ann['category_id'] = categoryID
-
-     for fieldname in annotation_fields_to_copy:
-         ann[fieldname] = row[fieldname]
-         if ann[fieldname] is np.nan:
-             ann[fieldname] = ''
-         ann[fieldname] = str(ann[fieldname])
-
-     annotations.append(ann)
-
- # ...for each image
-
- # Convert categories to a CCT-style dictionary
- categories = []
-
- for category in categories_to_counts:
-     print('Category {}, count {}'.format(category,categories_to_counts[category]))
-     categoryID = categories_to_category_id[category]
-     cat = {}
-     cat['name'] = category
-     cat['id'] = categoryID
-     categories.append(cat)
-
- elapsed = time.time() - start_time
- print('Finished creating CCT dictionaries in {}'.format(
-     humanfriendly.format_timespan(elapsed)))
-
-
- #%% Create info struct
-
- info = {}
- info['year'] = 2018
- info['version'] = 1
- info['description'] = 'Carrizo Shrub-Free 2018'
- info['contributor'] = 'York University'
-
-
- #%% Write output
-
- json_data = {}
- json_data['images'] = images
- json_data['annotations'] = annotations
- json_data['categories'] = categories
- json_data['info'] = info
- json.dump(json_data, open(output_json_file, 'w'), indent=4)
-
- print('Finished writing .json file with {} images, {} annotations, and {} categories'.format(
-     len(images),len(annotations),len(categories)))
-
-
- #%% Validate output
-
- from data_management.databases import integrity_check_json_db
-
- options = integrity_check_json_db.IntegrityCheckOptions()
- options.baseDir = image_directory
- options.bCheckImageSizes = False
- options.bCheckImageExistence = False
- options.bFindUnusedImages = False
- data = integrity_check_json_db.integrity_check_json_db(output_json_file,options)
-
-
- #%% Preview labels
-
- from md_visualization import visualize_db
- from data_management.databases import integrity_check_json_db
-
- viz_options = visualize_db.DbVizOptions()
- viz_options.num_to_visualize = None
- viz_options.trim_to_images_with_bboxes = False
- viz_options.add_search_links = True
- viz_options.sort_by_filename = False
- viz_options.parallelize_rendering = True
- viz_options.classes_to_exclude = ['empty']
- html_output_file,image_db = visualize_db.visualize_db(db_path=output_json_file,
-                                                       output_dir=os.path.join(
-                                                           output_base, 'carrizo shrub-free 2018/preview'),
-                                                       image_base_dir=image_directory,
-                                                       options=viz_options)
- os.startfile(html_output_file)
-
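A note on the filename synthesis near the top of this file, since the same trick recurs in the next hunk: groupby(...).cumcount() numbers the rows within each group, which the scripts turn into sequential IMG_nnnn.JPG names. A toy illustration with made-up data (label-based access is used here for clarity, where the script itself uses positional x[0]/x[1]):

    import pandas as pd

    df = pd.DataFrame({'rep': ['B1', 'B1', 'B2']})
    # cumcount() is 0-based within each group, hence the "1 +" in the script
    df['file'] = 1 + df.groupby('rep').cumcount()
    df['file'] = df[['file', 'rep']].apply(
        lambda x: '{0}\\IMG_{1}.JPG'.format(x['rep'], str(x['file']).zfill(4)), axis=1)
    print(df['file'].tolist())
    # ['B1\\IMG_0001.JPG', 'B1\\IMG_0002.JPG', 'B2\\IMG_0001.JPG']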
data_management/importers/carrizo_trail_cam_2017.py
@@ -1,287 +0,0 @@
- """
-
- carrizo_trail_cam_2017.py
-
- Convert the .csv files provided for the "Trail Cam Carrizo" 2017 data set to
- a COCO-camera-traps .json file.
-
- """
-
- #%% Constants and environment
-
- import pandas as pd
- import os
- import json
- import uuid
- import time
- import humanfriendly
- from PIL import Image
- import numpy as np
-
- from tqdm import tqdm
- from md_utils.path_utils import find_images
-
- input_base = r'Z:\Trail Cam Carrizo 2017'
- open_metadata_file = os.path.join(input_base, 'Carrizo open 2017.csv')
- shrub_metadata_file = os.path.join(input_base, 'Carrizo Shrub 2017.csv')
-
- output_base = r'G:\carrizo-mojave'
- output_json_file = os.path.join(output_base, 'carrizo trail cam 2017.json')
- image_directory = input_base
- input_metadata_files = [open_metadata_file, shrub_metadata_file]
-
- load_width_and_height = False
-
- assert(os.path.isdir(image_directory))
-
- category_replacements = {'unidnetifiable':'unidentifiable','unidentifiable animal':'unidentifiable'}
-
- annotation_fields_to_copy = ['rep','photo rep','timeblock','night.day','observations']
-
-
- #%% Read source data
-
- final_data = pd.DataFrame()
-
- for inp_file in input_metadata_files:
-
-     print("Reading: {0}".format(inp_file))
-     input_metadata = pd.read_csv(inp_file)
-
-     print('Read {} columns and {} rows from metadata file'.format(len(input_metadata.columns),
-           len(input_metadata)))
-
-     # Removing the empty records
-     input_metadata = input_metadata[~np.isnan(input_metadata['rep'])]
-
-     input_metadata['file'] = input_metadata.groupby(["rep", "week"]).cumcount()
-     week_folder_format = {1: 'week 1- 2017', 2: 'week2- 2017', 3: 'week3-2017'}
-
-     input_metadata['file'] = input_metadata[['file', 'rep', 'week', 'microsite']].apply(
-         lambda x: "{3}/{4}{1}-week{0}-carrizo-2017/IMG_{2}.JPG".format(int(x[2]), int(x[1]),
-             str(int(x[0]+1)).zfill(4),
-             week_folder_format[int(x[2])],
-             x[3].lower()), axis=1)
-
-     final_data = final_data.append(input_metadata)
-
- print('Read {} metadata rows'.format(len(final_data)))
-
-
- #%% Map filenames to rows, verify image existence
-
- start_time = time.time()
- filenames_to_rows = {}
- image_filenames = input_metadata.file
-
- missing_files = []
- duplicate_rows = []
-
- # Build up a map from filenames to a list of rows, checking image existence as we go
- for iFile, fn in tqdm(enumerate(image_filenames),total=len(image_filenames)):
-     if (fn in filenames_to_rows):
-         duplicate_rows.append(iFile)
-         filenames_to_rows[fn].append(iFile)
-     else:
-         filenames_to_rows[fn] = [iFile]
-         image_path = os.path.join(image_directory, fn)
-         if not os.path.isfile(image_path):
-             missing_files.append(fn)
-
- elapsed = time.time() - start_time
-
- print('Finished verifying image existence in {}, found {} missing files (of {})'.format(
-     humanfriendly.format_timespan(elapsed),
-     len(missing_files),len(image_filenames)))
-
- assert len(duplicate_rows) == 0
-
- # 908 missing files (of 60562)
-
-
- #%% Check for images that aren't included in the metadata file
-
- image_full_paths = find_images(image_directory, bRecursive=True)
- images_missing_from_metadata = []
-
- for iImage, image_path in tqdm(enumerate(image_full_paths),total=len(image_full_paths)):
-
-     relative_path = os.path.relpath(image_path,input_base).replace('\\','/')
-     if relative_path not in filenames_to_rows:
-         images_missing_from_metadata.append(relative_path)
-
- print('{} of {} files are not in metadata'.format(len(images_missing_from_metadata),len(image_full_paths)))
-
- # 105329 of 164983 files are not in metadata
-
-
- #%% Create CCT dictionaries
-
- images = []
- annotations = []
-
- # Map categories to integer IDs
- #
- # The category '0' is reserved for 'empty'
-
- categories_to_category_id = {}
- categories_to_counts = {}
- categories_to_category_id['empty'] = 0
- categories_to_counts['empty'] = 0
-
- next_category_id = 1
-
- # For each image
- #
- # Because in practice images are 1:1 with annotations in this data set,
- # this is also a loop over annotations.
-
- start_time = time.time()
-
- for image_name in tqdm(image_filenames):
-
-     rows = filenames_to_rows[image_name]
-
-     # Each filename should just match one row
-     assert(len(rows) == 1)
-
-     iRow = rows[0]
-     row = input_metadata.iloc[iRow]
-     im = {}
-     im['id'] = image_name.replace('\\','/').replace('/','_').replace(' ','_')
-     im['file_name'] = image_name
-     im['region'] = row['region']
-     im['site']= row['site']
-     im['mircosite'] = row['microsite']
-     im['datetime'] = row['calendar date']
-     im['location'] = "{0}_{1}_{2}".format(row['region'], row['site'], row['microsite'])
-
-     image_path = os.path.join(image_directory, image_name)
-
-     # Don't include images that don't exist on disk
-     if not os.path.isfile(image_path):
-         continue
-
-     if load_width_and_height:
-         pilImage = Image.open(image_path)
-         width, height = pilImage.size
-         im['width'] = width
-         im['height'] = height
-     else:
-         im['width'] = -1
-         im['height'] = -1
-
-     images.append(im)
-
-     is_image = row['animal.capture']
-
-     if (is_image == 0):
-         category = 'empty'
-     else:
-         if row['animal'] is np.nan:
-             category = 'unidentifiable'
-         else:
-             category = row['animal'].strip()
-
-     if category in category_replacements:
-         category = category_replacements[category]
-
-     # Have we seen this category before?
-     if category in categories_to_category_id:
-         categoryID = categories_to_category_id[category]
-         categories_to_counts[category] += 1
-     else:
-         categoryID = next_category_id
-         categories_to_category_id[category] = categoryID
-         categories_to_counts[category] = 1
-         next_category_id += 1
-
-     # Create an annotation
-     ann = {}
-
-     # The Internet tells me this guarantees uniqueness to a reasonable extent, even
-     # beyond the sheer improbability of collisions.
-     ann['id'] = str(uuid.uuid1())
-     ann['image_id'] = im['id']
-     ann['category_id'] = categoryID
-
-     for fieldname in annotation_fields_to_copy:
-         ann[fieldname] = row[fieldname]
-         if ann[fieldname] is np.nan:
-             ann[fieldname] = ''
-         ann[fieldname] = str(ann[fieldname])
-
-     annotations.append(ann)
-
- # ...for each image
-
- # Convert categories to a CCT-style dictionary
-
- categories = []
-
- for category in categories_to_counts:
-     print('Category {}, count {}'.format(category,categories_to_counts[category]))
-     categoryID = categories_to_category_id[category]
-     cat = {}
-     cat['name'] = category
-     cat['id'] = categoryID
-     categories.append(cat)
-
- elapsed = time.time() - start_time
- print('Finished creating CCT dictionaries in {}'.format(
-     humanfriendly.format_timespan(elapsed)))
-
-
- #%% Create info struct
-
- info = {}
- info['year'] = 2017
- info['version'] = 1
- info['description'] = 'Carrizo Trail Cam 2017'
- info['contributor'] = 'York University'
-
-
- #%% Write output
-
- json_data = {}
- json_data['images'] = images
- json_data['annotations'] = annotations
- json_data['categories'] = categories
- json_data['info'] = info
- json.dump(json_data, open(output_json_file, 'w'), indent=1)
-
- print('Finished writing .json file with {} images, {} annotations, and {} categories'.format(
-     len(images),len(annotations),len(categories)))
-
-
- #%% Validate output
-
- from data_management.databases import integrity_check_json_db
-
- options = integrity_check_json_db.IntegrityCheckOptions()
- options.baseDir = image_directory
- options.bCheckImageSizes = False
- options.bCheckImageExistence = False
- options.bFindUnusedImages = False
- data = integrity_check_json_db.integrity_check_json_db(output_json_file,options)
-
-
- #%% Preview labels
-
- from md_visualization import visualize_db
- from data_management.databases import integrity_check_json_db
-
- viz_options = visualize_db.DbVizOptions()
- viz_options.num_to_visualize = None
- viz_options.trim_to_images_with_bboxes = False
- viz_options.add_search_links = False
- viz_options.sort_by_filename = False
- viz_options.parallelize_rendering = True
- viz_options.classes_to_exclude = ['empty']
- html_output_file,image_db = visualize_db.visualize_db(db_path=output_json_file,
-                                                       output_dir=os.path.join(
-                                                           output_base, 'carrizo trail cam 2017/preview'),
-                                                       image_base_dir=image_directory,
-                                                       options=viz_options)
- os.startfile(html_output_file)
-
data_management/importers/cct_field_adjustments.py
@@ -1,57 +0,0 @@
- """
-
- cct_field_adjustments.py
-
- CCT metadata was posted with int locations instead of strings.
-
- This script fixes those issues and rev's the version number.
-
- """
-
- #%% Constants and environment
-
- from data_management.databases import integrity_check_json_db
- import json
- import os
-
- inputJsonFile = r"D:\temp\CaltechCameraTraps_v2.0.json"
- outputJsonFile = r"D:\temp\CaltechCameraTraps_v2.1.json"
-
- assert os.path.isfile(inputJsonFile)
-
-
- #%% Read .json file
-
- with open(inputJsonFile,'r') as f:
-     data = json.load(f)
-
- images = data['images']
- annotations = data['annotations']
- categories = data['categories']
- info = data['info']
-
- print('Finished reading input .json')
-
-
- #%% Rev version number, update field names and types
-
- assert(info['version'] == 'Caltech Camera Traps - v2')
- info['version'] = 2.1
- info['description'] = 'Caltech Camera Traps: camera trap images collected from the NPS and the USGS with help from Justin Brown and Erin Boydston'
-
- for image in images:
-
-     assert 'location' in image and isinstance(image['location'],int)
-     image['location'] = str(image['location'])
-
-
- #%% Write json file
-
- json.dump(data, open(outputJsonFile, 'w'), indent=4)
-
- print('Finished writing output .json to {}'.format(outputJsonFile))
-
-
- #%% Check output data file
-
- integrity_check_json_db.integrity_check_json_db(outputJsonFile)