megadetector 5.0.10__py3-none-any.whl → 5.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (226)
  1. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/LICENSE +0 -0
  2. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/METADATA +12 -11
  3. megadetector-5.0.11.dist-info/RECORD +5 -0
  4. megadetector-5.0.11.dist-info/top_level.txt +1 -0
  5. api/__init__.py +0 -0
  6. api/batch_processing/__init__.py +0 -0
  7. api/batch_processing/api_core/__init__.py +0 -0
  8. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  9. api/batch_processing/api_core/batch_service/score.py +0 -439
  10. api/batch_processing/api_core/server.py +0 -294
  11. api/batch_processing/api_core/server_api_config.py +0 -98
  12. api/batch_processing/api_core/server_app_config.py +0 -55
  13. api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  14. api/batch_processing/api_core/server_job_status_table.py +0 -152
  15. api/batch_processing/api_core/server_orchestration.py +0 -360
  16. api/batch_processing/api_core/server_utils.py +0 -92
  17. api/batch_processing/api_core_support/__init__.py +0 -0
  18. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  19. api/batch_processing/api_support/__init__.py +0 -0
  20. api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  21. api/batch_processing/data_preparation/__init__.py +0 -0
  22. api/batch_processing/data_preparation/manage_local_batch.py +0 -2391
  23. api/batch_processing/data_preparation/manage_video_batch.py +0 -327
  24. api/batch_processing/integration/digiKam/setup.py +0 -6
  25. api/batch_processing/integration/digiKam/xmp_integration.py +0 -465
  26. api/batch_processing/integration/eMammal/test_scripts/config_template.py +0 -5
  27. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -126
  28. api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +0 -55
  29. api/batch_processing/postprocessing/__init__.py +0 -0
  30. api/batch_processing/postprocessing/add_max_conf.py +0 -64
  31. api/batch_processing/postprocessing/categorize_detections_by_size.py +0 -163
  32. api/batch_processing/postprocessing/combine_api_outputs.py +0 -249
  33. api/batch_processing/postprocessing/compare_batch_results.py +0 -958
  34. api/batch_processing/postprocessing/convert_output_format.py +0 -397
  35. api/batch_processing/postprocessing/load_api_results.py +0 -195
  36. api/batch_processing/postprocessing/md_to_coco.py +0 -310
  37. api/batch_processing/postprocessing/md_to_labelme.py +0 -330
  38. api/batch_processing/postprocessing/merge_detections.py +0 -401
  39. api/batch_processing/postprocessing/postprocess_batch_results.py +0 -1904
  40. api/batch_processing/postprocessing/remap_detection_categories.py +0 -170
  41. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +0 -661
  42. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +0 -211
  43. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +0 -82
  44. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +0 -1631
  45. api/batch_processing/postprocessing/separate_detections_into_folders.py +0 -731
  46. api/batch_processing/postprocessing/subset_json_detector_output.py +0 -696
  47. api/batch_processing/postprocessing/top_folders_to_bottom.py +0 -223
  48. api/synchronous/__init__.py +0 -0
  49. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  50. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -152
  51. api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -266
  52. api/synchronous/api_core/animal_detection_api/config.py +0 -35
  53. api/synchronous/api_core/animal_detection_api/data_management/annotations/annotation_constants.py +0 -47
  54. api/synchronous/api_core/animal_detection_api/detection/detector_training/copy_checkpoints.py +0 -43
  55. api/synchronous/api_core/animal_detection_api/detection/detector_training/model_main_tf2.py +0 -114
  56. api/synchronous/api_core/animal_detection_api/detection/process_video.py +0 -543
  57. api/synchronous/api_core/animal_detection_api/detection/pytorch_detector.py +0 -304
  58. api/synchronous/api_core/animal_detection_api/detection/run_detector.py +0 -627
  59. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +0 -1029
  60. api/synchronous/api_core/animal_detection_api/detection/run_inference_with_yolov5_val.py +0 -581
  61. api/synchronous/api_core/animal_detection_api/detection/run_tiled_inference.py +0 -754
  62. api/synchronous/api_core/animal_detection_api/detection/tf_detector.py +0 -165
  63. api/synchronous/api_core/animal_detection_api/detection/video_utils.py +0 -495
  64. api/synchronous/api_core/animal_detection_api/md_utils/azure_utils.py +0 -174
  65. api/synchronous/api_core/animal_detection_api/md_utils/ct_utils.py +0 -262
  66. api/synchronous/api_core/animal_detection_api/md_utils/directory_listing.py +0 -251
  67. api/synchronous/api_core/animal_detection_api/md_utils/matlab_porting_tools.py +0 -97
  68. api/synchronous/api_core/animal_detection_api/md_utils/path_utils.py +0 -416
  69. api/synchronous/api_core/animal_detection_api/md_utils/process_utils.py +0 -110
  70. api/synchronous/api_core/animal_detection_api/md_utils/sas_blob_utils.py +0 -509
  71. api/synchronous/api_core/animal_detection_api/md_utils/string_utils.py +0 -59
  72. api/synchronous/api_core/animal_detection_api/md_utils/url_utils.py +0 -144
  73. api/synchronous/api_core/animal_detection_api/md_utils/write_html_image_list.py +0 -226
  74. api/synchronous/api_core/animal_detection_api/md_visualization/visualization_utils.py +0 -841
  75. api/synchronous/api_core/tests/__init__.py +0 -0
  76. api/synchronous/api_core/tests/load_test.py +0 -110
  77. classification/__init__.py +0 -0
  78. classification/aggregate_classifier_probs.py +0 -108
  79. classification/analyze_failed_images.py +0 -227
  80. classification/cache_batchapi_outputs.py +0 -198
  81. classification/create_classification_dataset.py +0 -627
  82. classification/crop_detections.py +0 -516
  83. classification/csv_to_json.py +0 -226
  84. classification/detect_and_crop.py +0 -855
  85. classification/efficientnet/__init__.py +0 -9
  86. classification/efficientnet/model.py +0 -415
  87. classification/efficientnet/utils.py +0 -610
  88. classification/evaluate_model.py +0 -520
  89. classification/identify_mislabeled_candidates.py +0 -152
  90. classification/json_to_azcopy_list.py +0 -63
  91. classification/json_validator.py +0 -695
  92. classification/map_classification_categories.py +0 -276
  93. classification/merge_classification_detection_output.py +0 -506
  94. classification/prepare_classification_script.py +0 -194
  95. classification/prepare_classification_script_mc.py +0 -228
  96. classification/run_classifier.py +0 -286
  97. classification/save_mislabeled.py +0 -110
  98. classification/train_classifier.py +0 -825
  99. classification/train_classifier_tf.py +0 -724
  100. classification/train_utils.py +0 -322
  101. data_management/__init__.py +0 -0
  102. data_management/annotations/__init__.py +0 -0
  103. data_management/annotations/annotation_constants.py +0 -34
  104. data_management/camtrap_dp_to_coco.py +0 -238
  105. data_management/cct_json_utils.py +0 -395
  106. data_management/cct_to_md.py +0 -176
  107. data_management/cct_to_wi.py +0 -289
  108. data_management/coco_to_labelme.py +0 -272
  109. data_management/coco_to_yolo.py +0 -662
  110. data_management/databases/__init__.py +0 -0
  111. data_management/databases/add_width_and_height_to_db.py +0 -33
  112. data_management/databases/combine_coco_camera_traps_files.py +0 -206
  113. data_management/databases/integrity_check_json_db.py +0 -477
  114. data_management/databases/subset_json_db.py +0 -115
  115. data_management/generate_crops_from_cct.py +0 -149
  116. data_management/get_image_sizes.py +0 -188
  117. data_management/importers/add_nacti_sizes.py +0 -52
  118. data_management/importers/add_timestamps_to_icct.py +0 -79
  119. data_management/importers/animl_results_to_md_results.py +0 -158
  120. data_management/importers/auckland_doc_test_to_json.py +0 -372
  121. data_management/importers/auckland_doc_to_json.py +0 -200
  122. data_management/importers/awc_to_json.py +0 -189
  123. data_management/importers/bellevue_to_json.py +0 -273
  124. data_management/importers/cacophony-thermal-importer.py +0 -796
  125. data_management/importers/carrizo_shrubfree_2018.py +0 -268
  126. data_management/importers/carrizo_trail_cam_2017.py +0 -287
  127. data_management/importers/cct_field_adjustments.py +0 -57
  128. data_management/importers/channel_islands_to_cct.py +0 -913
  129. data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  130. data_management/importers/eMammal/eMammal_helpers.py +0 -249
  131. data_management/importers/eMammal/make_eMammal_json.py +0 -223
  132. data_management/importers/ena24_to_json.py +0 -275
  133. data_management/importers/filenames_to_json.py +0 -385
  134. data_management/importers/helena_to_cct.py +0 -282
  135. data_management/importers/idaho-camera-traps.py +0 -1407
  136. data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  137. data_management/importers/jb_csv_to_json.py +0 -150
  138. data_management/importers/mcgill_to_json.py +0 -250
  139. data_management/importers/missouri_to_json.py +0 -489
  140. data_management/importers/nacti_fieldname_adjustments.py +0 -79
  141. data_management/importers/noaa_seals_2019.py +0 -181
  142. data_management/importers/pc_to_json.py +0 -365
  143. data_management/importers/plot_wni_giraffes.py +0 -123
  144. data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
  145. data_management/importers/prepare_zsl_imerit.py +0 -131
  146. data_management/importers/rspb_to_json.py +0 -356
  147. data_management/importers/save_the_elephants_survey_A.py +0 -320
  148. data_management/importers/save_the_elephants_survey_B.py +0 -332
  149. data_management/importers/snapshot_safari_importer.py +0 -758
  150. data_management/importers/snapshot_safari_importer_reprise.py +0 -665
  151. data_management/importers/snapshot_serengeti_lila.py +0 -1067
  152. data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  153. data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  154. data_management/importers/sulross_get_exif.py +0 -65
  155. data_management/importers/timelapse_csv_set_to_json.py +0 -490
  156. data_management/importers/ubc_to_json.py +0 -399
  157. data_management/importers/umn_to_json.py +0 -507
  158. data_management/importers/wellington_to_json.py +0 -263
  159. data_management/importers/wi_to_json.py +0 -441
  160. data_management/importers/zamba_results_to_md_results.py +0 -181
  161. data_management/labelme_to_coco.py +0 -548
  162. data_management/labelme_to_yolo.py +0 -272
  163. data_management/lila/__init__.py +0 -0
  164. data_management/lila/add_locations_to_island_camera_traps.py +0 -97
  165. data_management/lila/add_locations_to_nacti.py +0 -147
  166. data_management/lila/create_lila_blank_set.py +0 -557
  167. data_management/lila/create_lila_test_set.py +0 -151
  168. data_management/lila/create_links_to_md_results_files.py +0 -106
  169. data_management/lila/download_lila_subset.py +0 -177
  170. data_management/lila/generate_lila_per_image_labels.py +0 -515
  171. data_management/lila/get_lila_annotation_counts.py +0 -170
  172. data_management/lila/get_lila_image_counts.py +0 -111
  173. data_management/lila/lila_common.py +0 -300
  174. data_management/lila/test_lila_metadata_urls.py +0 -132
  175. data_management/ocr_tools.py +0 -874
  176. data_management/read_exif.py +0 -681
  177. data_management/remap_coco_categories.py +0 -84
  178. data_management/remove_exif.py +0 -66
  179. data_management/resize_coco_dataset.py +0 -189
  180. data_management/wi_download_csv_to_coco.py +0 -246
  181. data_management/yolo_output_to_md_output.py +0 -441
  182. data_management/yolo_to_coco.py +0 -676
  183. detection/__init__.py +0 -0
  184. detection/detector_training/__init__.py +0 -0
  185. detection/detector_training/model_main_tf2.py +0 -114
  186. detection/process_video.py +0 -703
  187. detection/pytorch_detector.py +0 -337
  188. detection/run_detector.py +0 -779
  189. detection/run_detector_batch.py +0 -1219
  190. detection/run_inference_with_yolov5_val.py +0 -917
  191. detection/run_tiled_inference.py +0 -935
  192. detection/tf_detector.py +0 -188
  193. detection/video_utils.py +0 -606
  194. docs/source/conf.py +0 -43
  195. md_utils/__init__.py +0 -0
  196. md_utils/azure_utils.py +0 -174
  197. md_utils/ct_utils.py +0 -612
  198. md_utils/directory_listing.py +0 -246
  199. md_utils/md_tests.py +0 -968
  200. md_utils/path_utils.py +0 -1044
  201. md_utils/process_utils.py +0 -157
  202. md_utils/sas_blob_utils.py +0 -509
  203. md_utils/split_locations_into_train_val.py +0 -228
  204. md_utils/string_utils.py +0 -92
  205. md_utils/url_utils.py +0 -323
  206. md_utils/write_html_image_list.py +0 -225
  207. md_visualization/__init__.py +0 -0
  208. md_visualization/plot_utils.py +0 -293
  209. md_visualization/render_images_with_thumbnails.py +0 -275
  210. md_visualization/visualization_utils.py +0 -1537
  211. md_visualization/visualize_db.py +0 -551
  212. md_visualization/visualize_detector_output.py +0 -406
  213. megadetector-5.0.10.dist-info/RECORD +0 -224
  214. megadetector-5.0.10.dist-info/top_level.txt +0 -8
  215. taxonomy_mapping/__init__.py +0 -0
  216. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +0 -491
  217. taxonomy_mapping/map_new_lila_datasets.py +0 -154
  218. taxonomy_mapping/prepare_lila_taxonomy_release.py +0 -142
  219. taxonomy_mapping/preview_lila_taxonomy.py +0 -591
  220. taxonomy_mapping/retrieve_sample_image.py +0 -71
  221. taxonomy_mapping/simple_image_download.py +0 -218
  222. taxonomy_mapping/species_lookup.py +0 -834
  223. taxonomy_mapping/taxonomy_csv_checker.py +0 -159
  224. taxonomy_mapping/taxonomy_graph.py +0 -346
  225. taxonomy_mapping/validate_lila_category_mappings.py +0 -83
  226. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/WHEEL +0 -0
data_management/importers/idaho-camera-traps.py
@@ -1,1407 +0,0 @@
- """
-
- idaho-camera-traps.py
-
- Prepare the Idaho Camera Traps dataset for release on LILA.
-
- """
-
- #%% Imports and constants
-
- import json
- import os
- import numpy as np
- import dateutil
- import pandas as pd
- import datetime
- import shutil
-
- from tqdm import tqdm
- from bson import json_util
-
- from collections import defaultdict
-
- # Multi-threading for .csv file comparison and image existence validation
- from multiprocessing.pool import Pool as Pool
- from multiprocessing.pool import ThreadPool as ThreadPool
- n_threads = 14
- n_threads_file_copy = 20
-
- input_base = r'i:\idfg-images'
- output_base = r'h:\idaho-camera-traps'
- output_image_base = r'j:\idaho-camera-traps-output'
- assert os.path.isdir(input_base)
- assert os.path.isdir(output_base)
- assert os.path.isdir(output_image_base)
-
- output_image_base_public = os.path.join(output_image_base,'public')
- output_image_base_private = os.path.join(output_image_base,'private')
-
- # We are going to map the original filenames/locations to obfuscated strings, but once
- # we've done that, we will re-use the mappings every time we run this script.
- force_generate_mappings = False
-
- # This is the file to which mappings get saved
- id_mapping_file = os.path.join(output_base,'id_mapping.json')
-
- # The maximum gap (in seconds) between consecutive images that are still considered
- # part of the same sequence.
- max_gap_within_sequence = 30
-
- # This is a two-column file, where each line is [string in the original metadata],[category name we want to map it to]
- category_mapping_file = os.path.join(output_base,'category_mapping.csv')
-
- # The output file, using the original strings
- output_json_original_strings = os.path.join(output_base,'idaho-camera-traps-original-strings.json')
-
- # The output file, using obfuscated strings for everything but filenames
- output_json_remapped_ids = os.path.join(output_base,'idaho-camera-traps-remapped-ids.json')
-
- # The output file, using obfuscated strings and obfuscated filenames
- output_json = os.path.join(output_base,'idaho-camera-traps.json')
-
- # One time only, I ran MegaDetector on the whole dataset...
- megadetector_results_file = r'H:\idaho-camera-traps\idfg-2021-07-26idaho-camera-traps_detections.json'
-
- # ...then set aside any images that *may* have contained humans that had not already been
- # annotated as such. Those went in this folder...
- human_review_folder = os.path.join(output_base,'human_review')
-
- # ...and the ones that *actually* had humans (identified via manual review) got
- # copied to this folder...
- human_review_selection_folder = os.path.join(output_base,'human_review_selections')
-
- # ...which was enumerated to this text file, which is a manually-curated list of
- # images that were flagged as human.
- human_review_list = os.path.join(output_base,'human_flagged_images.txt')
-
- # Unopinionated .json conversion of the .csv metadata
- sequence_info_cache = os.path.join(output_base,'sequence_info.json')
-
- valid_opstates = ['normal','maintenance','snow on lens','foggy lens','foggy weather',
-                   'malfunction','misdirected','snow on lense','poop/slobber','sun','tilted','vegetation obstruction']
- opstate_mappings = {'snow on lense':'snow on lens','poop/slobber':'lens obscured','maintenance':'human'}
-
- survey_species_presence_columns = ['elkpresent','deerpresent','prongpresent']
-
- presence_to_count_columns = {
-     'otherpresent':['MooseAntlerless','MooseCalf','MooseOther','MooseBull','MooseUnkn',
-                     'BlackBearAdult','BlackBearCub','LionAdult','LionKitten','WolfAdult',
-                     'WolfPup','CattleCow','CattleCalf','other'],
-     'elkpresent':['ElkSpike','ElkAntlerless','ElkCalf','ElkRaghorn','ElkMatBull','ElkUnkn','ElkPedNub'],
-     'deerpresent':['MDbuck','MDantlerless','MDfawn','WTDbuck','WTDantlerless','WTDfawn','WTDunkn','MDunkn'],
-     'prongpresent':['PronghornBuck','PronghornFawn','PHunkn']
- }
-
- required_columns = ['File','Folder','Date','Time','otherpresent','other','otherwhat','opstate']
- expected_presence_columns = ['elkpresent','deerpresent','prongpresent','humanpresent','otherpresent']
-
- expected_count_columns = set()
- for presence_column in presence_to_count_columns.keys():
-     count_columns = presence_to_count_columns[presence_column]
-     for count_column in count_columns:
-         expected_count_columns.add(count_column)
-
- def list_is_sorted(l):
-     return all(l[i] <= l[i+1] for i in range(len(l)-1))
-
-
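For orientation: presence_to_count_columns is keyed by presence column, but the parsing code below sometimes needs the inverse direction, recovering which presence column a count column belongs to when a count was entered without the presence box being checked. A minimal sketch of that inverse lookup (the dict name is ours, for illustration):

    # Invert presence_to_count_columns: count column -> presence column
    count_to_presence_column = {}
    for presence_column, count_columns in presence_to_count_columns.items():
        for count_column in count_columns:
            count_to_presence_column[count_column] = presence_column

    assert count_to_presence_column['WolfPup'] == 'otherpresent'
    assert count_to_presence_column['ElkCalf'] == 'elkpresent'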
- #%% List files (images + .csv)
-
- def get_files():
-
-     all_files_list = os.path.join(output_base,'all_files.json')
-     force_file_enumeration = False
-
-     if (os.path.isfile(all_files_list) and (not force_file_enumeration)):
-
-         print('File list exists, bypassing enumeration')
-         with open(all_files_list,'r') as f:
-             all_files = json.load(f)
-
-     else:
-
-         from pathlib import Path
-         all_files = []
-         for path in Path(input_base).rglob('*.*'):
-             path = str(path)
-             path = os.path.relpath(path,input_base)
-             all_files.append(path)
-         with open(all_files_list,'w') as f:
-             json.dump(all_files,f,indent=1)
-
-     print('Enumerated {} files'.format(len(all_files)))
-
-     image_files = [s for s in all_files if (s.lower().endswith('.jpg') or s.lower().endswith('.jpeg'))]
-     csv_files = [s for s in all_files if (\
-         (s.lower().endswith('.csv')) and \
-         ('Backups' not in s) and \
-         ('Metadata.csv' not in s) and \
-         ('ExportedDataFiles' not in s) and \
-         ('CSV Files' not in s)
-         )]
-
-     print('{} image files, {} .csv files'.format(len(image_files),len(csv_files)))
-
-     # Ignore .csv files in folders with multiple .csv files
-
-     # ...which would require some extra work to decipher.
-
-     csv_files_to_ignore = []
-
-     folder_to_csv_files = defaultdict(list)
-
-     # fn = csv_files[0]
-     for fn in csv_files:
-         folder_name = os.path.dirname(fn)
-         folder_to_csv_files[folder_name].append(fn)
-
-     for folder_name in folder_to_csv_files.keys():
-         if len(folder_to_csv_files[folder_name]) > 1:
-             print('Multiple .csv files for {}:'.format(folder_name))
-             for csv_file in folder_to_csv_files[folder_name]:
-                 print(csv_file)
-                 csv_files_to_ignore.append(csv_file)
-             print('')
-
-     n_csv_original = len(csv_files)
-     csv_files = [s for s in csv_files if s not in csv_files_to_ignore]
-
-     print('Processing {} of {} csv files'.format(len(csv_files),n_csv_original))
-
-     return image_files,csv_files
-
-
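get_files() caches the full recursive enumeration in all_files.json so that re-runs skip the slow disk walk. The same pattern, reduced to a self-contained sketch (function and argument names are illustrative, not part of the script):

    import json, os
    from pathlib import Path

    def enumerate_with_cache(root, cache_file):
        """List files under root (as relative paths), caching the result as JSON."""
        if os.path.isfile(cache_file):
            with open(cache_file, 'r') as f:
                return json.load(f)
        files = [os.path.relpath(str(p), root)
                 for p in Path(root).rglob('*.*') if p.is_file()]
        with open(cache_file, 'w') as f:
            json.dump(files, f, indent=1)
        return files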
- #%% Parse each .csv file into sequences (function)
-
- # csv_file = csv_files[-1]
- def csv_to_sequences(csv_file):
-
-     print('Processing {}'.format(csv_file))
-
-     csv_file_absolute = os.path.join(input_base,csv_file)
-     # os.startfile(csv_file_absolute)
-
-     sequences = []
-     # survey = csv_file.split('\\')[0]
-
-     # Sample paths from which we need to derive locations:
-     #
-     # St.Joe_elk\AM99\Trip 1\100RECNX\TimelapseData.csv
-     # Beaverhead_elk\AM34\Trip 1\100RECNX\TimelapseData.csv
-     #
-     # ClearCreek_mustelids\Winter2015-16\FS-001-P\FS-001-P.csv
-     # ClearCreek_mustelids\Summer2015\FS-001\FS-001.csv
-     # ClearCreek_mustelids\Summer2016\IDFG-016\IDFG-016.csv
-     #
-     # I:\idfg-images\ClearCreek_mustelids\Summer2016\IDFG-017b
-     # I:\idfg-images\ClearCreek_mustelids\Summer2016\IDFG-017a
-     if 'St.Joe_elk' in csv_file or 'Beaverhead_elk' in csv_file:
-         location_name = '_'.join(csv_file.split('\\')[0:2]).replace(' ','')
-     else:
-         assert 'ClearCreek_mustelids' in csv_file
-         tokens = csv_file.split('\\')
-         assert 'FS-' in tokens[2] or 'IDFG-' in tokens[2]
-         location_name = '_'.join([tokens[0],tokens[2]]).replace('-P','')
-         if location_name.endswith('017a') or location_name.endswith('017b'):
-             location_name = location_name[:-1]
-
-     # Load .csv file
-     df = pd.read_csv(csv_file_absolute)
-     df['datetime'] = None
-     df['seq_id'] = None
-     df['synthetic_frame_number'] = None
-
-     # Validate the opstate column
-     opstates = set(df['opstate'])
-     for s in opstates:
-         if isinstance(s,str):
-             s = s.strip()
-             if len(s) > 0:
-                 assert s in valid_opstates,'Invalid opstate: {}'.format(s)
-
-     column_names = list(df.columns)
-
-     for s in required_columns:
-         assert s in column_names
-
-     count_columns = [s for s in column_names if s in expected_count_columns]
-
-     presence_columns = [s for s in column_names if s.endswith('present')]
-
-     for s in presence_columns:
-         if s not in expected_presence_columns:
-             assert False, 'Unexpected presence column {} in {}'.format(s,csv_file)
-     for s in expected_presence_columns:
-         if s not in presence_columns:
-             assert False, 'Missing presence column {} in {}'.format(s,csv_file)
-
-     if False:
-         for s in expected_count_columns:
-             if s not in count_columns:
-                 print('Missing count column {} in {}'.format(s,csv_file))
-
-     ## Create datetimes
-
-     # print('Creating datetimes')
-
-     # i_row = 0; row = df.iloc[i_row]
-     for i_row,row in df.iterrows():
-
-         date = row['Date']
-         time = row['Time']
-         datestring = date + ' ' + time
-         dt = dateutil.parser.parse(datestring)
-         assert dt.year >= 2015 and dt.year <= 2019
-         df.loc[i_row,'datetime'] = dt
-
-     # Make sure data are sorted chronologically
-     #
-     # In odd circumstances, they are not... so sort them first, but warn
-     datetimes = list(df['datetime'])
-     if not list_is_sorted(datetimes):
-         print('Datetimes not sorted for {}'.format(csv_file))
-
-     df = df.sort_values('datetime')
-     df.reset_index(drop=True, inplace=True)
-     datetimes = list(df['datetime'])
-     assert list_is_sorted(datetimes)
-
-     # Debugging when I was trying to see what was up with the unsorted dates
-     if False:
-         for i in range(0,len(datetimes)-1):
-             dt = datetimes[i+1]
-             prev_dt = datetimes[i]
-             delta = dt - prev_dt
-             assert delta >= datetime.timedelta(0)
-
-     ## Parse into sequences
-
-     # print('Creating sequences')
-
-     current_sequence_id = None
-     next_frame_number = 0
-     previous_datetime = None
-
-     sequence_id_to_rows = defaultdict(list)
-
-     # i_row = 0; row = df.iloc[i_row]
-     for i_row,row in df.iterrows():
-
-         dt = row['datetime']
-         assert dt is not None and isinstance(dt,datetime.datetime)
-
-         # Start a new sequence if:
-         #
-         # * We have no previous image timestamp, or
-         # * The gap since the previous image exceeds max_gap_within_sequence
-         #
-         if previous_datetime is None:
-             delta = None
-         else:
-             delta = (dt - previous_datetime).total_seconds()
-
-         # Start a new sequence if necessary
-         if delta is None or delta > max_gap_within_sequence:
-             next_frame_number = 0
-             current_sequence_id = location_name + '_seq_' + str(dt) # str(uuid.uuid1())
-
-         assert current_sequence_id is not None
-
-         sequence_id_to_rows[current_sequence_id].append(i_row)
-         df.loc[i_row,'seq_id'] = current_sequence_id
-         df.loc[i_row,'synthetic_frame_number'] = next_frame_number
-         next_frame_number = next_frame_number + 1
-         previous_datetime = dt
-
-     # ...for each row
-
-     location_sequences = list(set(list(df['seq_id'])))
-     location_sequences.sort()
-
-     inconsistent_sequences = []
-
-
-     ## Parse labels for each sequence
-
-     # sequence_id = location_sequences[0]
-     for sequence_id in location_sequences:
-
-         sequence_row_indices = sequence_id_to_rows[sequence_id]
-         assert len(sequence_row_indices) > 0
-
-         # Row indices in a sequence should be adjacent
-         if len(sequence_row_indices) > 1:
-             d = np.diff(sequence_row_indices)
-             assert all(d == 1)
-
-         # sequence_df = df[df['seq_id']==sequence_id]
-         sequence_df = df.iloc[sequence_row_indices]
-
-
-         ## Determine what's present
-
-         presence_columns_marked = []
-         survey_species = []
-         other_species = []
-
-         # Be conservative; assume humans are present in all maintenance images
-         opstates = set(sequence_df['opstate'])
-         assert all([ ( (isinstance(s,float)) or (len(s.strip()) == 0) or \
-             (s.strip() in valid_opstates)) for s in opstates]),\
-             'Invalid opstate in: {}'.format(' | '.join(opstates))
-
-         for presence_column in presence_columns:
-
-             presence_values = list(sequence_df[presence_column])
-
-             # The presence columns are *almost* always identical for all images in a sequence
-             single_presence_value = (len(set(presence_values)) == 1)
-             # assert single_presence_value
-             if not single_presence_value:
-                 # print('Warning: presence value for {} is inconsistent for {}'.format(
-                 #     presence_column,sequence_id))
-                 inconsistent_sequences.append(sequence_id)
-
-             if any(presence_values):
-                 presence_columns_marked.append(presence_column)
-
-         # ...for each presence column
-
-         # Tally up the standard (survey) species
-         survey_species = [s.replace('present','') for s in presence_columns_marked if s != 'otherpresent']
-         for opstate in opstates:
-             if not isinstance(opstate,str):
-                 continue
-             opstate = opstate.strip()
-             if len(opstate) == 0:
-                 continue
-             if opstate in opstate_mappings:
-                 opstate = opstate_mappings[opstate]
-             if (opstate != 'normal') and (opstate not in survey_species):
-                 survey_species.append(opstate)
-
-         # If no presence columns are marked, all counts should be zero
-         if len(presence_columns_marked) == 0:
-
-             # count_column = count_columns[0]
-             for count_column in count_columns:
-
-                 values = list(set(list(sequence_df[count_column])))
-
-                 # Occasionally a count gets entered (correctly) without the presence column being marked
-                 # assert len(values) == 1 and values[0] == 0, 'Non-zero counts with no presence
-                 # columns marked for sequence {}'.format(sequence_id)
-                 if not (len(values) == 1 and values[0] == 0):
-                     print('Warning: presence and counts are inconsistent for {}'.format(sequence_id))
-
-                     # Handle this by virtually checking the "right" box
-                     for presence_column in presence_to_count_columns.keys():
-                         count_columns_this_species = presence_to_count_columns[presence_column]
-                         if count_column in count_columns_this_species:
-                             if presence_column not in presence_columns_marked:
-                                 presence_columns_marked.append(presence_column)
-
-                     # Make sure we found a match
-                     assert len(presence_columns_marked) > 0
-
-         # Handle 'other' tags
-         if 'otherpresent' in presence_columns_marked:
-
-             sequence_otherwhats = set()
-             sequence_comments = set()
-
-             for i,r in sequence_df.iterrows():
-                 otherwhat = r['otherwhat']
-                 if isinstance(otherwhat,str):
-                     otherwhat = otherwhat.strip()
-                     if len(otherwhat) > 0:
-                         sequence_otherwhats.add(otherwhat)
-                 comment = r['comment']
-                 if isinstance(comment,str):
-                     comment = comment.strip()
-                     if len(comment) > 0:
-                         sequence_comments.add(comment)
-
-             freetext_species = []
-             for s in sequence_otherwhats:
-                 freetext_species.append(s)
-             for s in sequence_comments:
-                 freetext_species.append(s)
-
-             counted_species = []
-
-             otherpresent_columns = presence_to_count_columns['otherpresent']
-
-             # column_name = otherpresent_columns[0]
-             for column_name in otherpresent_columns:
-
-                 if column_name in sequence_df and column_name != 'other':
-
-                     column_counts = list(sequence_df[column_name])
-                     column_count_positive = any([c > 0 for c in column_counts])
-
-                     if column_count_positive:
-                         # print('Found non-survey counted species column: {}'.format(column_name))
-                         counted_species.append(column_name)
-
-             # ...for each non-empty presence column
-
-             # Very rarely, the "otherpresent" column is checked, but no more detail is available
-             if not ( (len(freetext_species) > 0) or (len(counted_species) > 0) ):
-                 other_species.append('unknown')
-
-             other_species += freetext_species
-             other_species += counted_species
-
-         # ...handling non-survey species
-
-         all_species = other_species + survey_species
-
-         # Build the sequence data
-
-         images = []
-         # i_row = 0; row = sequence_df.iloc[i_row]
-         for i_row,row in sequence_df.iterrows():
-             im = {}
-             # Only one folder used a single .csv file for two subfolders
-             if ('RelativePath' in row) and (isinstance(row['RelativePath'],str)) \
-                 and (len(row['RelativePath'].strip()) > 0):
-                 assert 'IDFG-028' in location_name
-                 im['file_name'] = os.path.join(row['RelativePath'],row['File'])
-             else:
-                 im['file_name'] = row['File']
-             im['datetime'] = row['datetime']
-             images.append(im)
-
-         sequence = {}
-         sequence['csv_source'] = csv_file
-         sequence['sequence_id'] = sequence_id
-         sequence['images'] = images
-         sequence['species_present'] = all_species
-         sequence['location'] = location_name
-
-         sequences.append(sequence)
-
-     # ...for each sequence
-
-     return sequences
-
- # ...def csv_to_sequences()
-
-
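The heart of csv_to_sequences() is the gap rule above: consecutive rows stay in the same sequence as long as adjacent timestamps are no more than max_gap_within_sequence (30) seconds apart. The rule in isolation, as a runnable sketch (names here are ours):

    import datetime

    MAX_GAP_S = 30  # mirrors max_gap_within_sequence

    def group_by_gap(timestamps):
        """Split sorted datetimes into sequences at gaps longer than MAX_GAP_S."""
        sequences = []
        for dt in timestamps:
            if (not sequences) or (dt - sequences[-1][-1]).total_seconds() > MAX_GAP_S:
                sequences.append([])
            sequences[-1].append(dt)
        return sequences

    t0 = datetime.datetime(2017, 6, 1, 12, 0, 0)
    ts = [t0,
          t0 + datetime.timedelta(seconds=5),   # within 30s: same sequence
          t0 + datetime.timedelta(seconds=90)]  # 85s gap: new sequence
    assert [len(s) for s in group_by_gap(ts)] == [2, 1]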
- #%% Parse each .csv file into sequences (loop)
-
- if __name__ == "__main__":
-
-     #%%
-
-     import multiprocessing
-     multiprocessing.freeze_support()
-     image_files,csv_files = get_files()
-
-     #%%
-
-     if n_threads == 1:
-
-         # i_file = -1; csv_file = csv_files[i_file]
-         sequences_by_file = []
-         for i_file,csv_file in enumerate(csv_files):
-             print('Processing file {} of {}'.format(i_file,len(csv_files)))
-             sequences = csv_to_sequences(csv_file)
-             sequences_by_file.append(sequences)
-
-     else:
-
-         pool = Pool(n_threads)
-         sequences_by_file = list(pool.imap(csv_to_sequences,csv_files))
-
-
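Note the two pool types in this script: csv parsing (CPU-bound) goes through multiprocessing.pool.Pool, i.e. worker processes, while the later file copy (I/O-bound) uses ThreadPool with the same map-style interface. The pattern in miniature (the worker function is a stand-in, not part of the script):

    from multiprocessing.pool import Pool, ThreadPool

    def work(x):               # stand-in for csv_to_sequences / process_image
        return x * x

    if __name__ == '__main__':
        with Pool(4) as p:             # processes: CPU-bound work
            cpu_results = list(p.imap(work, range(10)))
        with ThreadPool(4) as p:       # threads: I/O-bound work
            io_results = p.map(work, range(10))
        assert cpu_results == io_results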
-     #%% Save sequence data
-
-     with open(sequence_info_cache,'w') as f:
-         json.dump(sequences_by_file,f,indent=2,default=json_util.default)
-
-
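The sequences contain datetime objects, which the standard json encoder can't serialize; passing bson's json_util.default here (and json_util.object_hook on reload, below) round-trips them. A minimal sketch:

    import datetime
    import json
    from bson import json_util

    record = {'captured': datetime.datetime(2017, 6, 1, 12, 0, 0)}
    s = json.dumps(record, default=json_util.default)
    restored = json.loads(s, object_hook=json_util.object_hook)
    # Note: json_util restores a timezone-aware (UTC) datetime
    assert isinstance(restored['captured'], datetime.datetime)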
-     #%% Load sequence data
-
-     if False:
-
-         #%%
-
-         with open(sequence_info_cache,'r') as f:
-             sequences_by_file = json.load(f,object_hook=json_util.object_hook)
-
-
-     #%% Validate file mapping (based on the existing enumeration)
-
-     missing_images = []
-     image_files_set = set(image_files)
-     n_images_in_sequences = 0
-     sequence_ids = set()
-
-     # sequences = sequences_by_file[0]
-     for i_sequences,sequences in enumerate(tqdm(sequences_by_file)):
-
-         assert len(sequences) > 0
-         csv_source = sequences[0]['csv_source']
-         csv_file_absolute = os.path.join(input_base,csv_source)
-         csv_folder = os.path.dirname(csv_file_absolute)
-         assert os.path.isfile(csv_file_absolute)
-
-         # sequence = sequences[0]
-         for i_sequence,sequence in enumerate(sequences):
-
-             assert sequence['csv_source'] == csv_source
-             sequence_id = sequence['sequence_id']
-             if sequence_id in sequence_ids:
-                 print('Warning: duplicate sequence for {}, creating new sequence'.format(sequence_id))
-                 sequence['sequence_id'] = sequence['sequence_id'] + '_' + str(i_sequences) + \
-                     '_' + str(i_sequence)
-                 sequence_id = sequence['sequence_id']
-             assert sequence_id not in sequence_ids
-
-             sequence_ids.add(sequence_id)
-
-             species_present = sequence['species_present']
-             images = sequence['images']
-
-             for im in images:
-
-                 n_images_in_sequences += 1
-                 image_file_relative = im['file_name']
-
-                 # Actually, one folder has relative paths
-                 # assert '\\' not in image_file_relative and '/' not in image_file_relative
-
-                 image_file_absolute = os.path.join(csv_folder,image_file_relative)
-                 image_file_container_relative = os.path.relpath(image_file_absolute,input_base)
-
-                 # os.startfile(csv_folder)
-                 # assert os.path.isfile(image_file_absolute)
-                 # found_file = os.path.isfile(image_file_absolute)
-                 found_file = image_file_container_relative in image_files_set
-                 if not found_file:
-                     print('Warning: can\'t find image {}'.format(image_file_absolute))
-                     missing_images.append(image_file_absolute)
-
-             # ...for each image
-
-         # ...for each sequence
-
-     # ...for each .csv file
-
-     print('{} of {} images missing ({} on disk)'.format(len(missing_images),n_images_in_sequences,
-         len(image_files)))
-
-
-     #%% Load manual category mappings
-
-     with open(category_mapping_file,'r') as f:
-         category_mapping_lines = f.readlines()
-     category_mapping_lines = [s.strip() for s in category_mapping_lines]
-
-     category_mappings = {}
-     for s in category_mapping_lines:
-         tokens = s.split(',',1)
-         category_name = tokens[0].strip()
-         category_value = tokens[1].strip().replace('"','').replace(',','+')
-         assert ',' not in category_name
-         assert ',' not in category_value
-
-         # The second column is blank when the first column already represents the category name
-         if len(category_value) == 0:
-             category_value = category_name
-         category_mappings[category_name] = category_value
-
-
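The split(',',1) above keeps any commas inside the second column intact, and the replace(',','+') turns quoted multi-species values into '+'-separated names that are split back apart downstream. Worked through on hypothetical rows:

    # Hypothetical rows in the format category_mapping.csv expects
    lines = ['bobcat,',                     # blank second column: name maps to itself
             'elk calf,elk',                # free text -> canonical category
             'wolf and deer,"wolf,deer"']   # quoted pair -> 'wolf+deer'

    mappings = {}
    for s in lines:
        name, value = s.split(',', 1)
        value = value.strip().replace('"', '').replace(',', '+')
        mappings[name.strip()] = value if value else name.strip()

    assert mappings == {'bobcat': 'bobcat',
                        'elk calf': 'elk',
                        'wolf and deer': 'wolf+deer'}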
-     #%% Convert to CCT .json (original strings)
-
-     human_flagged_images = []
-     with open(human_review_list,'r') as f:
-         human_flagged_images = f.readlines()
-     human_flagged_images = [s.strip().replace('/','\\') for s in human_flagged_images]
-     human_flagged_images = set(human_flagged_images)
-     print('Read {} human flagged images'.format(len(human_flagged_images)))
-
-     annotations = []
-     image_id_to_image = {}
-     category_name_to_category = {}
-
-     # Force the empty category to be ID 0
-     empty_category_id = 0
-     empty_category = {}
-     empty_category['id'] = empty_category_id
-     empty_category['name'] = 'empty'
-     category_name_to_category['empty'] = empty_category
-
-     human_category_id = 1
-     human_category = {}
-     human_category['id'] = human_category_id
-     human_category['name'] = 'human'
-     category_name_to_category['human'] = human_category
-
-     next_category_id = 2
-
-     annotation_ids = set()
-
-     if False:
-         target_folder = r'ClearCreek_mustelids\Summer2015\FS-035'
-         for sequences in sequences_by_file:
-             if target_folder in sequences[0]['csv_source']:
-                 break
-
-     # For each .csv file...
-     #
-     # sequences = sequences_by_file[0]
-     for sequences in tqdm(sequences_by_file):
-
-         # For each sequence...
-         #
-         # sequence = sequences[0]
-         for sequence in sequences:
-
-             species_present = sequence['species_present']
-             species_present = [s.lower().strip().replace(',',';') for s in species_present]
-
-             sequence_images = sequence['images']
-             location = sequence['location'].lower().strip()
-             sequence_id = sequence['sequence_id']
-             csv_source = sequence['csv_source']
-             csv_folder_relative = os.path.dirname(csv_source)
-
-             sequence_category_ids = set()
-
-             # Find categories for this image
-             if len(species_present) == 0:
-
-                 sequence_category_ids.add(0)
-                 assert category_name_to_category['empty']['id'] == list(sequence_category_ids)[0]
-
-             else:
-
-                 # When 'unknown' is used in combination with another label, use that
-                 # label; the "unknown" here doesn't mean "another unknown species", it means
-                 # there is some other unknown property about the main species.
-                 if 'unknown' in species_present and len(species_present) > 1:
-                     assert all([((s in category_mappings) or (s in valid_opstates) or \
-                                  (s in opstate_mappings.values()))\
-                                 for s in species_present if s != 'unknown'])
-                     species_present = [s for s in species_present if s != 'unknown']
-
-                 # category_name_string = species_present[0]
-                 for category_name_string in species_present:
-
-                     # This piece of text had a lot of complicated syntax in it, and it would have
-                     # been too complicated to handle in a general way
-                     if 'coyotoes' in category_name_string:
-                         # print('Ignoring category {}'.format(category_name_string))
-                         continue
-
-                     if category_name_string not in category_mappings:
-                         assert category_name_string in valid_opstates or \
-                             category_name_string in opstate_mappings.values()
-                     else:
-                         category_name_string = category_mappings[category_name_string]
-                         assert ',' not in category_name_string
-
-                     category_names = category_name_string.split('+')
-                     assert len(category_names) <= 2
-
-                     # Don't process redundant labels
-                     category_names = set(category_names)
-
-                     # category_name = category_names[0]
-                     for category_name in category_names:
-
-                         if category_name == 'ignore':
-                             continue
-
-                         category_name = category_name.replace('"','')
-
-                         # If we've seen this category before...
-                         if category_name in category_name_to_category:
-
-                             category = category_name_to_category[category_name]
-                             category_id = category['id']
-
-                         # If this is a new category...
-                         else:
-
-                             # print('Adding new category for {}'.format(category_name))
-                             category_id = next_category_id
-                             category = {}
-                             category['id'] = category_id
-                             category['name'] = category_name
-                             category_name_to_category[category_name] = category
-                             next_category_id += 1
-
-                         sequence_category_ids.add(category_id)
-
-                     # ...for each category (inner)
-
-                 # ...for each category (outer)
-
-             # ...if we do/don't have species in this sequence
-
-             # We should have at least one category assigned (which may be "empty" or "unknown")
-             assert len(sequence_category_ids) > 0
-
-             # assert len(sequence_category_ids) > 0
-
-             # Was any image in this sequence manually flagged as human?
-             for i_image,im in enumerate(sequence_images):
-
-                 file_name_relative = os.path.join(csv_folder_relative,im['file_name'])
-                 if file_name_relative in human_flagged_images:
-                     # print('Flagging sequence {} as human based on manual review'.format(sequence_id))
-                     assert human_category_id not in sequence_category_ids
-                     sequence_category_ids.add(human_category_id)
-                     break
-
-             # For each image in this sequence...
-             #
-             # i_image = 0; im = images[i_image]
-             for i_image,im in enumerate(sequence_images):
-
-                 image_id = sequence_id + '_' + im['file_name']
-                 assert image_id not in image_id_to_image
-
-                 output_im = {}
-                 output_im['id'] = image_id
-                 output_im['file_name'] = os.path.join(csv_folder_relative,im['file_name'])
-                 output_im['seq_id'] = sequence_id
-                 output_im['seq_num_frames'] = len(sequence_images)
-                 output_im['frame_num'] = i_image
-                 output_im['datetime'] = str(im['datetime'])
-                 output_im['location'] = location
-
-                 image_id_to_image[image_id] = output_im
-
-                 # Create annotations for this image
-                 for i_ann,category_id in enumerate(sequence_category_ids):
-
-                     ann = {}
-                     ann['id'] = 'ann_' + image_id + '_' + str(i_ann)
-                     assert ann['id'] not in annotation_ids
-                     annotation_ids.add(ann['id'])
-                     ann['image_id'] = image_id
-                     ann['category_id'] = category_id
-                     ann['sequence_level_annotation'] = True
-                     annotations.append(ann)
-
-             # ...for each image in this sequence
-
-         # ...for each sequence
-
-     # ...for each .csv file
-
-     images = list(image_id_to_image.values())
-     categories = list(category_name_to_category.values())
-     print('Loaded {} annotations in {} categories for {} images'.format(
-         len(annotations),len(categories),len(images)))
-
-     # Verify that all images have annotations
-     image_id_to_annotations = defaultdict(list)
-
-     # ann = ict_data['annotations'][0]
-
-     # For debugging only
-     categories_to_counts = defaultdict(int)
-     for ann in tqdm(annotations):
-         image_id_to_annotations[ann['image_id']].append(ann)
-         categories_to_counts[ann['category_id']] = categories_to_counts[ann['category_id']] + 1
-
-     for im in tqdm(images):
-         image_annotations = image_id_to_annotations[im['id']]
-         assert len(image_annotations) > 0
-
-
-     #%% Create output (original strings)
-
-     info = {}
-     info['contributor'] = 'Idaho Department of Fish and Game'
-     info['description'] = 'Idaho Camera Traps'
-     info['version'] = '2021.07.19'
-
-     output_data = {}
-     output_data['images'] = images
-     output_data['annotations'] = annotations
-     output_data['categories'] = categories
-     output_data['info'] = info
-
-     with open(output_json_original_strings,'w') as f:
-         json.dump(output_data,f,indent=1)
-
-
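For reference, the records written above follow the COCO Camera Traps layout: top-level images / annotations / categories / info, with sequence-level labels attached per image. A minimal, hand-written example of the shape (all values illustrative):

    example_cct = {
        'info': {'contributor': 'Idaho Department of Fish and Game',
                 'version': '2021.07.19'},
        'categories': [{'id': 0, 'name': 'empty'},
                       {'id': 1, 'name': 'human'}],
        'images': [{'id': 'seq_x_IMG_0001.JPG',
                    'file_name': 'Survey/Cam/IMG_0001.JPG',
                    'seq_id': 'seq_x', 'seq_num_frames': 1, 'frame_num': 0,
                    'datetime': '2017-06-01 12:00:00', 'location': 'survey_cam01'}],
        'annotations': [{'id': 'ann_seq_x_IMG_0001.JPG_0',
                         'image_id': 'seq_x_IMG_0001.JPG',
                         'category_id': 0,
                         'sequence_level_annotation': True}],
    }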
-     #%% Validate .json file
-
-     from data_management.databases import integrity_check_json_db
-
-     options = integrity_check_json_db.IntegrityCheckOptions()
-     options.baseDir = input_base
-     options.bCheckImageSizes = False
-     options.bCheckImageExistence = False
-     options.bFindUnusedImages = False
-
-     _, _, _ = integrity_check_json_db.integrity_check_json_db(output_json_original_strings, options)
-
-
-     #%% Preview labels
-
-     from md_visualization import visualize_db
-
-     viz_options = visualize_db.DbVizOptions()
-     viz_options.num_to_visualize = 1000
-     viz_options.trim_to_images_with_bboxes = False
-     viz_options.add_search_links = False
-     viz_options.sort_by_filename = False
-     viz_options.parallelize_rendering = True
-     viz_options.include_filename_links = True
-
-     viz_options.classes_to_exclude = ['empty','deer','elk']
-     html_output_file, _ = visualize_db.visualize_db(db_path=output_json_original_strings,
-                                                     output_dir=os.path.join(
-                                                         output_base,'preview'),
-                                                     image_base_dir=input_base,
-                                                     options=viz_options)
-     os.startfile(html_output_file)
-
-
-     #%% Look for humans that were found by MegaDetector that haven't already been identified as human
-
-     # This whole step only needed to get run once
-
-     if False:
-
-         pass
-
-         #%%
-
-         human_confidence_threshold = 0.5
-
-         # Load MD results
-         with open(megadetector_results_file,'r') as f:
-             md_results = json.load(f)
-
-         # Get a list of filenames that MD tagged as human
-
-         human_md_categories =\
-             [category_id for category_id in md_results['detection_categories'] if \
-              ((md_results['detection_categories'][category_id] == 'person') or \
-               (md_results['detection_categories'][category_id] == 'vehicle'))]
-         assert len(human_md_categories) == 2
-
-         # im = md_results['images'][0]
-         md_human_images = set()
-
-         for im in md_results['images']:
-             if 'detections' not in im:
-                 continue
-             if im['max_detection_conf'] < human_confidence_threshold:
-                 continue
-             for detection in im['detections']:
-                 if detection['category'] not in human_md_categories:
-                     continue
-                 elif detection['conf'] < human_confidence_threshold:
-                     continue
-                 else:
-                     md_human_images.add(im['file'])
-                     break
-
-             # ...for each detection
-
-         # ...for each image
-
-         print('MD found {} potential human images (of {})'.format(
-             len(md_human_images),len(md_results['images'])))
-
-         # Map images to annotations in ICT
-
-         with open(output_json_original_strings,'r') as f:
-             ict_data = json.load(f)
-
-         category_id_to_name = {c['id']:c['name'] for c in categories}
-
-         image_id_to_annotations = defaultdict(list)
-
-         # ann = ict_data['annotations'][0]
-         for ann in tqdm(ict_data['annotations']):
-             image_id_to_annotations[ann['image_id']].append(ann)
-
-         human_ict_categories = ['human']
-         manual_human_images = set()
-
-         # For every image
-         # im = ict_data['images'][0]
-         for im in tqdm(ict_data['images']):
-
-             # Does this image already have a human annotation?
-             manual_human = False
-
-             annotations = image_id_to_annotations[im['id']]
-             assert len(annotations) > 0
-
-             for ann in annotations:
-                 category_name = category_id_to_name[ann['category_id']]
-                 if category_name in human_ict_categories:
-                     manual_human_images.add(im['file_name'].replace('\\','/'))
-
-             # ...for each annotation
-
-         # ...for each image
-
-         print('{} images identified as human in source metadata'.format(len(manual_human_images)))
-
-         missing_human_images = []
-
-         for fn in md_human_images:
-             if fn not in manual_human_images:
-                 missing_human_images.append(fn)
-
-         print('{} potentially untagged human images'.format(len(missing_human_images)))
-
-
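The loop above keeps an image when any 'person' or 'vehicle' detection clears human_confidence_threshold. The same test as a compact predicate over the MegaDetector output format (the helper name is ours, for illustration):

    def is_probable_human(im, human_categories, threshold=0.5):
        """True if any person/vehicle detection is at or above threshold."""
        return any((d['category'] in human_categories) and (d['conf'] >= threshold)
                   for d in im.get('detections') or [])

    # Usage, mirroring the loop above:
    # md_human_images = {im['file'] for im in md_results['images']
    #                    if is_probable_human(im, human_md_categories)}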
-         #%% Copy images for review to a new folder
-
-         os.makedirs(human_review_folder,exist_ok=True)
-         missing_human_images.sort()
-
-         # fn = missing_human_images[0]
-         for i_image,fn in enumerate(tqdm(missing_human_images)):
-             input_fn_absolute = os.path.join(input_base,fn).replace('\\','/')
-             assert os.path.isfile(input_fn_absolute)
-             output_path = os.path.join(human_review_folder,str(i_image).zfill(4) + '_' + fn.replace('/','~'))
-             shutil.copyfile(input_fn_absolute,output_path)
-
-
-         #%% Manual step...
-
-         # Copy any images from that list that have humans in them to...
-         human_review_selection_folder = r'H:\idaho-camera-traps\human_review_selections'
-         assert os.path.isdir(human_review_selection_folder)
-
-
-         #%% Create a list of the images we just manually flagged
-
-         human_tagged_filenames = os.listdir(human_review_selection_folder)
-         human_tagged_relative_paths = []
-         # fn = human_tagged_filenames[0]
-         for fn in human_tagged_filenames:
-
-             # E.g. '0000_Beaverhead_elk~AM174~Trip 1~100RECNX~IMG_1397.JPG'
-             relative_path = fn[5:].replace('~','/')
-             human_tagged_relative_paths.append(relative_path)
-
-         with open(human_review_list,'w') as f:
-             for s in human_tagged_relative_paths:
-                 f.write(s + '\n')
-
-
-     #%% Translate location, image, sequence IDs
-
-     # Load mappings if available
-     if (not force_generate_mappings) and (os.path.isfile(id_mapping_file)):
-
-         print('Loading ID mappings from {}'.format(id_mapping_file))
-
-         with open(id_mapping_file,'r') as f:
-             mappings = json.load(f)
-
-         image_id_mappings = mappings['image_id_mappings']
-         annotation_id_mappings = mappings['annotation_id_mappings']
-         location_id_mappings = mappings['location_id_mappings']
-         sequence_id_mappings = mappings['sequence_id_mappings']
-
-     else:
-
-         # Generate mappings
-         mappings = {}
-
-         next_location_id = 0
-         location_id_string_to_n_sequences = defaultdict(int)
-         location_id_string_to_n_images = defaultdict(int)
-
-         image_id_mappings = {}
-         annotation_id_mappings = {}
-         location_id_mappings = {}
-         sequence_id_mappings = {}
-
-         for im in tqdm(images):
-
-             # If we've seen this location before...
-             if im['location'] in location_id_mappings:
-                 location_id = location_id_mappings[im['location']]
-             else:
-                 # Otherwise assign a string-formatted int as the ID
-                 location_id = str(next_location_id)
-
-                 location_id_mappings[im['location']] = location_id
-                 next_location_id += 1
-
-             # If we've seen this sequence before...
-             if im['seq_id'] in sequence_id_mappings:
-                 sequence_id = sequence_id_mappings[im['seq_id']]
-             else:
-                 # Otherwise assign a string-formatted int as the ID
-                 n_sequences_this_location = location_id_string_to_n_sequences[location_id]
-                 sequence_id = 'loc_{}_seq_{}'.format(
-                     location_id.zfill(4),str(n_sequences_this_location).zfill(6))
-                 sequence_id_mappings[im['seq_id']] = sequence_id
-
-                 n_sequences_this_location += 1
-                 location_id_string_to_n_sequences[location_id] = n_sequences_this_location
-
-             assert im['id'] not in image_id_mappings
-
-             # Assign an image ID
-
-             n_images_this_location = location_id_string_to_n_images[location_id]
-             image_id_mappings[im['id']] = 'loc_{}_im_{}'.format(
-                 location_id.zfill(4),str(n_images_this_location).zfill(6))
-
-             n_images_this_location += 1
-             location_id_string_to_n_images[location_id] = n_images_this_location
-
-         # ...for each image
-
-         # Assign annotation mappings
-         for i_ann,ann in enumerate(tqdm(annotations)):
-             assert ann['image_id'] in image_id_mappings
-             assert ann['id'] not in annotation_id_mappings
-             annotation_id_mappings[ann['id']] = 'ann_{}'.format(str(i_ann).zfill(8))
-
-         mappings['image_id_mappings'] = image_id_mappings
-         mappings['annotation_id_mappings'] = annotation_id_mappings
-         mappings['location_id_mappings'] = location_id_mappings
-         mappings['sequence_id_mappings'] = sequence_id_mappings
-
-         # Save mappings
-         with open(id_mapping_file,'w') as f:
-             json.dump(mappings,f,indent=2)
-
-         print('Saved ID mappings to {}'.format(id_mapping_file))
-
-         # Back this file up, lest we should accidentally re-run this script
-         # with force_generate_mappings = True and overwrite the mappings we used.
-         datestr = str(datetime.datetime.now()).replace(':','-')
-         backup_file = id_mapping_file.replace('.json','_' + datestr + '.json')
-         shutil.copyfile(id_mapping_file,backup_file)
-
-
-     # ...if we are/aren't re-generating mappings
-
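The obfuscated IDs generated above are purely positional: per-location counters zero-padded into fixed-width strings. Reduced to the formatting rules themselves:

    location_id = '3'  # the 4th location encountered (counting from 0)
    assert 'loc_{}_seq_{}'.format(location_id.zfill(4), str(12).zfill(6)) == 'loc_0003_seq_000012'
    assert 'loc_{}_im_{}'.format(location_id.zfill(4), str(45).zfill(6)) == 'loc_0003_im_000045'
    assert 'ann_{}'.format(str(123).zfill(8)) == 'ann_00000123'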
-     #%% Apply mappings
-
-     for im in images:
-         im['id'] = image_id_mappings[im['id']]
-         im['seq_id'] = sequence_id_mappings[im['seq_id']]
-         im['location'] = location_id_mappings[im['location']]
-     for ann in annotations:
-         ann['id'] = annotation_id_mappings[ann['id']]
-         ann['image_id'] = image_id_mappings[ann['image_id']]
-
-     print('Applied mappings')
-
-
-     #%% Write new dictionaries (modified strings, original files)
-
-     output_data = {}
-     output_data['images'] = images
-     output_data['annotations'] = annotations
-     output_data['categories'] = categories
-     output_data['info'] = info
-
-     with open(output_json_remapped_ids,'w') as f:
-         json.dump(output_data,f,indent=2)
-
-
-     #%% Validate .json file (modified strings, original files)
-
-     from data_management.databases import integrity_check_json_db
-
-     options = integrity_check_json_db.IntegrityCheckOptions()
-     options.baseDir = input_base
-     options.bCheckImageSizes = False
-     options.bCheckImageExistence = False
-     options.bFindUnusedImages = False
-
-     _, _, _ = integrity_check_json_db.integrity_check_json_db(output_json_remapped_ids, options)
-
-
-     #%% Preview labels (original files)
-
-     from md_visualization import visualize_db
-
-     viz_options = visualize_db.DbVizOptions()
-     viz_options.num_to_visualize = 1000
-     viz_options.trim_to_images_with_bboxes = False
-     viz_options.add_search_links = False
-     viz_options.sort_by_filename = False
-     viz_options.parallelize_rendering = True
-     viz_options.include_filename_links = True
-
-     # viz_options.classes_to_exclude = ['empty','deer','elk']
-     # viz_options.classes_to_include = ['bobcat']
-     viz_options.classes_to_include = [viz_options.multiple_categories_tag]
-
-     html_output_file, _ = visualize_db.visualize_db(db_path=output_json_remapped_ids,
-                                                     output_dir=os.path.join(
-                                                         output_base,'preview'),
-                                                     image_base_dir=input_base,
-                                                     options=viz_options)
-     os.startfile(html_output_file)
-
-
-     #%% Copy images to final output folder (prep)
-
-     force_copy = False
-
-     with open(output_json_remapped_ids,'r') as f:
-         d = json.load(f)
-
-     images = d['images']
-
-     private_categories = ['human','domestic dog','vehicle']
-
-     private_image_ids = set()
-
-     category_id_to_name = {c['id']:c['name'] for c in d['categories']}
-
-     # ann = d['annotations'][0]
-     for ann in d['annotations']:
-         category_name = category_id_to_name[ann['category_id']]
-         if category_name in private_categories:
-             private_image_ids.add(ann['image_id'])
-
-     print('Copying {} of {} images to the private folder'.format(len(private_image_ids),len(images)))
-
-     def process_image(im):
-
-         input_relative_path = im['file_name']
-         input_absolute_path = os.path.join(input_base,input_relative_path)
-
-         if not os.path.isfile(input_absolute_path):
-             print('Warning: file {} is not available'.format(input_absolute_path))
-             return
-
-         location = im['location']
-         image_id = im['id']
-
-         location_folder = 'loc_' + location.zfill(4)
-         assert location_folder in image_id
-
-         output_relative_path = location_folder + '/' + image_id + '.jpg'
-
-         # Is this a public or private image?
-         private_image = (image_id in private_image_ids)
-
-         # Generate absolute path
-         if private_image:
-             output_absolute_path = os.path.join(output_image_base_private,output_relative_path)
-         else:
-             output_absolute_path = os.path.join(output_image_base_public,output_relative_path)
-
-         # Copy to output
-         output_dir = os.path.dirname(output_absolute_path)
-         os.makedirs(output_dir,exist_ok=True)
-
-         if force_copy or (not os.path.isfile(output_absolute_path)):
-             shutil.copyfile(input_absolute_path,output_absolute_path)
-
-         # Update the filename reference
-         im['file_name'] = output_relative_path
-
-     # ...def process_image(im)
-
-
1219
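For concreteness, here is how process_image derives an output path; the image ID below is invented, but it matches the 'loc_XXXX' pattern that the assert requires:

# Worked example of the path derivation in process_image (hypothetical values)
location = '12'
image_id = 'loc_0012_im_000345'                 # must contain 'loc_0012'
location_folder = 'loc_' + location.zfill(4)    # -> 'loc_0012'
output_relative_path = location_folder + '/' + image_id + '.jpg'
# -> 'loc_0012/loc_0012_im_000345.jpg', rooted at the public or private base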
- #%% Copy images to final output folder (execution)
-
- # For each image
- if n_threads_file_copy == 1:
-     # im = images[0]
-     for im in tqdm(images):
-         process_image(im)
- else:
-     pool = ThreadPool(n_threads_file_copy)
-     pool.map(process_image,images)
-
- print('Finished copying, writing .json output')
-
- # Write output .json
- with open(output_json,'w') as f:
-     json.dump(d,f,indent=1)
-
-
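The multi-threaded branch runs without progress reporting; if that were wanted, a common pattern is to wrap imap_unordered in tqdm (a sketch using the same process_image and thread count as above):

# Sketch: parallel copy with a progress bar
from multiprocessing.pool import ThreadPool
from tqdm import tqdm
pool = ThreadPool(n_threads_file_copy)
list(tqdm(pool.imap_unordered(process_image, images), total=len(images)))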
- #%% Make sure the right number of images got there
-
- from pathlib import Path
- all_output_files = []
- all_output_files_list = os.path.join(output_base,'all_output_files.json')
-
- for path in Path(output_image_base).rglob('*.*'):
-     path = str(path)
-     path = os.path.relpath(path,output_image_base)
-     all_output_files.append(path)
- with open(all_output_files_list,'w') as f:
-     json.dump(all_output_files,f,indent=1)
-
- print('Enumerated {} output files (of {} images)'.format(len(all_output_files),len(images)))
-
-
- #%% Validate .json file (final filenames)
-
- from data_management.databases import integrity_check_json_db
-
- options = integrity_check_json_db.IntegrityCheckOptions()
- options.baseDir = input_base
- options.bCheckImageSizes = False
- options.bCheckImageExistence = False
- options.bFindUnusedImages = False
-
- _, _, _ = integrity_check_json_db.integrity_check_json_db(output_json, options)
-
-
- #%% Preview labels (final filenames)
-
- from md_visualization import visualize_db
-
- viz_options = visualize_db.DbVizOptions()
- viz_options.num_to_visualize = 1500
- viz_options.trim_to_images_with_bboxes = False
- viz_options.add_search_links = False
- viz_options.sort_by_filename = False
- viz_options.parallelize_rendering = True
- viz_options.include_filename_links = True
-
- # viz_options.classes_to_exclude = ['empty','deer','elk']
- viz_options.classes_to_include = ['bear','mountain lion']
- # viz_options.classes_to_include = ['horse']
- # viz_options.classes_to_include = [viz_options.multiple_categories_tag]
- # viz_options.classes_to_include = ['human','vehicle','domestic dog']
-
- html_output_file, _ = visualize_db.visualize_db(db_path=output_json,
-                                                 output_dir=os.path.join(output_base,'final-preview-01'),
-                                                 image_base_dir=output_image_base_public,
-                                                 options=viz_options)
- os.startfile(html_output_file)
-
-
- #%% Create zipfiles
-
- #%% List public files
-
- from pathlib import Path
- all_public_output_files = []
- all_public_output_files_list = os.path.join(output_base,'all_public_output_files.json')
-
- if not os.path.isfile(all_public_output_files_list):
-     for path in Path(output_image_base_public).rglob('*.*'):
-         path = str(path)
-         path = os.path.relpath(path,output_image_base)
-         all_public_output_files.append(path)
-     with open(all_public_output_files_list,'w') as f:
-         json.dump(all_public_output_files,f,indent=1)
- else:
-     with open(all_public_output_files_list,'r') as f:
-         all_public_output_files = json.load(f)
-
- print('Enumerated {} public output files'.format(len(all_public_output_files)))
-
-
- #%% Find the size of each file
-
- filename_to_size = {}
-
- all_public_output_sizes_list = os.path.join(output_base,'all_public_output_sizes.json')
-
- if not os.path.isfile(all_public_output_sizes_list):
-     # fn = all_public_output_files[0]
-     for fn in tqdm(all_public_output_files):
-         p = os.path.join(output_image_base,fn)
-         assert os.path.isfile(p)
-         filename_to_size[fn] = os.path.getsize(p)
-
-     with open(all_public_output_sizes_list,'w') as f:
-         json.dump(filename_to_size,f,indent=1)
- else:
-     with open(all_public_output_sizes_list,'r') as f:
-         filename_to_size = json.load(f)
-
- assert len(filename_to_size) == len(all_public_output_files)
-
-
- #%% Split into chunks of approximately-equal size
-
- import humanfriendly
- total_size = sum(filename_to_size.values())
- print('{} in {} files'.format(humanfriendly.format_size(total_size),len(all_public_output_files)))
-
- bytes_per_part = 320e9
-
- file_lists = []
-
- current_file_list = []
- n_bytes_current_file_list = 0
-
- for fn in all_public_output_files:
-     size = filename_to_size[fn]
-     current_file_list.append(fn)
-     n_bytes_current_file_list += size
-     if n_bytes_current_file_list > bytes_per_part:
-         file_lists.append(current_file_list)
-         current_file_list = []
-         n_bytes_current_file_list = 0
- # ...for each file
-
- # Don't append an empty final chunk if the last file exactly closed a part
- if len(current_file_list) > 0:
-     file_lists.append(current_file_list)
-
- assert sum([len(l) for l in file_lists]) == len(all_public_output_files)
-
- print('List sizes:')
- for l in file_lists:
-     print(len(l))
-
-
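Because a chunk is closed only after it exceeds bytes_per_part, each part can overshoot the 320 GB target by up to one file. If a strict cap were required, the size check would move before the append (a sketch; the parts become slightly less even):

# Sketch: strict-cap variant of the greedy chunking above
file_lists = []
current_file_list = []
n_bytes_current_file_list = 0
for fn in all_public_output_files:
    size = filename_to_size[fn]
    if current_file_list and (n_bytes_current_file_list + size > bytes_per_part):
        file_lists.append(current_file_list)
        current_file_list = []
        n_bytes_current_file_list = 0
    current_file_list.append(fn)
    n_bytes_current_file_list += size
if current_file_list:
    file_lists.append(current_file_list)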
- #%% Create a zipfile for each chunk
-
- from zipfile import ZipFile
- import zipfile
- import os
-
- def create_zipfile(i_file_list):
-
-     file_list = file_lists[i_file_list]
-     zipfile_name = 'k:\\idaho-camera-traps-images.part_{}.zip'.format(i_file_list)
-
-     print('Processing archive {} to file {}'.format(i_file_list,zipfile_name))
-
-     # ZIP_STORED: don't compress; these are JPEGs, which are already compressed
-     with ZipFile(zipfile_name, 'w') as zipObj:
-
-         for filename_relative in file_list:
-
-             assert filename_relative.startswith('public')
-             filename_absolute = os.path.join(output_image_base,filename_relative)
-             zipObj.write(filename_absolute.replace('\\','/'),
-                          filename_relative, compress_type=zipfile.ZIP_STORED)
-
-         # ...for each filename
-
-     # ...with ZipFile()
-
- # ...def create_zipfile()
-
- # i_file_list = 0; file_list = file_lists[i_file_list]
-
- n_zip_threads = 1 # len(file_lists)
- if n_zip_threads == 1:
-     for i_file_list in range(0,len(file_lists)):
-         create_zipfile(i_file_list)
- else:
-     pool = ThreadPool(n_zip_threads)
-     indices = list(range(0,len(file_lists)))
-     pool.map(create_zipfile,indices)
-
- # ...if __name__ == "__main__"
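Storing rather than compressing is a reasonable design choice here, since JPEGs gain little from a second round of compression and ZIP_STORED is much faster to write. A quick post-hoc sanity check is to compare each archive's name list against the chunk it was built from (a sketch; ZipFile.write normalizes arcnames to forward slashes, hence the replace):

# Sketch: verify each archive contains exactly its chunk's files
for i_file_list, file_list in enumerate(file_lists):
    zipfile_name = 'k:\\idaho-camera-traps-images.part_{}.zip'.format(i_file_list)
    expected = set(fn.replace('\\','/') for fn in file_list)
    with ZipFile(zipfile_name, 'r') as z:
        assert set(z.namelist()) == expected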