megadetector 5.0.10-py3-none-any.whl → 5.0.11-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.

This version of megadetector has been flagged as a potentially problematic release.

Files changed (226)
  1. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/LICENSE +0 -0
  2. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/METADATA +12 -11
  3. megadetector-5.0.11.dist-info/RECORD +5 -0
  4. megadetector-5.0.11.dist-info/top_level.txt +1 -0
  5. api/__init__.py +0 -0
  6. api/batch_processing/__init__.py +0 -0
  7. api/batch_processing/api_core/__init__.py +0 -0
  8. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  9. api/batch_processing/api_core/batch_service/score.py +0 -439
  10. api/batch_processing/api_core/server.py +0 -294
  11. api/batch_processing/api_core/server_api_config.py +0 -98
  12. api/batch_processing/api_core/server_app_config.py +0 -55
  13. api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  14. api/batch_processing/api_core/server_job_status_table.py +0 -152
  15. api/batch_processing/api_core/server_orchestration.py +0 -360
  16. api/batch_processing/api_core/server_utils.py +0 -92
  17. api/batch_processing/api_core_support/__init__.py +0 -0
  18. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  19. api/batch_processing/api_support/__init__.py +0 -0
  20. api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  21. api/batch_processing/data_preparation/__init__.py +0 -0
  22. api/batch_processing/data_preparation/manage_local_batch.py +0 -2391
  23. api/batch_processing/data_preparation/manage_video_batch.py +0 -327
  24. api/batch_processing/integration/digiKam/setup.py +0 -6
  25. api/batch_processing/integration/digiKam/xmp_integration.py +0 -465
  26. api/batch_processing/integration/eMammal/test_scripts/config_template.py +0 -5
  27. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -126
  28. api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +0 -55
  29. api/batch_processing/postprocessing/__init__.py +0 -0
  30. api/batch_processing/postprocessing/add_max_conf.py +0 -64
  31. api/batch_processing/postprocessing/categorize_detections_by_size.py +0 -163
  32. api/batch_processing/postprocessing/combine_api_outputs.py +0 -249
  33. api/batch_processing/postprocessing/compare_batch_results.py +0 -958
  34. api/batch_processing/postprocessing/convert_output_format.py +0 -397
  35. api/batch_processing/postprocessing/load_api_results.py +0 -195
  36. api/batch_processing/postprocessing/md_to_coco.py +0 -310
  37. api/batch_processing/postprocessing/md_to_labelme.py +0 -330
  38. api/batch_processing/postprocessing/merge_detections.py +0 -401
  39. api/batch_processing/postprocessing/postprocess_batch_results.py +0 -1904
  40. api/batch_processing/postprocessing/remap_detection_categories.py +0 -170
  41. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +0 -661
  42. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +0 -211
  43. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +0 -82
  44. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +0 -1631
  45. api/batch_processing/postprocessing/separate_detections_into_folders.py +0 -731
  46. api/batch_processing/postprocessing/subset_json_detector_output.py +0 -696
  47. api/batch_processing/postprocessing/top_folders_to_bottom.py +0 -223
  48. api/synchronous/__init__.py +0 -0
  49. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  50. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -152
  51. api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -266
  52. api/synchronous/api_core/animal_detection_api/config.py +0 -35
  53. api/synchronous/api_core/animal_detection_api/data_management/annotations/annotation_constants.py +0 -47
  54. api/synchronous/api_core/animal_detection_api/detection/detector_training/copy_checkpoints.py +0 -43
  55. api/synchronous/api_core/animal_detection_api/detection/detector_training/model_main_tf2.py +0 -114
  56. api/synchronous/api_core/animal_detection_api/detection/process_video.py +0 -543
  57. api/synchronous/api_core/animal_detection_api/detection/pytorch_detector.py +0 -304
  58. api/synchronous/api_core/animal_detection_api/detection/run_detector.py +0 -627
  59. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +0 -1029
  60. api/synchronous/api_core/animal_detection_api/detection/run_inference_with_yolov5_val.py +0 -581
  61. api/synchronous/api_core/animal_detection_api/detection/run_tiled_inference.py +0 -754
  62. api/synchronous/api_core/animal_detection_api/detection/tf_detector.py +0 -165
  63. api/synchronous/api_core/animal_detection_api/detection/video_utils.py +0 -495
  64. api/synchronous/api_core/animal_detection_api/md_utils/azure_utils.py +0 -174
  65. api/synchronous/api_core/animal_detection_api/md_utils/ct_utils.py +0 -262
  66. api/synchronous/api_core/animal_detection_api/md_utils/directory_listing.py +0 -251
  67. api/synchronous/api_core/animal_detection_api/md_utils/matlab_porting_tools.py +0 -97
  68. api/synchronous/api_core/animal_detection_api/md_utils/path_utils.py +0 -416
  69. api/synchronous/api_core/animal_detection_api/md_utils/process_utils.py +0 -110
  70. api/synchronous/api_core/animal_detection_api/md_utils/sas_blob_utils.py +0 -509
  71. api/synchronous/api_core/animal_detection_api/md_utils/string_utils.py +0 -59
  72. api/synchronous/api_core/animal_detection_api/md_utils/url_utils.py +0 -144
  73. api/synchronous/api_core/animal_detection_api/md_utils/write_html_image_list.py +0 -226
  74. api/synchronous/api_core/animal_detection_api/md_visualization/visualization_utils.py +0 -841
  75. api/synchronous/api_core/tests/__init__.py +0 -0
  76. api/synchronous/api_core/tests/load_test.py +0 -110
  77. classification/__init__.py +0 -0
  78. classification/aggregate_classifier_probs.py +0 -108
  79. classification/analyze_failed_images.py +0 -227
  80. classification/cache_batchapi_outputs.py +0 -198
  81. classification/create_classification_dataset.py +0 -627
  82. classification/crop_detections.py +0 -516
  83. classification/csv_to_json.py +0 -226
  84. classification/detect_and_crop.py +0 -855
  85. classification/efficientnet/__init__.py +0 -9
  86. classification/efficientnet/model.py +0 -415
  87. classification/efficientnet/utils.py +0 -610
  88. classification/evaluate_model.py +0 -520
  89. classification/identify_mislabeled_candidates.py +0 -152
  90. classification/json_to_azcopy_list.py +0 -63
  91. classification/json_validator.py +0 -695
  92. classification/map_classification_categories.py +0 -276
  93. classification/merge_classification_detection_output.py +0 -506
  94. classification/prepare_classification_script.py +0 -194
  95. classification/prepare_classification_script_mc.py +0 -228
  96. classification/run_classifier.py +0 -286
  97. classification/save_mislabeled.py +0 -110
  98. classification/train_classifier.py +0 -825
  99. classification/train_classifier_tf.py +0 -724
  100. classification/train_utils.py +0 -322
  101. data_management/__init__.py +0 -0
  102. data_management/annotations/__init__.py +0 -0
  103. data_management/annotations/annotation_constants.py +0 -34
  104. data_management/camtrap_dp_to_coco.py +0 -238
  105. data_management/cct_json_utils.py +0 -395
  106. data_management/cct_to_md.py +0 -176
  107. data_management/cct_to_wi.py +0 -289
  108. data_management/coco_to_labelme.py +0 -272
  109. data_management/coco_to_yolo.py +0 -662
  110. data_management/databases/__init__.py +0 -0
  111. data_management/databases/add_width_and_height_to_db.py +0 -33
  112. data_management/databases/combine_coco_camera_traps_files.py +0 -206
  113. data_management/databases/integrity_check_json_db.py +0 -477
  114. data_management/databases/subset_json_db.py +0 -115
  115. data_management/generate_crops_from_cct.py +0 -149
  116. data_management/get_image_sizes.py +0 -188
  117. data_management/importers/add_nacti_sizes.py +0 -52
  118. data_management/importers/add_timestamps_to_icct.py +0 -79
  119. data_management/importers/animl_results_to_md_results.py +0 -158
  120. data_management/importers/auckland_doc_test_to_json.py +0 -372
  121. data_management/importers/auckland_doc_to_json.py +0 -200
  122. data_management/importers/awc_to_json.py +0 -189
  123. data_management/importers/bellevue_to_json.py +0 -273
  124. data_management/importers/cacophony-thermal-importer.py +0 -796
  125. data_management/importers/carrizo_shrubfree_2018.py +0 -268
  126. data_management/importers/carrizo_trail_cam_2017.py +0 -287
  127. data_management/importers/cct_field_adjustments.py +0 -57
  128. data_management/importers/channel_islands_to_cct.py +0 -913
  129. data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  130. data_management/importers/eMammal/eMammal_helpers.py +0 -249
  131. data_management/importers/eMammal/make_eMammal_json.py +0 -223
  132. data_management/importers/ena24_to_json.py +0 -275
  133. data_management/importers/filenames_to_json.py +0 -385
  134. data_management/importers/helena_to_cct.py +0 -282
  135. data_management/importers/idaho-camera-traps.py +0 -1407
  136. data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  137. data_management/importers/jb_csv_to_json.py +0 -150
  138. data_management/importers/mcgill_to_json.py +0 -250
  139. data_management/importers/missouri_to_json.py +0 -489
  140. data_management/importers/nacti_fieldname_adjustments.py +0 -79
  141. data_management/importers/noaa_seals_2019.py +0 -181
  142. data_management/importers/pc_to_json.py +0 -365
  143. data_management/importers/plot_wni_giraffes.py +0 -123
  144. data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
  145. data_management/importers/prepare_zsl_imerit.py +0 -131
  146. data_management/importers/rspb_to_json.py +0 -356
  147. data_management/importers/save_the_elephants_survey_A.py +0 -320
  148. data_management/importers/save_the_elephants_survey_B.py +0 -332
  149. data_management/importers/snapshot_safari_importer.py +0 -758
  150. data_management/importers/snapshot_safari_importer_reprise.py +0 -665
  151. data_management/importers/snapshot_serengeti_lila.py +0 -1067
  152. data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  153. data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  154. data_management/importers/sulross_get_exif.py +0 -65
  155. data_management/importers/timelapse_csv_set_to_json.py +0 -490
  156. data_management/importers/ubc_to_json.py +0 -399
  157. data_management/importers/umn_to_json.py +0 -507
  158. data_management/importers/wellington_to_json.py +0 -263
  159. data_management/importers/wi_to_json.py +0 -441
  160. data_management/importers/zamba_results_to_md_results.py +0 -181
  161. data_management/labelme_to_coco.py +0 -548
  162. data_management/labelme_to_yolo.py +0 -272
  163. data_management/lila/__init__.py +0 -0
  164. data_management/lila/add_locations_to_island_camera_traps.py +0 -97
  165. data_management/lila/add_locations_to_nacti.py +0 -147
  166. data_management/lila/create_lila_blank_set.py +0 -557
  167. data_management/lila/create_lila_test_set.py +0 -151
  168. data_management/lila/create_links_to_md_results_files.py +0 -106
  169. data_management/lila/download_lila_subset.py +0 -177
  170. data_management/lila/generate_lila_per_image_labels.py +0 -515
  171. data_management/lila/get_lila_annotation_counts.py +0 -170
  172. data_management/lila/get_lila_image_counts.py +0 -111
  173. data_management/lila/lila_common.py +0 -300
  174. data_management/lila/test_lila_metadata_urls.py +0 -132
  175. data_management/ocr_tools.py +0 -874
  176. data_management/read_exif.py +0 -681
  177. data_management/remap_coco_categories.py +0 -84
  178. data_management/remove_exif.py +0 -66
  179. data_management/resize_coco_dataset.py +0 -189
  180. data_management/wi_download_csv_to_coco.py +0 -246
  181. data_management/yolo_output_to_md_output.py +0 -441
  182. data_management/yolo_to_coco.py +0 -676
  183. detection/__init__.py +0 -0
  184. detection/detector_training/__init__.py +0 -0
  185. detection/detector_training/model_main_tf2.py +0 -114
  186. detection/process_video.py +0 -703
  187. detection/pytorch_detector.py +0 -337
  188. detection/run_detector.py +0 -779
  189. detection/run_detector_batch.py +0 -1219
  190. detection/run_inference_with_yolov5_val.py +0 -917
  191. detection/run_tiled_inference.py +0 -935
  192. detection/tf_detector.py +0 -188
  193. detection/video_utils.py +0 -606
  194. docs/source/conf.py +0 -43
  195. md_utils/__init__.py +0 -0
  196. md_utils/azure_utils.py +0 -174
  197. md_utils/ct_utils.py +0 -612
  198. md_utils/directory_listing.py +0 -246
  199. md_utils/md_tests.py +0 -968
  200. md_utils/path_utils.py +0 -1044
  201. md_utils/process_utils.py +0 -157
  202. md_utils/sas_blob_utils.py +0 -509
  203. md_utils/split_locations_into_train_val.py +0 -228
  204. md_utils/string_utils.py +0 -92
  205. md_utils/url_utils.py +0 -323
  206. md_utils/write_html_image_list.py +0 -225
  207. md_visualization/__init__.py +0 -0
  208. md_visualization/plot_utils.py +0 -293
  209. md_visualization/render_images_with_thumbnails.py +0 -275
  210. md_visualization/visualization_utils.py +0 -1537
  211. md_visualization/visualize_db.py +0 -551
  212. md_visualization/visualize_detector_output.py +0 -406
  213. megadetector-5.0.10.dist-info/RECORD +0 -224
  214. megadetector-5.0.10.dist-info/top_level.txt +0 -8
  215. taxonomy_mapping/__init__.py +0 -0
  216. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +0 -491
  217. taxonomy_mapping/map_new_lila_datasets.py +0 -154
  218. taxonomy_mapping/prepare_lila_taxonomy_release.py +0 -142
  219. taxonomy_mapping/preview_lila_taxonomy.py +0 -591
  220. taxonomy_mapping/retrieve_sample_image.py +0 -71
  221. taxonomy_mapping/simple_image_download.py +0 -218
  222. taxonomy_mapping/species_lookup.py +0 -834
  223. taxonomy_mapping/taxonomy_csv_checker.py +0 -159
  224. taxonomy_mapping/taxonomy_graph.py +0 -346
  225. taxonomy_mapping/validate_lila_category_mappings.py +0 -83
  226. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/WHEEL +0 -0
data_management/importers/cacophony-thermal-importer.py
@@ -1,796 +0,0 @@
- """
-
- cacophony-thermal-importer.py
-
- Create data and metadata for LILA from the Cacophony thermal dataset. Takes a folder
- of HDF files, and produces .json metadata, along with compressed/normalized videos for
- each HDF file.
-
- Source format notes for this dataset:
-
- https://docs.google.com/document/d/12sw5JtwdMf9MiXuNCBcvhvZ04Jwa1TH2Lf6LnJmF8Bk/edit
-
- """
-
- #%% Imports and constants
-
- import os
- import h5py
- import numpy as np
- import json
-
- from tqdm import tqdm
- from copy import deepcopy
- from collections import defaultdict
- from multiprocessing.pool import Pool
-
- import zipfile
- from zipfile import ZipFile
-
- import cv2
-
- from md_utils.ct_utils import truncate_float
- from md_utils import path_utils
-
- base_dir = '/bigdata/home/sftp/cacophony-ferraro_/data/cacophony-thermal/'
- output_base = os.path.expanduser('~/tmp/new-zealand-wildlife-thermal-imaging')
- video_output_folder = os.path.join(output_base,'videos')
- individual_metadata_output_folder = os.path.join(output_base,'individual-metadata')
-
- os.makedirs(video_output_folder,exist_ok=True)
- os.makedirs(individual_metadata_output_folder,exist_ok=True)
-
- main_metadata_filename = 'new-zealand-wildlife-thermal-imaging.json'
-
- # Every HDF file specifies a crop rectangle within which the pixels are trustworthy;
- # in practice this is the same across all files.
- expected_crop_rectangle = [1,1,159,119]
-
- # Required attributes for each video
- expected_clip_attributes = ['clip_id', 'crop_rectangle', 'ffc_frames', 'frame_temp_max',
-                             'frame_temp_mean', 'frame_temp_median', 'frame_temp_min', 'max_temp',
-                             'mean_temp', 'min_temp', 'num_frames', 'res_x', 'res_y', 'start_time',
-                             'station_id']
-
- # Attributes that may or may not be present for each video
- optional_clip_attributes = ['temp_thresh','model']
-
- # Required attributes for each trck
- expected_track_attributes = ['end_frame', 'id', 'start_frame']
-
- # Attributes that may or may not be present for each track
- optional_track_attributes = ['human_tag', 'human_tag_confidence', 'human_tags',
-                              'human_tags_confidence', 'ai_tag', 'ai_tag_confidence']
-
- labels_to_ignore_when_another_label_is_present = ['false-positive','unidentified','part','poor tracking']
-
- frame_rate = 9
-
- use_default_filtering = False
- write_as_color = False
-
- # codec = 'ffv1'
- # codec = 'hfyu'
- codec = 'h264'
- overwrite_video = True
-
- codec_to_extension = {'mp4v':'.mp4','ffv1':'.avi','hfyu':'.avi','h264':'.mp4'}
-
- # Set to >0 to process only a subset of clips
- debug_n = -1
- n_workers = 16
- confidence_digits = 3
-
- # Standardize a few tag names
- tag_mappings = {
-     'bird/kiwi':'bird',
-     'allbirds':'bird',
-     'not identifiable':'unidentified',
-     'part':'unidentified',
-     'pest':'unidentified'
- }
-
- # Discard tracks and labels that are below this confidence threshold.
- confidence_threshold = 0.001
-
-
- #%% Support functions
-
- def remove_tracking_points(clip_metadata):
-     """
-     As a debugging convenience, take the metadata for a clip (after conversion to
-     the output format) and remove the only field that makes it hard to read in a
-     console (the track coordinates).
-     """
-
-     slim_metadata = deepcopy(clip_metadata)
-     if 'tracks' in slim_metadata:
-         for t in slim_metadata['tracks']:
-             del t['points']
-     return slim_metadata
-
-
- def norm_image(image,vmin=None,vmax=None,do_normalization=True,stack_channels=True):
-     """
-     Normalize an MxN 2D numpy ndarray (may be any type, but typically uint16) into the range
-     0,255.
-
-     If stack_channels==True, return as an MxNx3 uint8 matrix (content is replicated across
-     all three channels).
-     """
-
-     if vmin is not None:
-         assert vmax is not None
-         assert vmax > vmin
-     if vmax is not None:
-         assert vmin is not None
-
-     assert isinstance(image,np.ndarray)
-     assert isinstance(image[0][0],np.uint16) or isinstance(image[0][0],np.float32), \
-         'First pixel is of type {}'.format(type(image[0][0]))
-     assert len(image.shape) == 2
-
-     norm = np.float32(image)
-
-     if do_normalization:
-
-         if vmin is None:
-             vmin = np.amin(image)
-             vmax = np.amax(image)
-
-         norm = 255 * (norm - vmin) / (vmax - vmin)
-
-     norm = np.uint8(norm)
-     norm = norm[:, :, np.newaxis]
-     if stack_channels:
-         norm = np.repeat(norm, 3, axis=2)
-     return norm
-
-
- #%% Enumerate files
-
- all_files = path_utils.recursive_file_list(base_dir)
- all_hdf_files_relative = [os.path.relpath(fn,base_dir) for fn in all_files if fn.lower().endswith('.hdf5')]
-
- print('Found {} HDF files (of {} total files)'.format(
-     len(all_hdf_files_relative),len(all_files)))
-
-
- #%% Process one file
-
- def process_file(fn_relative,verbose=False):
-     """
-     Read the HDF file, convert to video files with/without filtering, and return
-     a metadata dict for this file.
-     """
-
-     fn_abs = os.path.join(base_dir,fn_relative)
-
-     clip_id = int(os.path.basename(fn_relative).split('.')[0])
-     metadata_fn = os.path.join(individual_metadata_output_folder,str(clip_id) + '_metadata.json')
-
-     clip_metadata = {}
-     clip_metadata['hdf_filename'] = os.path.basename(fn_relative)
-     clip_metadata['id'] = clip_id
-     clip_metadata['error'] = None
-
-     try:
-         h5f = h5py.File(fn_abs, 'r')
-     except Exception as e:
-         print('Could not open file {}: {}'.format(
-             fn_relative,str(e)))
-         clip_metadata['error'] = str(e)
-         with open(metadata_fn,'w') as f:
-             json.dump(clip_metadata,f,indent=1)
-         return clip_metadata
-
-     clip_attrs = h5f.attrs
-
-     for s in expected_clip_attributes:
-         assert s in clip_attrs
-
-     assert clip_id == int(clip_attrs.get('clip_id'))
-     assert os.path.basename(fn_relative).startswith(str(clip_id))
-
-     station_id = clip_attrs.get('station_id')
-     assert isinstance(station_id,np.int64)
-     station_id = int(station_id)
-
-     crop_rectangle = clip_attrs.get('crop_rectangle')
-     assert len(crop_rectangle) == 4
-     for i_coord in range(0,4):
-         assert crop_rectangle[i_coord] == expected_crop_rectangle[i_coord]
-
-     frames = h5f['frames']
-     assert 'thermals' in frames
-
-     # This is an HDF dataset of size n_frames,y,x
-     thermal_frames = frames['thermals']
-     assert len(thermal_frames.shape) == 3
-
-     # If present, this is an HDF dataset of size y,x
-     if 'background' in frames:
-         background_frame = frames['background']
-         assert len(background_frame.shape) == 2
-         assert background_frame.shape[0] == thermal_frames.shape[1]
-         assert background_frame.shape[1] == thermal_frames.shape[2]
-     else:
-         background_frame = None
-     calibration_frame_indices = clip_attrs.get('ffc_frames')
-
-     if len(calibration_frame_indices) > 0:
-         assert max(calibration_frame_indices) < thermal_frames.shape[0]
-
-     assert clip_attrs.get('num_frames') == thermal_frames.shape[0]
-     assert clip_attrs.get('res_x') == thermal_frames.shape[2]
-     assert clip_attrs.get('res_y') == thermal_frames.shape[1]
-     assert clip_attrs.get('model') in [None,'lepton3.5','lepton3']
-
-     tracks = h5f['tracks']
-
-     track_ids = list(tracks.keys())
-
-     # List of dicts
-     tracks_this_clip = []
-
-     # i_track = 0; track_id = track_ids[i_track]
-     for i_track,track_id in enumerate(track_ids):
-
-         track = tracks[track_id]
-
-         if 'human_tags' not in track.attrs.keys():
-             continue
-
-         track_info = {}
-
-         # 'human_tags' is all the tags that were assigned to this track by humans
-         # 'human_tags_confidence' is the confidence for each of those assignments
-         #
-         # If there is a clear "winner", 'human_tag' and 'human_tag' confidence will
-         # identify the clear winner.
-         if 'human_tag' in track.attrs.keys():
-
-             assert 'human_tags' in track.attrs.keys()
-             assert 'human_tags_confidence' in track.attrs.keys()
-             assert 'human_tag_confidence' in track.attrs.keys()
-
-         track_tags = []
-
-         if 'human_tags' in track.attrs.keys():
-
-             assert 'human_tags_confidence' in track.attrs.keys()
-             assert len(track.attrs.get('human_tags_confidence')) == \
-                 len(track.attrs.get('human_tags'))
-
-             human_tags_this_clip = list(track.attrs.get('human_tags'))
-             human_tag_confidences_this_clip = list(track.attrs.get('human_tags_confidence'))
-
-             for i_tag,tag in enumerate(human_tags_this_clip):
-                 assert isinstance(tag,str)
-                 tag_info = {}
-                 tag_info['label'] = tag
-                 conf = float(human_tag_confidences_this_clip[i_tag])
-                 tag_info['confidence'] = truncate_float(conf,confidence_digits)
-                 track_tags.append(tag_info)
-
-         track_start_frame = int(round(track.attrs.get('start_frame')))
-         track_end_frame = int(round(track.attrs.get('end_frame')))
-         track_info['start_frame'] = track_start_frame
-         track_info['end_frame'] = track_end_frame
-         track_info['tags'] = track_tags
-
-         # A list of x/y/frame tuples
-         track_info['points'] = []
-
-         for s in expected_track_attributes:
-             assert s in track.attrs
-
-         positions = track['regions']
-
-         # Positions is an N x 7 matrix in which each row looks like:
-         #
-         # [left,top,right,bottom,frame_number,mass,blank_frame]
-         #
-         # The origin appears to be in the upper-left.
-         #
-         # "blank_frame" indicates that the tracked object is not visible in this frame,
-         # but was predicted from previous frames.
-         assert positions.shape[1] == 7
-
-         # The number of items in the positions array should be equal to the length of the track, but this
-         # can be off by a little when 'start_frame' and/or 'end_frame' are not integers. Make sure this
-         # is approximately true.
-
-         # assert positions.shape[0] == 1 + (track.attrs.get('end_frame') - track.attrs.get('start_frame'))
-         track_length_error = abs(positions.shape[0] -
-                                  (1 + (track.attrs.get('end_frame') - track.attrs.get('start_frame'))))
-         assert track_length_error < 2
-
-         # i_position = 0; position = positions[i_position]
-         for i_position,position in enumerate(positions):
-
-             left = float(position[0])
-             top = float(position[1])
-             right = float(position[2])
-             bottom = float(position[3])
-             frame_number = int(position[4])
-
-             # TODO: I'm being lazy about the fact that these don't reflect the
-             # pixels cropped out of the border. IMO this is OK because for this dataset,
-             # this is just an approximate set of coordinates used to disambiguate simultaneous
-             # areas of movement when multiple different labels are present in the same video.
-             position_info = [left+float((right-left)/2),
-                              top+float((bottom-top)/2),
-                              int(frame_number)]
-             track_info['points'].append(position_info)
-
-             # In a small number of tracks, boxes are turned upside-down or left-over-right,
-             # we don't bother checking for coordinate validity in those tracks.
-             if left <= right:
-                 assert left >= 0 and left < clip_attrs.get('res_x')
-                 assert right >= 0 and right < clip_attrs.get('res_x')
-
-             if top <= bottom:
-                 assert top >= 0 and top < clip_attrs.get('res_y')
-                 assert bottom >= 0 and bottom < clip_attrs.get('res_y')
-
-             # frame_number should be approximately equal to i_position + start_frame, but this
-             # can be off by a little when 'start_frame' and/or 'end_frame' are not integers.
-             # Make sure this is approximately true.
-
-             # assert frame_number == i_position + track.attrs.get('start_frame')
-             frame_number_error = abs(frame_number - (i_position + track.attrs.get('start_frame')))
-             assert frame_number_error <= 2
-
-         # ...for each position in this track
-
-         tracks_this_clip.append(track_info)
-
-     # ...for each track ID
-
-     clip_metadata['tracks'] = tracks_this_clip
-
-     assert len(human_tags_this_clip) > 0
-
-     ffc_frames = clip_attrs.get('ffc_frames').tolist()
-     if len(ffc_frames) > 0:
-         assert max(ffc_frames) < thermal_frames.shape[0]
-     n_ffc_frames = len(ffc_frames)
-     n_frames = thermal_frames.shape[0]
-     if verbose:
-         if (n_ffc_frames / n_frames) > 0.2:
-             print('Warning: in video {}, {} of {} frames are FFC frames (tags: {})'.format(
-                 fn_relative,n_ffc_frames,n_frames,str(human_tags_this_clip)))
-
-     frames = h5f["frames"]
-
-     if "background" in frames:
-         background = frames["background"]
-         background_frame_present = True
-     else:
-         background = frames["thermals"][0]
-         background_frame_present = False
-
-     crop_rectangle = clip_attrs["crop_rectangle"]
-     background = background[
-         crop_rectangle[1]:crop_rectangle[3],
-         crop_rectangle[0]:crop_rectangle[2]
-     ]
-
-     # Compute the median frame value
-     #
-     # (...which we may use for filtering)
-
-     frames_array = np.array(frames['thermals'])
-     frames_array = frames_array[:,crop_rectangle[1] : crop_rectangle[3], crop_rectangle[0] : crop_rectangle[2]]
-     median_values = np.float32(np.median(frames_array,0))
-
-     if (background_frame_present or use_default_filtering):
-         background_for_filtering = background
-     else:
-         if verbose:
-             print('No background present: using median values for background')
-         background_for_filtering = median_values
-
-     # Find the largest value by which any pixel in this video exceeds the background
-     #
-     # (...which we may use for normalization)
-
-     max_pixel_diff = 0
-
-     for frame in frames["thermals"]:
-         cropped_frame = frame[
-             crop_rectangle[1]:crop_rectangle[3],
-             crop_rectangle[0]:crop_rectangle[2]
-         ]
-
-         filtered_frame = np.float32(cropped_frame) - background_for_filtering
-         max_pixel_diff_this_frame = np.amax(filtered_frame)
-         if max_pixel_diff_this_frame > max_pixel_diff:
-             max_pixel_diff = max_pixel_diff_this_frame
-
-     filtered_frames = []
-     original_frames = []
-
-     # i_frame = 0; frame = frames["thermals"][i_frame]
-     for i_frame,frame in enumerate(frames["thermals"]):
-
-         cropped_frame = frame[crop_rectangle[1] : crop_rectangle[3], crop_rectangle[0] : crop_rectangle[2]]
-
-         # Subtract the background frame
-         filtered_frame = np.float32(cropped_frame) - background_for_filtering
-
-         # Assume that nothing can be cooler than the background
-         filtered_frame[filtered_frame < 0] = 0
-
-         # Normalize filtered frame (and convert to three channels)
-
-         if use_default_filtering:
-             filtered_frame = norm_image(filtered_frame,stack_channels=write_as_color)
-         else:
-             filtered_frame = norm_image(filtered_frame,vmin=0,vmax=max_pixel_diff,stack_channels=write_as_color)
-
-         # Normalize original frame (and convert to three channels)
-
-         original_frame = norm_image(cropped_frame,stack_channels=write_as_color)
-
-         filtered_frames.append(filtered_frame)
-         original_frames.append(original_frame)
-
-     # ...for each frame
-
-     # filtered_frames[0].shape[1] is 158, clip_attrs.get('res_x') is 160, ergo shape is h,w
-     video_w = filtered_frames[0].shape[1]
-     video_h = filtered_frames[0].shape[0]
-
-     clip_metadata['width'] = video_w
-     clip_metadata['height'] = video_h
-     clip_metadata['frame_rate'] = frame_rate
-
-     filtered_video_fn = os.path.join(video_output_folder,str(clip_id) + '_filtered' + codec_to_extension[codec])
-     unfiltered_video_fn = os.path.join(video_output_folder,str(clip_id) + codec_to_extension[codec])
-
-     if overwrite_video or (not os.path.isfile(filtered_video_fn)):
-
-         filtered_video_out = cv2.VideoWriter(filtered_video_fn, cv2.VideoWriter_fourcc(*codec), frame_rate,
-                                              (video_w, video_h), isColor=write_as_color)
-
-         for i_frame,filtered_frame in enumerate(filtered_frames):
-             filtered_video_out.write(filtered_frame)
-         filtered_video_out.release()
-
-     if overwrite_video or (not os.path.isfile(unfiltered_video_fn)):
-
-         unfiltered_video_out = cv2.VideoWriter(unfiltered_video_fn, cv2.VideoWriter_fourcc(*codec), frame_rate,
-                                                (video_w, video_h), isColor=write_as_color)
-
-         for i_frame,frame in enumerate(original_frames):
-             unfiltered_video_out.write(frame)
-         unfiltered_video_out.release()
-
-     labels_this_clip = set()
-
-
-     ## Do some cleanup of tracks and track labels
-
-     valid_tracks = []
-
-     for track_info in clip_metadata['tracks']:
-
-         valid_tags = []
-
-         # Replace some tags with standardized names (e.g. map "allbirds" to "bird")
-         for tag in track_info['tags']:
-             if tag['label'] in tag_mappings:
-                 tag['label'] = tag_mappings[tag['label']]
-
-             # Discard tags below the minimum confidence
-             if tag['confidence'] >= confidence_threshold:
-                 valid_tags.append(tag)
-             else:
-                 # TODO
-                 print('Zero-confidence tag in {}'.format(fn_relative))
-
-         track_info['tags'] = valid_tags
-
-         # Don't keep any tracks that had no tags above the minimum confidence
-         if len(valid_tags) > 0:
-             valid_tracks.append(track_info)
-         else:
-             # TODO
-             print('Invalid track in {}'.format(fn_relative))
-
-     # ...for each track
-
-     if (len(clip_metadata['tracks']) > 0) and (len(valid_tracks) == 0):
-         # TODO
-         print('Removed all tracks from {}'.format(fn_relative))
-
-     clip_metadata['tracks'] = valid_tracks
-
-     # Build up the list of labels for this clip
-     for track_info in clip_metadata['tracks']:
-         for tag in track_info['tags']:
-             tag_label = tag['label']
-             labels_this_clip.add(tag_label)
-
-     clip_metadata['labels'] = sorted(list(labels_this_clip))
-
-     metadata_fn = os.path.join(individual_metadata_output_folder,str(clip_id) + '_metadata.json')
-
-     # clip_metadata['id'] = clip_id
-     # clip_metadata['hdf_filename'] = os.path.basename(fn_relative)
-
-     clip_metadata['video_filename'] = os.path.basename(unfiltered_video_fn)
-     clip_metadata['filtered_video_filename'] = os.path.basename(filtered_video_fn)
-     clip_metadata['location'] = station_id
-     clip_metadata['calibration_frames'] = ffc_frames
-     clip_metadata['metadata_filename'] = os.path.basename(metadata_fn)
-
-     with open(metadata_fn,'w') as f:
-         json.dump(clip_metadata,f,indent=1)
-
-     return clip_metadata
-
- # ...process_file(...)
-
-
- #%% Process files
-
- n_workers = 16
-
- if debug_n > 0:
-     files_to_process = all_hdf_files_relative[0:debug_n]
- else:
-     files_to_process = all_hdf_files_relative
-
- if n_workers <= 1:
-
-     all_clip_metadata = []
-     for i_file,fn_relative in tqdm(enumerate(files_to_process),total=len(files_to_process)):
-         clip_metadata = process_file(fn_relative)
-         all_clip_metadata.append(clip_metadata)
-
- else:
-
-     pool = Pool(n_workers)
-     all_clip_metadata = list(tqdm(pool.imap(process_file,files_to_process),
-                                   total=len(files_to_process)))
-
-
- #%% Postprocessing
-
- failed_file_to_error = {}
-
- label_to_video_count = defaultdict(int)
-
- # clip_metadata = all_clip_metadata[0]
- for clip_metadata in all_clip_metadata:
-
-     if clip_metadata['error'] is not None:
-         failed_file_to_error[clip_metadata['hdf_filename']] = clip_metadata['error']
-         continue
-
-     labels_this_clip = set()
-
-     # track_info = clip_metadata['tracks'][0]
-     for track_info in clip_metadata['tracks']:
-         for tag in track_info['tags']:
-             tag_label = tag['label']
-             labels_this_clip.add(tag_label)
-
-     for label in labels_this_clip:
-         label_to_video_count[label] += 1
-
- # ...for each clip
-
- label_to_video_count = {k: v for k, v in sorted(label_to_video_count.items(),
-                                                 key=lambda item: item[1], reverse=True)}
-
- print('Failed to open {} of {} files'.format(
-     len(failed_file_to_error),len(all_hdf_files_relative)))
-
- print('Labels:\n')
-
- for label in label_to_video_count:
-     print('{}: {}'.format(label,label_to_video_count[label]))
-
-
- #%% Write count .csv
-
- count_csv_file_name = os.path.join(output_base,'new-zealand-wildlife-thermal-imaging-counts.csv')
-
- with open(count_csv_file_name,'w') as f:
-     f.write('label,count\n')
-     for label in label_to_video_count:
-         f.write('{},{}\n'.format(label,label_to_video_count[label]))
-
-
- #%% Build and zip the main .json file
-
- main_metadata_filename_abs = os.path.join(output_base,main_metadata_filename)
-
- info = {}
- info['version'] = '1.0.0'
- info['description'] = 'New Zealand Thermal Wildlife Imaging'
- info['contributor'] = 'Cacophony Project'
-
- main_metadata = {}
- main_metadata['info'] = info
- main_metadata['clips'] = []
-
- # clip_metadata = all_clip_metadata[0]
- for clip_metadata in tqdm(all_clip_metadata):
-     slim_metadata = remove_tracking_points(clip_metadata)
-
-     if 'tracks' in slim_metadata:
-         for track in slim_metadata['tracks']:
-             for tag in track['tags']:
-                 tag['confidence'] = truncate_float(tag['confidence'],confidence_digits)
-
-     main_metadata['clips'].append(slim_metadata)
-
- with open(main_metadata_filename_abs,'w') as f:
-     json.dump(main_metadata,f,indent=1)
-
- zip_file_name = main_metadata_filename_abs.replace('.json','-metadata.json.zip')
- with ZipFile(zip_file_name,'w',zipfile.ZIP_DEFLATED) as zipf:
-     zipf.write(main_metadata_filename_abs,
-                arcname=os.path.basename(main_metadata_filename_abs),
-                compresslevel=9,compress_type=zipfile.ZIP_DEFLATED)
-
-
- #%% Create a zipfile containing videos, main metadata, and individual metadata
-
- zip_file_name = os.path.join(output_base,'new-zealand-wildlife-thermal-imaging.zip')
-
- all_files = path_utils.recursive_file_list(output_base)
- all_files_relative = [os.path.relpath(fn,output_base) for fn in all_files]
- all_files_to_zip_relative = [fn for fn in all_files_relative if \
-                              (\
-                               ('individual-metadata/' in fn) or \
-                               ('videos/' in fn) or \
-                               (fn.endswith('.json'))
-                              )]
-
- print('Zipping {} files (of {} total files)'.format(len(all_files_to_zip_relative),len(all_files)))
-
- with ZipFile(zip_file_name,'w',zipfile.ZIP_DEFLATED) as zipf:
-     for fn_relative in tqdm(all_files_to_zip_relative):
-         fn_abs = os.path.join(output_base,fn_relative)
-         if fn_abs.endswith('.mp4'):
-             zipf.write(fn_abs,arcname=fn_relative,compresslevel=0,compress_type=zipfile.ZIP_STORED)
-         else:
-             zipf.write(fn_abs,arcname=fn_relative,compresslevel=9,compress_type=zipfile.ZIP_DEFLATED)
-
-
- #%% Scrap
-
- if False:
-
-     pass
-
-     #%% Process one file
-
-     # i_file = 110680; fn_relative = all_hdf_files_relative[i_file]
-     # i_file = 8; fn_relative = all_hdf_files_relative[i_file]
-
-     fn_relative = [fn for fn in all_hdf_files_relative if '450281' in fn][0]
-
-     clip_metadata = process_file(fn_relative)
-
-
-     #%% Move individual metadata files
-
-     source_folder = base_dir
-     target_folder = os.path.expanduser('~/tmp/cacophony-thermal-out-individual-metadata')
-     assert os.path.isdir(source_folder) and os.path.isdir(target_folder)
-
-     from md_utils import path_utils
-     all_files = path_utils.recursive_file_list(source_folder)
-     files_to_move = [fn for fn in all_files if '_metadata.json' in fn]
-     print('Moving {} of {} files'.format(len(files_to_move),len(all_files)))
-
-     import shutil
-     # source_fn = files_to_move[0]
-     for source_fn in tqdm(files_to_move):
-         target_fn = os.path.join(target_folder,os.path.basename(source_fn))
-         shutil.move(source_fn,target_fn)
-
-
-     #%% Choose a random video with a particular label
-
-     target_label = 'pukeko'
-     target_clips = []
-
-     for clip_metadata in all_clip_metadata:
-
-         if clip_metadata['error'] is not None:
-             continue
-
-         labels_this_clip = set()
-
-         # track_info = clip_metadata['tracks'][0]
-         for track_info in clip_metadata['tracks']:
-             for tag in track_info['tags']:
-                 tag_label = tag['label']
-                 labels_this_clip.add(tag_label)
-
-         if target_label in labels_this_clip:
-             target_clips.append(clip_metadata)
-
-     print('Found {} matches'.format(len(target_clips)))
-
-     import random
-     selected_clip = random.choice(target_clips)
-     filtered_video_filename = selected_clip['filtered_video_filename']
-     video_filename = selected_clip['video_filename']
-
-     from md_utils.path_utils import open_file
-     # open_file(os.path.join(output_base,video_filename))
-     open_file(os.path.join(output_base,filtered_video_filename))
-
-     # import clipboard; clipboard.copy(os.path.join(output_base,video_filename))
-
-
-     #%% Look for clips with multiple different labels
-
-     for i_clip,clip_metadata in enumerate(all_clip_metadata):
-
-         if clip_metadata['error'] is not None:
-             continue
-
-         labels_this_clip = set()
-
-         # track_info = clip_metadata['tracks'][0]
-         for track_info in clip_metadata['tracks']:
-             for tag in track_info['tags']:
-                 tag_label = tag['label']
-                 if tag_label not in labels_to_ignore_when_another_label_is_present:
-                     labels_this_clip.add(tag_label)
-
-         assert len(labels_this_clip) <= 3
-
-         if len(labels_this_clip) > 1:
-             print('Clip {} has {} labels: {}'.format(
-                 i_clip,len(labels_this_clip),str(labels_this_clip)))
-
-         # remove_tracking_points(clip_metadata)
-
-
-     #%% Add the .json filename to each clip in all_clip_metadata
-
-     for i_clip,clip_metadata in tqdm(enumerate(all_clip_metadata),
-                                      total=len(all_clip_metadata)):
-
-         clip_metadata['metadata_filename'] = clip_metadata['hdf_filename'].replace('.hdf5',
-                                                                                    '_metadata.json')
-
-
-     #%% Add a "labels" field to each .json file
-
-     # This was only necessary during debugging; this is added in the main loop now.
-
-     for i_clip,clip_metadata in tqdm(enumerate(all_clip_metadata),
-                                      total=len(all_clip_metadata)):
-
-         if clip_metadata['error'] is not None:
-             continue
-
-         labels_this_clip = set()
-
-         # track_info = clip_metadata['tracks'][0]
-         for track_info in clip_metadata['tracks']:
-             for tag in track_info['tags']:
-                 tag_label = tag['label']
-                 # if tag_label not in labels_to_ignore_when_another_label_is_present:
-                 if True:
-                     labels_this_clip.add(tag_label)
-
-         clip_metadata['labels'] = sorted(list(labels_this_clip))
-
-         json_filename = os.path.join(output_base,str(clip_metadata['id']) + '_metadata.json')
-         assert os.path.isfile(json_filename)
-
-         with open(json_filename,'w') as f:
-             json.dump(clip_metadata,f,indent=1)