megadetector 5.0.11__py3-none-any.whl → 5.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (203)
  1. megadetector/api/__init__.py +0 -0
  2. megadetector/api/batch_processing/__init__.py +0 -0
  3. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  4. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. megadetector/api/batch_processing/api_core/batch_service/score.py +439 -0
  6. megadetector/api/batch_processing/api_core/server.py +294 -0
  7. megadetector/api/batch_processing/api_core/server_api_config.py +97 -0
  8. megadetector/api/batch_processing/api_core/server_app_config.py +55 -0
  9. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +220 -0
  10. megadetector/api/batch_processing/api_core/server_job_status_table.py +149 -0
  11. megadetector/api/batch_processing/api_core/server_orchestration.py +360 -0
  12. megadetector/api/batch_processing/api_core/server_utils.py +88 -0
  13. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  14. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +46 -0
  15. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  16. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +152 -0
  17. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  18. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  19. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  20. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  21. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
  22. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  23. megadetector/api/synchronous/__init__.py +0 -0
  24. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  25. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +152 -0
  26. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +263 -0
  27. megadetector/api/synchronous/api_core/animal_detection_api/config.py +35 -0
  28. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  29. megadetector/api/synchronous/api_core/tests/load_test.py +110 -0
  30. megadetector/classification/__init__.py +0 -0
  31. megadetector/classification/aggregate_classifier_probs.py +108 -0
  32. megadetector/classification/analyze_failed_images.py +227 -0
  33. megadetector/classification/cache_batchapi_outputs.py +198 -0
  34. megadetector/classification/create_classification_dataset.py +627 -0
  35. megadetector/classification/crop_detections.py +516 -0
  36. megadetector/classification/csv_to_json.py +226 -0
  37. megadetector/classification/detect_and_crop.py +855 -0
  38. megadetector/classification/efficientnet/__init__.py +9 -0
  39. megadetector/classification/efficientnet/model.py +415 -0
  40. megadetector/classification/efficientnet/utils.py +607 -0
  41. megadetector/classification/evaluate_model.py +520 -0
  42. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  43. megadetector/classification/json_to_azcopy_list.py +63 -0
  44. megadetector/classification/json_validator.py +699 -0
  45. megadetector/classification/map_classification_categories.py +276 -0
  46. megadetector/classification/merge_classification_detection_output.py +506 -0
  47. megadetector/classification/prepare_classification_script.py +194 -0
  48. megadetector/classification/prepare_classification_script_mc.py +228 -0
  49. megadetector/classification/run_classifier.py +287 -0
  50. megadetector/classification/save_mislabeled.py +110 -0
  51. megadetector/classification/train_classifier.py +827 -0
  52. megadetector/classification/train_classifier_tf.py +725 -0
  53. megadetector/classification/train_utils.py +323 -0
  54. megadetector/data_management/__init__.py +0 -0
  55. megadetector/data_management/annotations/__init__.py +0 -0
  56. megadetector/data_management/annotations/annotation_constants.py +34 -0
  57. megadetector/data_management/camtrap_dp_to_coco.py +237 -0
  58. megadetector/data_management/cct_json_utils.py +404 -0
  59. megadetector/data_management/cct_to_md.py +176 -0
  60. megadetector/data_management/cct_to_wi.py +289 -0
  61. megadetector/data_management/coco_to_labelme.py +283 -0
  62. megadetector/data_management/coco_to_yolo.py +662 -0
  63. megadetector/data_management/databases/__init__.py +0 -0
  64. megadetector/data_management/databases/add_width_and_height_to_db.py +33 -0
  65. megadetector/data_management/databases/combine_coco_camera_traps_files.py +206 -0
  66. megadetector/data_management/databases/integrity_check_json_db.py +493 -0
  67. megadetector/data_management/databases/subset_json_db.py +115 -0
  68. megadetector/data_management/generate_crops_from_cct.py +149 -0
  69. megadetector/data_management/get_image_sizes.py +189 -0
  70. megadetector/data_management/importers/add_nacti_sizes.py +52 -0
  71. megadetector/data_management/importers/add_timestamps_to_icct.py +79 -0
  72. megadetector/data_management/importers/animl_results_to_md_results.py +158 -0
  73. megadetector/data_management/importers/auckland_doc_test_to_json.py +373 -0
  74. megadetector/data_management/importers/auckland_doc_to_json.py +201 -0
  75. megadetector/data_management/importers/awc_to_json.py +191 -0
  76. megadetector/data_management/importers/bellevue_to_json.py +273 -0
  77. megadetector/data_management/importers/cacophony-thermal-importer.py +793 -0
  78. megadetector/data_management/importers/carrizo_shrubfree_2018.py +269 -0
  79. megadetector/data_management/importers/carrizo_trail_cam_2017.py +289 -0
  80. megadetector/data_management/importers/cct_field_adjustments.py +58 -0
  81. megadetector/data_management/importers/channel_islands_to_cct.py +913 -0
  82. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +180 -0
  83. megadetector/data_management/importers/eMammal/eMammal_helpers.py +249 -0
  84. megadetector/data_management/importers/eMammal/make_eMammal_json.py +223 -0
  85. megadetector/data_management/importers/ena24_to_json.py +276 -0
  86. megadetector/data_management/importers/filenames_to_json.py +386 -0
  87. megadetector/data_management/importers/helena_to_cct.py +283 -0
  88. megadetector/data_management/importers/idaho-camera-traps.py +1407 -0
  89. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +294 -0
  90. megadetector/data_management/importers/jb_csv_to_json.py +150 -0
  91. megadetector/data_management/importers/mcgill_to_json.py +250 -0
  92. megadetector/data_management/importers/missouri_to_json.py +490 -0
  93. megadetector/data_management/importers/nacti_fieldname_adjustments.py +79 -0
  94. megadetector/data_management/importers/noaa_seals_2019.py +181 -0
  95. megadetector/data_management/importers/pc_to_json.py +365 -0
  96. megadetector/data_management/importers/plot_wni_giraffes.py +123 -0
  97. megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -0
  98. megadetector/data_management/importers/prepare_zsl_imerit.py +131 -0
  99. megadetector/data_management/importers/rspb_to_json.py +356 -0
  100. megadetector/data_management/importers/save_the_elephants_survey_A.py +320 -0
  101. megadetector/data_management/importers/save_the_elephants_survey_B.py +329 -0
  102. megadetector/data_management/importers/snapshot_safari_importer.py +758 -0
  103. megadetector/data_management/importers/snapshot_safari_importer_reprise.py +665 -0
  104. megadetector/data_management/importers/snapshot_serengeti_lila.py +1067 -0
  105. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +150 -0
  106. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +153 -0
  107. megadetector/data_management/importers/sulross_get_exif.py +65 -0
  108. megadetector/data_management/importers/timelapse_csv_set_to_json.py +490 -0
  109. megadetector/data_management/importers/ubc_to_json.py +399 -0
  110. megadetector/data_management/importers/umn_to_json.py +507 -0
  111. megadetector/data_management/importers/wellington_to_json.py +263 -0
  112. megadetector/data_management/importers/wi_to_json.py +442 -0
  113. megadetector/data_management/importers/zamba_results_to_md_results.py +181 -0
  114. megadetector/data_management/labelme_to_coco.py +547 -0
  115. megadetector/data_management/labelme_to_yolo.py +272 -0
  116. megadetector/data_management/lila/__init__.py +0 -0
  117. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +97 -0
  118. megadetector/data_management/lila/add_locations_to_nacti.py +147 -0
  119. megadetector/data_management/lila/create_lila_blank_set.py +558 -0
  120. megadetector/data_management/lila/create_lila_test_set.py +152 -0
  121. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  122. megadetector/data_management/lila/download_lila_subset.py +178 -0
  123. megadetector/data_management/lila/generate_lila_per_image_labels.py +516 -0
  124. megadetector/data_management/lila/get_lila_annotation_counts.py +170 -0
  125. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  126. megadetector/data_management/lila/lila_common.py +300 -0
  127. megadetector/data_management/lila/test_lila_metadata_urls.py +132 -0
  128. megadetector/data_management/ocr_tools.py +870 -0
  129. megadetector/data_management/read_exif.py +809 -0
  130. megadetector/data_management/remap_coco_categories.py +84 -0
  131. megadetector/data_management/remove_exif.py +66 -0
  132. megadetector/data_management/rename_images.py +187 -0
  133. megadetector/data_management/resize_coco_dataset.py +189 -0
  134. megadetector/data_management/wi_download_csv_to_coco.py +247 -0
  135. megadetector/data_management/yolo_output_to_md_output.py +446 -0
  136. megadetector/data_management/yolo_to_coco.py +676 -0
  137. megadetector/detection/__init__.py +0 -0
  138. megadetector/detection/detector_training/__init__.py +0 -0
  139. megadetector/detection/detector_training/model_main_tf2.py +114 -0
  140. megadetector/detection/process_video.py +846 -0
  141. megadetector/detection/pytorch_detector.py +355 -0
  142. megadetector/detection/run_detector.py +779 -0
  143. megadetector/detection/run_detector_batch.py +1219 -0
  144. megadetector/detection/run_inference_with_yolov5_val.py +1087 -0
  145. megadetector/detection/run_tiled_inference.py +934 -0
  146. megadetector/detection/tf_detector.py +192 -0
  147. megadetector/detection/video_utils.py +698 -0
  148. megadetector/postprocessing/__init__.py +0 -0
  149. megadetector/postprocessing/add_max_conf.py +64 -0
  150. megadetector/postprocessing/categorize_detections_by_size.py +165 -0
  151. megadetector/postprocessing/classification_postprocessing.py +716 -0
  152. megadetector/postprocessing/combine_api_outputs.py +249 -0
  153. megadetector/postprocessing/compare_batch_results.py +966 -0
  154. megadetector/postprocessing/convert_output_format.py +396 -0
  155. megadetector/postprocessing/load_api_results.py +195 -0
  156. megadetector/postprocessing/md_to_coco.py +310 -0
  157. megadetector/postprocessing/md_to_labelme.py +330 -0
  158. megadetector/postprocessing/merge_detections.py +412 -0
  159. megadetector/postprocessing/postprocess_batch_results.py +1908 -0
  160. megadetector/postprocessing/remap_detection_categories.py +170 -0
  161. megadetector/postprocessing/render_detection_confusion_matrix.py +660 -0
  162. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +211 -0
  163. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +83 -0
  164. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1635 -0
  165. megadetector/postprocessing/separate_detections_into_folders.py +730 -0
  166. megadetector/postprocessing/subset_json_detector_output.py +700 -0
  167. megadetector/postprocessing/top_folders_to_bottom.py +223 -0
  168. megadetector/taxonomy_mapping/__init__.py +0 -0
  169. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  170. megadetector/taxonomy_mapping/map_new_lila_datasets.py +150 -0
  171. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -0
  172. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +588 -0
  173. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  174. megadetector/taxonomy_mapping/simple_image_download.py +219 -0
  175. megadetector/taxonomy_mapping/species_lookup.py +834 -0
  176. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  177. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  178. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  179. megadetector/utils/__init__.py +0 -0
  180. megadetector/utils/azure_utils.py +178 -0
  181. megadetector/utils/ct_utils.py +613 -0
  182. megadetector/utils/directory_listing.py +246 -0
  183. megadetector/utils/md_tests.py +1164 -0
  184. megadetector/utils/path_utils.py +1045 -0
  185. megadetector/utils/process_utils.py +160 -0
  186. megadetector/utils/sas_blob_utils.py +509 -0
  187. megadetector/utils/split_locations_into_train_val.py +228 -0
  188. megadetector/utils/string_utils.py +92 -0
  189. megadetector/utils/url_utils.py +323 -0
  190. megadetector/utils/write_html_image_list.py +225 -0
  191. megadetector/visualization/__init__.py +0 -0
  192. megadetector/visualization/plot_utils.py +293 -0
  193. megadetector/visualization/render_images_with_thumbnails.py +275 -0
  194. megadetector/visualization/visualization_utils.py +1536 -0
  195. megadetector/visualization/visualize_db.py +552 -0
  196. megadetector/visualization/visualize_detector_output.py +405 -0
  197. {megadetector-5.0.11.dist-info → megadetector-5.0.13.dist-info}/LICENSE +0 -0
  198. {megadetector-5.0.11.dist-info → megadetector-5.0.13.dist-info}/METADATA +2 -2
  199. megadetector-5.0.13.dist-info/RECORD +201 -0
  200. megadetector-5.0.13.dist-info/top_level.txt +1 -0
  201. megadetector-5.0.11.dist-info/RECORD +0 -5
  202. megadetector-5.0.11.dist-info/top_level.txt +0 -1
  203. {megadetector-5.0.11.dist-info → megadetector-5.0.13.dist-info}/WHEEL +0 -0
@@ -0,0 +1,793 @@
"""

cacophony-thermal-importer.py

Create data and metadata for LILA from the Cacophony thermal dataset. Takes a folder
of HDF files, and produces .json metadata, along with compressed/normalized videos for
each HDF file.

Source format notes for this dataset:

https://docs.google.com/document/d/12sw5JtwdMf9MiXuNCBcvhvZ04Jwa1TH2Lf6LnJmF8Bk/edit

"""

#%% Imports and constants

import os
import h5py
import numpy as np
import json

from tqdm import tqdm
from copy import deepcopy
from collections import defaultdict
from multiprocessing.pool import Pool

import zipfile
from zipfile import ZipFile

import cv2

from megadetector.utils.ct_utils import truncate_float
from megadetector.utils import path_utils

base_dir = '/bigdata/home/sftp/cacophony-ferraro_/data/cacophony-thermal/'
output_base = os.path.expanduser('~/tmp/new-zealand-wildlife-thermal-imaging')
video_output_folder = os.path.join(output_base,'videos')
individual_metadata_output_folder = os.path.join(output_base,'individual-metadata')

os.makedirs(video_output_folder,exist_ok=True)
os.makedirs(individual_metadata_output_folder,exist_ok=True)

main_metadata_filename = 'new-zealand-wildlife-thermal-imaging.json'

# Every HDF file specifies a crop rectangle within which the pixels are trustworthy;
# in practice this is the same across all files.
expected_crop_rectangle = [1,1,159,119]

# Required attributes for each video
expected_clip_attributes = ['clip_id', 'crop_rectangle', 'ffc_frames', 'frame_temp_max',
                            'frame_temp_mean', 'frame_temp_median', 'frame_temp_min', 'max_temp',
                            'mean_temp', 'min_temp', 'num_frames', 'res_x', 'res_y', 'start_time',
                            'station_id']

# Attributes that may or may not be present for each video
optional_clip_attributes = ['temp_thresh','model']

# Required attributes for each track
expected_track_attributes = ['end_frame', 'id', 'start_frame']

# Attributes that may or may not be present for each track
optional_track_attributes = ['human_tag', 'human_tag_confidence', 'human_tags',
                             'human_tags_confidence', 'ai_tag', 'ai_tag_confidence']

labels_to_ignore_when_another_label_is_present = ['false-positive','unidentified','part','poor tracking']

frame_rate = 9

use_default_filtering = False
write_as_color = False

# codec = 'ffv1'
# codec = 'hfyu'
codec = 'h264'
overwrite_video = True

codec_to_extension = {'mp4v':'.mp4','ffv1':'.avi','hfyu':'.avi','h264':'.mp4'}

# Set to >0 to process only a subset of clips
debug_n = -1
n_workers = 16
confidence_digits = 3

# Standardize a few tag names
tag_mappings = {
    'bird/kiwi':'bird',
    'allbirds':'bird',
    'not identifiable':'unidentified',
    'part':'unidentified',
    'pest':'unidentified'
}

# Discard tracks and labels that are below this confidence threshold.
confidence_threshold = 0.001

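# Illustrative sketch, assuming a hypothetical file 'sample.hdf5': the clip-level
# metadata validated below lives in HDF5 attributes, which h5py exposes as a
# dict-like .attrs object.

if False:

    with h5py.File('sample.hdf5', 'r') as example_h5f:
        for attribute_name in expected_clip_attributes:
            print(attribute_name, example_h5f.attrs.get(attribute_name))
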
#%% Support functions

def remove_tracking_points(clip_metadata):
    """
    As a debugging convenience, take the metadata for a clip (after conversion to
    the output format) and remove the only field that makes it hard to read in a
    console (the track coordinates).
    """

    slim_metadata = deepcopy(clip_metadata)
    if 'tracks' in slim_metadata:
        for t in slim_metadata['tracks']:
            del t['points']
    return slim_metadata

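# Illustrative usage sketch, assuming clip_metadata came from process_file() below:

if False:

    slim = remove_tracking_points(clip_metadata)
    print(json.dumps(slim, indent=1))
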
def norm_image(image,vmin=None,vmax=None,do_normalization=True,stack_channels=True):
    """
    Normalize an MxN 2D numpy ndarray (may be any type, but typically uint16) into the range
    0,255.

    If stack_channels==True, return as an MxNx3 uint8 matrix (content is replicated across
    all three channels).
    """

    if vmin is not None:
        assert vmax is not None
        assert vmax > vmin
    if vmax is not None:
        assert vmin is not None

    assert isinstance(image,np.ndarray)
    assert isinstance(image[0][0],np.uint16) or isinstance(image[0][0],np.float32), \
        'First pixel is of type {}'.format(type(image[0][0]))
    assert len(image.shape) == 2

    norm = np.float32(image)

    if do_normalization:

        if vmin is None:
            vmin = np.amin(image)
            vmax = np.amax(image)

        norm = 255 * (norm - vmin) / (vmax - vmin)

    norm = np.uint8(norm)
    norm = norm[:, :, np.newaxis]
    if stack_channels:
        norm = np.repeat(norm, 3, axis=2)
    return norm

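# A quick self-contained check of norm_image on a synthetic uint16 frame; the
# frame contents are arbitrary, illustrative values.

if False:

    rng = np.random.default_rng(0)
    test_frame = rng.integers(0, 4000, size=(120, 160), dtype=np.uint16)
    normalized = norm_image(test_frame)
    assert normalized.dtype == np.uint8
    assert normalized.shape == (120, 160, 3)
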
#%% Enumerate files

all_files = path_utils.recursive_file_list(base_dir)
all_hdf_files_relative = [os.path.relpath(fn,base_dir) for fn in all_files if fn.lower().endswith('.hdf5')]

print('Found {} HDF files (of {} total files)'.format(
    len(all_hdf_files_relative),len(all_files)))


#%% Process one file

def process_file(fn_relative,verbose=False):
    """
    Read the HDF file, convert to video files with/without filtering, and return
    a metadata dict for this file.
    """

    fn_abs = os.path.join(base_dir,fn_relative)

    clip_id = int(os.path.basename(fn_relative).split('.')[0])
    metadata_fn = os.path.join(individual_metadata_output_folder,str(clip_id) + '_metadata.json')

    clip_metadata = {}
    clip_metadata['hdf_filename'] = os.path.basename(fn_relative)
    clip_metadata['id'] = clip_id
    clip_metadata['error'] = None

    try:
        h5f = h5py.File(fn_abs, 'r')
    except Exception as e:
        print('Could not open file {}: {}'.format(
            fn_relative,str(e)))
        clip_metadata['error'] = str(e)
        with open(metadata_fn,'w') as f:
            json.dump(clip_metadata,f,indent=1)
        return clip_metadata

    clip_attrs = h5f.attrs

    for s in expected_clip_attributes:
        assert s in clip_attrs

    assert clip_id == int(clip_attrs.get('clip_id'))
    assert os.path.basename(fn_relative).startswith(str(clip_id))

    station_id = clip_attrs.get('station_id')
    assert isinstance(station_id,np.int64)
    station_id = int(station_id)

    crop_rectangle = clip_attrs.get('crop_rectangle')
    assert len(crop_rectangle) == 4
    for i_coord in range(0,4):
        assert crop_rectangle[i_coord] == expected_crop_rectangle[i_coord]

    frames = h5f['frames']
    assert 'thermals' in frames

    # This is an HDF dataset of size n_frames,y,x
    thermal_frames = frames['thermals']
    assert len(thermal_frames.shape) == 3

    # If present, this is an HDF dataset of size y,x
    if 'background' in frames:
        background_frame = frames['background']
        assert len(background_frame.shape) == 2
        assert background_frame.shape[0] == thermal_frames.shape[1]
        assert background_frame.shape[1] == thermal_frames.shape[2]
    else:
        background_frame = None
    calibration_frame_indices = clip_attrs.get('ffc_frames')

    if len(calibration_frame_indices) > 0:
        assert max(calibration_frame_indices) < thermal_frames.shape[0]

    assert clip_attrs.get('num_frames') == thermal_frames.shape[0]
    assert clip_attrs.get('res_x') == thermal_frames.shape[2]
    assert clip_attrs.get('res_y') == thermal_frames.shape[1]
    assert clip_attrs.get('model') in [None,'lepton3.5','lepton3']

    tracks = h5f['tracks']

    track_ids = list(tracks.keys())

    # List of dicts
    tracks_this_clip = []

    # i_track = 0; track_id = track_ids[i_track]
    for i_track,track_id in enumerate(track_ids):

        track = tracks[track_id]

        if 'human_tags' not in track.attrs.keys():
            continue

        track_info = {}

        # 'human_tags' is all the tags that were assigned to this track by humans
        # 'human_tags_confidence' is the confidence for each of those assignments
        #
        # If there is a clear "winner", 'human_tag' and 'human_tag_confidence' will
        # identify the clear winner.
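        #
        # Hypothetical illustration: human_tags = ['possum', 'cat'] with
        # human_tags_confidence = [0.85, 0.15] would yield human_tag = 'possum'
        # and human_tag_confidence = 0.85.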
        if 'human_tag' in track.attrs.keys():

            assert 'human_tags' in track.attrs.keys()
            assert 'human_tags_confidence' in track.attrs.keys()
            assert 'human_tag_confidence' in track.attrs.keys()

        track_tags = []

        if 'human_tags' in track.attrs.keys():

            assert 'human_tags_confidence' in track.attrs.keys()
            assert len(track.attrs.get('human_tags_confidence')) == \
                len(track.attrs.get('human_tags'))

            human_tags_this_clip = list(track.attrs.get('human_tags'))
            human_tag_confidences_this_clip = list(track.attrs.get('human_tags_confidence'))

            for i_tag,tag in enumerate(human_tags_this_clip):
                assert isinstance(tag,str)
                tag_info = {}
                tag_info['label'] = tag
                conf = float(human_tag_confidences_this_clip[i_tag])
                tag_info['confidence'] = truncate_float(conf,confidence_digits)
                track_tags.append(tag_info)

        track_start_frame = int(round(track.attrs.get('start_frame')))
        track_end_frame = int(round(track.attrs.get('end_frame')))
        track_info['start_frame'] = track_start_frame
        track_info['end_frame'] = track_end_frame
        track_info['tags'] = track_tags

        # A list of x/y/frame tuples
        track_info['points'] = []

        for s in expected_track_attributes:
            assert s in track.attrs

        positions = track['regions']

        # Positions is an N x 7 matrix in which each row looks like:
        #
        # [left,top,right,bottom,frame_number,mass,blank_frame]
        #
        # The origin appears to be in the upper-left.
        #
        # "blank_frame" indicates that the tracked object is not visible in this frame,
        # but was predicted from previous frames.
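        #
        # For example, a hypothetical row [10., 20., 42., 60., 7., 153., 0.]
        # would describe a box spanning x in [10,42] and y in [20,60] in frame 7;
        # the center point recorded below would be [26.0, 40.0, 7].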
        assert positions.shape[1] == 7

        # The number of items in the positions array should be equal to the length of the track, but this
        # can be off by a little when 'start_frame' and/or 'end_frame' are not integers. Make sure this
        # is approximately true.

        # assert positions.shape[0] == 1 + (track.attrs.get('end_frame') - track.attrs.get('start_frame'))
        track_length_error = abs(positions.shape[0] -
                                 (1 + (track.attrs.get('end_frame') - track.attrs.get('start_frame'))))
        assert track_length_error < 2

        # i_position = 0; position = positions[i_position]
        for i_position,position in enumerate(positions):

            left = float(position[0])
            top = float(position[1])
            right = float(position[2])
            bottom = float(position[3])
            frame_number = int(position[4])

            # I'm being lazy about the fact that these don't reflect the
            # pixels cropped out of the border. IMO this is OK because for this dataset,
            # this is just an approximate set of coordinates used to disambiguate simultaneous
            # areas of movement when multiple different labels are present in the same video.
            position_info = [left+float((right-left)/2),
                             top+float((bottom-top)/2),
                             int(frame_number)]
            track_info['points'].append(position_info)

            # In a small number of tracks, boxes are turned upside-down or left-over-right;
            # we don't bother checking for coordinate validity in those tracks.
            if left <= right:
                assert left >= 0 and left < clip_attrs.get('res_x')
                assert right >= 0 and right < clip_attrs.get('res_x')

            if top <= bottom:
                assert top >= 0 and top < clip_attrs.get('res_y')
                assert bottom >= 0 and bottom < clip_attrs.get('res_y')

            # frame_number should be approximately equal to i_position + start_frame, but this
            # can be off by a little when 'start_frame' and/or 'end_frame' are not integers.
            # Make sure this is approximately true.

            # assert frame_number == i_position + track.attrs.get('start_frame')
            frame_number_error = abs(frame_number - (i_position + track.attrs.get('start_frame')))
            assert frame_number_error <= 2

        # ...for each position in this track

        tracks_this_clip.append(track_info)

    # ...for each track ID

    clip_metadata['tracks'] = tracks_this_clip

    assert len(human_tags_this_clip) > 0

    ffc_frames = clip_attrs.get('ffc_frames').tolist()
    if len(ffc_frames) > 0:
        assert max(ffc_frames) < thermal_frames.shape[0]
    n_ffc_frames = len(ffc_frames)
    n_frames = thermal_frames.shape[0]
    if verbose:
        if (n_ffc_frames / n_frames) > 0.2:
            print('Warning: in video {}, {} of {} frames are FFC frames (tags: {})'.format(
                fn_relative,n_ffc_frames,n_frames,str(human_tags_this_clip)))

    frames = h5f["frames"]

    if "background" in frames:
        background = frames["background"]
        background_frame_present = True
    else:
        background = frames["thermals"][0]
        background_frame_present = False

    crop_rectangle = clip_attrs["crop_rectangle"]
    background = background[
        crop_rectangle[1]:crop_rectangle[3],
        crop_rectangle[0]:crop_rectangle[2]
    ]

    # Compute the median frame value
    #
    # (...which we may use for filtering)

    frames_array = np.array(frames['thermals'])
    frames_array = frames_array[:,crop_rectangle[1] : crop_rectangle[3], crop_rectangle[0] : crop_rectangle[2]]
    median_values = np.float32(np.median(frames_array,0))

    if (background_frame_present or use_default_filtering):
        background_for_filtering = background
    else:
        if verbose:
            print('No background present: using median values for background')
        background_for_filtering = median_values

    # Find the largest value by which any pixel in this video exceeds the background
    #
    # (...which we may use for normalization)

    max_pixel_diff = 0

    for frame in frames["thermals"]:
        cropped_frame = frame[
            crop_rectangle[1]:crop_rectangle[3],
            crop_rectangle[0]:crop_rectangle[2]
        ]

        filtered_frame = np.float32(cropped_frame) - background_for_filtering
        max_pixel_diff_this_frame = np.amax(filtered_frame)
        if max_pixel_diff_this_frame > max_pixel_diff:
            max_pixel_diff = max_pixel_diff_this_frame

    filtered_frames = []
    original_frames = []

    # i_frame = 0; frame = frames["thermals"][i_frame]
    for i_frame,frame in enumerate(frames["thermals"]):

        cropped_frame = frame[crop_rectangle[1] : crop_rectangle[3], crop_rectangle[0] : crop_rectangle[2]]

        # Subtract the background frame
        filtered_frame = np.float32(cropped_frame) - background_for_filtering

        # Assume that nothing can be cooler than the background
        filtered_frame[filtered_frame < 0] = 0

        # Normalize filtered frame (and convert to three channels)

        if use_default_filtering:
            filtered_frame = norm_image(filtered_frame,stack_channels=write_as_color)
        else:
            filtered_frame = norm_image(filtered_frame,vmin=0,vmax=max_pixel_diff,stack_channels=write_as_color)

        # Normalize original frame (and convert to three channels)

        original_frame = norm_image(cropped_frame,stack_channels=write_as_color)

        filtered_frames.append(filtered_frame)
        original_frames.append(original_frame)

    # ...for each frame

    # filtered_frames[0].shape[1] is 158, clip_attrs.get('res_x') is 160, ergo shape is h,w
    video_w = filtered_frames[0].shape[1]
    video_h = filtered_frames[0].shape[0]

    clip_metadata['width'] = video_w
    clip_metadata['height'] = video_h
    clip_metadata['frame_rate'] = frame_rate

    filtered_video_fn = os.path.join(video_output_folder,str(clip_id) + '_filtered' + codec_to_extension[codec])
    unfiltered_video_fn = os.path.join(video_output_folder,str(clip_id) + codec_to_extension[codec])

    if overwrite_video or (not os.path.isfile(filtered_video_fn)):

        filtered_video_out = cv2.VideoWriter(filtered_video_fn, cv2.VideoWriter_fourcc(*codec), frame_rate,
                                             (video_w, video_h), isColor=write_as_color)

        for i_frame,filtered_frame in enumerate(filtered_frames):
            filtered_video_out.write(filtered_frame)
        filtered_video_out.release()

    if overwrite_video or (not os.path.isfile(unfiltered_video_fn)):

        unfiltered_video_out = cv2.VideoWriter(unfiltered_video_fn, cv2.VideoWriter_fourcc(*codec), frame_rate,
                                               (video_w, video_h), isColor=write_as_color)

        for i_frame,frame in enumerate(original_frames):
            unfiltered_video_out.write(frame)
        unfiltered_video_out.release()

    labels_this_clip = set()


    ## Do some cleanup of tracks and track labels

    valid_tracks = []

    for track_info in clip_metadata['tracks']:

        valid_tags = []

        # Replace some tags with standardized names (e.g. map "allbirds" to "bird")
        for tag in track_info['tags']:
            if tag['label'] in tag_mappings:
                tag['label'] = tag_mappings[tag['label']]

            # Discard tags below the minimum confidence
            if tag['confidence'] >= confidence_threshold:
                valid_tags.append(tag)
            else:
                print('Zero-confidence tag in {}'.format(fn_relative))

        track_info['tags'] = valid_tags

        # Don't keep any tracks that had no tags above the minimum confidence
        if len(valid_tags) > 0:
            valid_tracks.append(track_info)
        else:
            print('Invalid track in {}'.format(fn_relative))

    # ...for each track

    if (len(clip_metadata['tracks']) > 0) and (len(valid_tracks) == 0):
        print('Removed all tracks from {}'.format(fn_relative))

    clip_metadata['tracks'] = valid_tracks

    # Build up the list of labels for this clip
    for track_info in clip_metadata['tracks']:
        for tag in track_info['tags']:
            tag_label = tag['label']
            labels_this_clip.add(tag_label)

    clip_metadata['labels'] = sorted(list(labels_this_clip))

    metadata_fn = os.path.join(individual_metadata_output_folder,str(clip_id) + '_metadata.json')

    # clip_metadata['id'] = clip_id
    # clip_metadata['hdf_filename'] = os.path.basename(fn_relative)

    clip_metadata['video_filename'] = os.path.basename(unfiltered_video_fn)
    clip_metadata['filtered_video_filename'] = os.path.basename(filtered_video_fn)
    clip_metadata['location'] = station_id
    clip_metadata['calibration_frames'] = ffc_frames
    clip_metadata['metadata_filename'] = os.path.basename(metadata_fn)

    with open(metadata_fn,'w') as f:
        json.dump(clip_metadata,f,indent=1)

    return clip_metadata

# ...process_file(...)

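# For reference, a sketch of the dict process_file() returns; field values here
# are invented for illustration.
#
# {
#   "hdf_filename": "450281.hdf5",
#   "id": 450281,
#   "error": null,
#   "tracks": [ {"start_frame": 0, "end_frame": 90,
#                "tags": [{"label": "possum", "confidence": 0.85}],
#                "points": [[26.0, 40.0, 0], ...]} ],
#   "width": 158, "height": 118, "frame_rate": 9,
#   "labels": ["possum"],
#   "video_filename": "450281.mp4",
#   "filtered_video_filename": "450281_filtered.mp4",
#   "location": 1234,
#   "calibration_frames": [],
#   "metadata_filename": "450281_metadata.json"
# }
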
#%% Process files

n_workers = 16

if debug_n > 0:
    files_to_process = all_hdf_files_relative[0:debug_n]
else:
    files_to_process = all_hdf_files_relative

if n_workers <= 1:

    all_clip_metadata = []
    for i_file,fn_relative in tqdm(enumerate(files_to_process),total=len(files_to_process)):
        clip_metadata = process_file(fn_relative)
        all_clip_metadata.append(clip_metadata)

else:

    pool = Pool(n_workers)
    all_clip_metadata = list(tqdm(pool.imap(process_file,files_to_process),
                                  total=len(files_to_process)))

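# Equivalent sketch using a context manager, so the worker pool is always cleaned
# up; the inline version above keeps the cell-by-cell style.
#
# with Pool(n_workers) as pool:
#     all_clip_metadata = list(tqdm(pool.imap(process_file,files_to_process),
#                                   total=len(files_to_process)))
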
#%% Postprocessing

failed_file_to_error = {}

label_to_video_count = defaultdict(int)

# clip_metadata = all_clip_metadata[0]
for clip_metadata in all_clip_metadata:

    if clip_metadata['error'] is not None:
        failed_file_to_error[clip_metadata['hdf_filename']] = clip_metadata['error']
        continue

    labels_this_clip = set()

    # track_info = clip_metadata['tracks'][0]
    for track_info in clip_metadata['tracks']:
        for tag in track_info['tags']:
            tag_label = tag['label']
            labels_this_clip.add(tag_label)

    for label in labels_this_clip:
        label_to_video_count[label] += 1

# ...for each clip

label_to_video_count = {k: v for k, v in sorted(label_to_video_count.items(),
                                                key=lambda item: item[1], reverse=True)}

print('Failed to open {} of {} files'.format(
    len(failed_file_to_error),len(all_hdf_files_relative)))

print('Labels:\n')

for label in label_to_video_count:
    print('{}: {}'.format(label,label_to_video_count[label]))


#%% Write count .csv

count_csv_file_name = os.path.join(output_base,'new-zealand-wildlife-thermal-imaging-counts.csv')

with open(count_csv_file_name,'w') as f:
    f.write('label,count\n')
    for label in label_to_video_count:
        f.write('{},{}\n'.format(label,label_to_video_count[label]))


#%% Build and zip the main .json file

main_metadata_filename_abs = os.path.join(output_base,main_metadata_filename)

info = {}
info['version'] = '1.0.0'
info['description'] = 'New Zealand Thermal Wildlife Imaging'
info['contributor'] = 'Cacophony Project'

main_metadata = {}
main_metadata['info'] = info
main_metadata['clips'] = []

# clip_metadata = all_clip_metadata[0]
for clip_metadata in tqdm(all_clip_metadata):
    slim_metadata = remove_tracking_points(clip_metadata)

    if 'tracks' in slim_metadata:
        for track in slim_metadata['tracks']:
            for tag in track['tags']:
                tag['confidence'] = truncate_float(tag['confidence'],confidence_digits)

    main_metadata['clips'].append(slim_metadata)

with open(main_metadata_filename_abs,'w') as f:
    json.dump(main_metadata,f,indent=1)

zip_file_name = main_metadata_filename_abs.replace('.json','-metadata.json.zip')
with ZipFile(zip_file_name,'w',zipfile.ZIP_DEFLATED) as zipf:
    zipf.write(main_metadata_filename_abs,
               arcname=os.path.basename(main_metadata_filename_abs),
               compresslevel=9,compress_type=zipfile.ZIP_DEFLATED)


#%% Create a zipfile containing videos, main metadata, and individual metadata

zip_file_name = os.path.join(output_base,'new-zealand-wildlife-thermal-imaging.zip')

all_files = path_utils.recursive_file_list(output_base)
all_files_relative = [os.path.relpath(fn,output_base) for fn in all_files]
all_files_to_zip_relative = [fn for fn in all_files_relative if \
                             (\
                              ('individual-metadata/' in fn) or \
                              ('videos/' in fn) or \
                              (fn.endswith('.json'))
                             )]

print('Zipping {} files (of {} total files)'.format(len(all_files_to_zip_relative),len(all_files)))

with ZipFile(zip_file_name,'w',zipfile.ZIP_DEFLATED) as zipf:
    for fn_relative in tqdm(all_files_to_zip_relative):
        fn_abs = os.path.join(output_base,fn_relative)
        if fn_abs.endswith('.mp4'):
            zipf.write(fn_abs,arcname=fn_relative,compresslevel=0,compress_type=zipfile.ZIP_STORED)
        else:
            zipf.write(fn_abs,arcname=fn_relative,compresslevel=9,compress_type=zipfile.ZIP_DEFLATED)


#%% Scrap

if False:

    pass

    #%% Process one file

    # i_file = 110680; fn_relative = all_hdf_files_relative[i_file]
    # i_file = 8; fn_relative = all_hdf_files_relative[i_file]

    fn_relative = [fn for fn in all_hdf_files_relative if '450281' in fn][0]

    clip_metadata = process_file(fn_relative)


    #%% Move individual metadata files

    source_folder = base_dir
    target_folder = os.path.expanduser('~/tmp/cacophony-thermal-out-individual-metadata')
    assert os.path.isdir(source_folder) and os.path.isdir(target_folder)

    from megadetector.utils import path_utils
    all_files = path_utils.recursive_file_list(source_folder)
    files_to_move = [fn for fn in all_files if '_metadata.json' in fn]
    print('Moving {} of {} files'.format(len(files_to_move),len(all_files)))

    import shutil
    # source_fn = files_to_move[0]
    for source_fn in tqdm(files_to_move):
        target_fn = os.path.join(target_folder,os.path.basename(source_fn))
        shutil.move(source_fn,target_fn)


    #%% Choose a random video with a particular label

    target_label = 'pukeko'
    target_clips = []

    for clip_metadata in all_clip_metadata:

        if clip_metadata['error'] is not None:
            continue

        labels_this_clip = set()

        # track_info = clip_metadata['tracks'][0]
        for track_info in clip_metadata['tracks']:
            for tag in track_info['tags']:
                tag_label = tag['label']
                labels_this_clip.add(tag_label)

        if target_label in labels_this_clip:
            target_clips.append(clip_metadata)

    print('Found {} matches'.format(len(target_clips)))

    import random
    selected_clip = random.choice(target_clips)
    filtered_video_filename = selected_clip['filtered_video_filename']
    video_filename = selected_clip['video_filename']

    from megadetector.utils.path_utils import open_file
    # open_file(os.path.join(output_base,video_filename))
    open_file(os.path.join(output_base,filtered_video_filename))

    # import clipboard; clipboard.copy(os.path.join(output_base,video_filename))


    #%% Look for clips with multiple different labels

    for i_clip,clip_metadata in enumerate(all_clip_metadata):

        if clip_metadata['error'] is not None:
            continue

        labels_this_clip = set()

        # track_info = clip_metadata['tracks'][0]
        for track_info in clip_metadata['tracks']:
            for tag in track_info['tags']:
                tag_label = tag['label']
                if tag_label not in labels_to_ignore_when_another_label_is_present:
                    labels_this_clip.add(tag_label)

        assert len(labels_this_clip) <= 3

        if len(labels_this_clip) > 1:
            print('Clip {} has {} labels: {}'.format(
                i_clip,len(labels_this_clip),str(labels_this_clip)))

        # remove_tracking_points(clip_metadata)


    #%% Add the .json filename to each clip in all_clip_metadata

    for i_clip,clip_metadata in tqdm(enumerate(all_clip_metadata),
                                     total=len(all_clip_metadata)):

        clip_metadata['metadata_filename'] = clip_metadata['hdf_filename'].replace('.hdf5',
                                                                                   '_metadata.json')


    #%% Add a "labels" field to each .json file

    # This was only necessary during debugging; this is added in the main loop now.

    for i_clip,clip_metadata in tqdm(enumerate(all_clip_metadata),
                                     total=len(all_clip_metadata)):

        if clip_metadata['error'] is not None:
            continue

        labels_this_clip = set()

        # track_info = clip_metadata['tracks'][0]
        for track_info in clip_metadata['tracks']:
            for tag in track_info['tags']:
                tag_label = tag['label']
                # if tag_label not in labels_to_ignore_when_another_label_is_present:
                if True:
                    labels_this_clip.add(tag_label)

        clip_metadata['labels'] = sorted(list(labels_this_clip))

        json_filename = os.path.join(output_base,str(clip_metadata['id']) + '_metadata.json')
        assert os.path.isfile(json_filename)

        with open(json_filename,'w') as f:
            json.dump(clip_metadata,f,indent=1)