megadetector 5.0.11__py3-none-any.whl → 5.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (201)
  1. megadetector/api/__init__.py +0 -0
  2. megadetector/api/batch_processing/__init__.py +0 -0
  3. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  4. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. megadetector/api/batch_processing/api_core/batch_service/score.py +439 -0
  6. megadetector/api/batch_processing/api_core/server.py +294 -0
  7. megadetector/api/batch_processing/api_core/server_api_config.py +98 -0
  8. megadetector/api/batch_processing/api_core/server_app_config.py +55 -0
  9. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +220 -0
  10. megadetector/api/batch_processing/api_core/server_job_status_table.py +152 -0
  11. megadetector/api/batch_processing/api_core/server_orchestration.py +360 -0
  12. megadetector/api/batch_processing/api_core/server_utils.py +92 -0
  13. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  14. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +46 -0
  15. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  16. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +152 -0
  17. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  18. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  19. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  20. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  21. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +126 -0
  22. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  23. megadetector/api/synchronous/__init__.py +0 -0
  24. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  25. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +152 -0
  26. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -0
  27. megadetector/api/synchronous/api_core/animal_detection_api/config.py +35 -0
  28. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  29. megadetector/api/synchronous/api_core/tests/load_test.py +110 -0
  30. megadetector/classification/__init__.py +0 -0
  31. megadetector/classification/aggregate_classifier_probs.py +108 -0
  32. megadetector/classification/analyze_failed_images.py +227 -0
  33. megadetector/classification/cache_batchapi_outputs.py +198 -0
  34. megadetector/classification/create_classification_dataset.py +627 -0
  35. megadetector/classification/crop_detections.py +516 -0
  36. megadetector/classification/csv_to_json.py +226 -0
  37. megadetector/classification/detect_and_crop.py +855 -0
  38. megadetector/classification/efficientnet/__init__.py +9 -0
  39. megadetector/classification/efficientnet/model.py +415 -0
  40. megadetector/classification/efficientnet/utils.py +610 -0
  41. megadetector/classification/evaluate_model.py +520 -0
  42. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  43. megadetector/classification/json_to_azcopy_list.py +63 -0
  44. megadetector/classification/json_validator.py +699 -0
  45. megadetector/classification/map_classification_categories.py +276 -0
  46. megadetector/classification/merge_classification_detection_output.py +506 -0
  47. megadetector/classification/prepare_classification_script.py +194 -0
  48. megadetector/classification/prepare_classification_script_mc.py +228 -0
  49. megadetector/classification/run_classifier.py +287 -0
  50. megadetector/classification/save_mislabeled.py +110 -0
  51. megadetector/classification/train_classifier.py +827 -0
  52. megadetector/classification/train_classifier_tf.py +725 -0
  53. megadetector/classification/train_utils.py +323 -0
  54. megadetector/data_management/__init__.py +0 -0
  55. megadetector/data_management/annotations/__init__.py +0 -0
  56. megadetector/data_management/annotations/annotation_constants.py +34 -0
  57. megadetector/data_management/camtrap_dp_to_coco.py +239 -0
  58. megadetector/data_management/cct_json_utils.py +395 -0
  59. megadetector/data_management/cct_to_md.py +176 -0
  60. megadetector/data_management/cct_to_wi.py +289 -0
  61. megadetector/data_management/coco_to_labelme.py +272 -0
  62. megadetector/data_management/coco_to_yolo.py +662 -0
  63. megadetector/data_management/databases/__init__.py +0 -0
  64. megadetector/data_management/databases/add_width_and_height_to_db.py +33 -0
  65. megadetector/data_management/databases/combine_coco_camera_traps_files.py +206 -0
  66. megadetector/data_management/databases/integrity_check_json_db.py +477 -0
  67. megadetector/data_management/databases/subset_json_db.py +115 -0
  68. megadetector/data_management/generate_crops_from_cct.py +149 -0
  69. megadetector/data_management/get_image_sizes.py +189 -0
  70. megadetector/data_management/importers/add_nacti_sizes.py +52 -0
  71. megadetector/data_management/importers/add_timestamps_to_icct.py +79 -0
  72. megadetector/data_management/importers/animl_results_to_md_results.py +158 -0
  73. megadetector/data_management/importers/auckland_doc_test_to_json.py +373 -0
  74. megadetector/data_management/importers/auckland_doc_to_json.py +201 -0
  75. megadetector/data_management/importers/awc_to_json.py +191 -0
  76. megadetector/data_management/importers/bellevue_to_json.py +273 -0
  77. megadetector/data_management/importers/cacophony-thermal-importer.py +796 -0
  78. megadetector/data_management/importers/carrizo_shrubfree_2018.py +269 -0
  79. megadetector/data_management/importers/carrizo_trail_cam_2017.py +289 -0
  80. megadetector/data_management/importers/cct_field_adjustments.py +58 -0
  81. megadetector/data_management/importers/channel_islands_to_cct.py +913 -0
  82. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +180 -0
  83. megadetector/data_management/importers/eMammal/eMammal_helpers.py +249 -0
  84. megadetector/data_management/importers/eMammal/make_eMammal_json.py +223 -0
  85. megadetector/data_management/importers/ena24_to_json.py +276 -0
  86. megadetector/data_management/importers/filenames_to_json.py +386 -0
  87. megadetector/data_management/importers/helena_to_cct.py +283 -0
  88. megadetector/data_management/importers/idaho-camera-traps.py +1407 -0
  89. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +294 -0
  90. megadetector/data_management/importers/jb_csv_to_json.py +150 -0
  91. megadetector/data_management/importers/mcgill_to_json.py +250 -0
  92. megadetector/data_management/importers/missouri_to_json.py +490 -0
  93. megadetector/data_management/importers/nacti_fieldname_adjustments.py +79 -0
  94. megadetector/data_management/importers/noaa_seals_2019.py +181 -0
  95. megadetector/data_management/importers/pc_to_json.py +365 -0
  96. megadetector/data_management/importers/plot_wni_giraffes.py +123 -0
  97. megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -0
  98. megadetector/data_management/importers/prepare_zsl_imerit.py +131 -0
  99. megadetector/data_management/importers/rspb_to_json.py +356 -0
  100. megadetector/data_management/importers/save_the_elephants_survey_A.py +320 -0
  101. megadetector/data_management/importers/save_the_elephants_survey_B.py +329 -0
  102. megadetector/data_management/importers/snapshot_safari_importer.py +758 -0
  103. megadetector/data_management/importers/snapshot_safari_importer_reprise.py +665 -0
  104. megadetector/data_management/importers/snapshot_serengeti_lila.py +1067 -0
  105. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +150 -0
  106. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +153 -0
  107. megadetector/data_management/importers/sulross_get_exif.py +65 -0
  108. megadetector/data_management/importers/timelapse_csv_set_to_json.py +490 -0
  109. megadetector/data_management/importers/ubc_to_json.py +399 -0
  110. megadetector/data_management/importers/umn_to_json.py +507 -0
  111. megadetector/data_management/importers/wellington_to_json.py +263 -0
  112. megadetector/data_management/importers/wi_to_json.py +442 -0
  113. megadetector/data_management/importers/zamba_results_to_md_results.py +181 -0
  114. megadetector/data_management/labelme_to_coco.py +547 -0
  115. megadetector/data_management/labelme_to_yolo.py +272 -0
  116. megadetector/data_management/lila/__init__.py +0 -0
  117. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +97 -0
  118. megadetector/data_management/lila/add_locations_to_nacti.py +147 -0
  119. megadetector/data_management/lila/create_lila_blank_set.py +558 -0
  120. megadetector/data_management/lila/create_lila_test_set.py +152 -0
  121. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  122. megadetector/data_management/lila/download_lila_subset.py +178 -0
  123. megadetector/data_management/lila/generate_lila_per_image_labels.py +516 -0
  124. megadetector/data_management/lila/get_lila_annotation_counts.py +170 -0
  125. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  126. megadetector/data_management/lila/lila_common.py +300 -0
  127. megadetector/data_management/lila/test_lila_metadata_urls.py +132 -0
  128. megadetector/data_management/ocr_tools.py +874 -0
  129. megadetector/data_management/read_exif.py +681 -0
  130. megadetector/data_management/remap_coco_categories.py +84 -0
  131. megadetector/data_management/remove_exif.py +66 -0
  132. megadetector/data_management/resize_coco_dataset.py +189 -0
  133. megadetector/data_management/wi_download_csv_to_coco.py +246 -0
  134. megadetector/data_management/yolo_output_to_md_output.py +441 -0
  135. megadetector/data_management/yolo_to_coco.py +676 -0
  136. megadetector/detection/__init__.py +0 -0
  137. megadetector/detection/detector_training/__init__.py +0 -0
  138. megadetector/detection/detector_training/model_main_tf2.py +114 -0
  139. megadetector/detection/process_video.py +702 -0
  140. megadetector/detection/pytorch_detector.py +341 -0
  141. megadetector/detection/run_detector.py +779 -0
  142. megadetector/detection/run_detector_batch.py +1219 -0
  143. megadetector/detection/run_inference_with_yolov5_val.py +917 -0
  144. megadetector/detection/run_tiled_inference.py +934 -0
  145. megadetector/detection/tf_detector.py +189 -0
  146. megadetector/detection/video_utils.py +606 -0
  147. megadetector/postprocessing/__init__.py +0 -0
  148. megadetector/postprocessing/add_max_conf.py +64 -0
  149. megadetector/postprocessing/categorize_detections_by_size.py +163 -0
  150. megadetector/postprocessing/combine_api_outputs.py +249 -0
  151. megadetector/postprocessing/compare_batch_results.py +958 -0
  152. megadetector/postprocessing/convert_output_format.py +396 -0
  153. megadetector/postprocessing/load_api_results.py +195 -0
  154. megadetector/postprocessing/md_to_coco.py +310 -0
  155. megadetector/postprocessing/md_to_labelme.py +330 -0
  156. megadetector/postprocessing/merge_detections.py +401 -0
  157. megadetector/postprocessing/postprocess_batch_results.py +1902 -0
  158. megadetector/postprocessing/remap_detection_categories.py +170 -0
  159. megadetector/postprocessing/render_detection_confusion_matrix.py +660 -0
  160. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +211 -0
  161. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +83 -0
  162. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1631 -0
  163. megadetector/postprocessing/separate_detections_into_folders.py +730 -0
  164. megadetector/postprocessing/subset_json_detector_output.py +696 -0
  165. megadetector/postprocessing/top_folders_to_bottom.py +223 -0
  166. megadetector/taxonomy_mapping/__init__.py +0 -0
  167. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  168. megadetector/taxonomy_mapping/map_new_lila_datasets.py +150 -0
  169. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -0
  170. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +590 -0
  171. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  172. megadetector/taxonomy_mapping/simple_image_download.py +219 -0
  173. megadetector/taxonomy_mapping/species_lookup.py +834 -0
  174. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  175. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  176. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  177. megadetector/utils/__init__.py +0 -0
  178. megadetector/utils/azure_utils.py +178 -0
  179. megadetector/utils/ct_utils.py +612 -0
  180. megadetector/utils/directory_listing.py +246 -0
  181. megadetector/utils/md_tests.py +968 -0
  182. megadetector/utils/path_utils.py +1044 -0
  183. megadetector/utils/process_utils.py +157 -0
  184. megadetector/utils/sas_blob_utils.py +509 -0
  185. megadetector/utils/split_locations_into_train_val.py +228 -0
  186. megadetector/utils/string_utils.py +92 -0
  187. megadetector/utils/url_utils.py +323 -0
  188. megadetector/utils/write_html_image_list.py +225 -0
  189. megadetector/visualization/__init__.py +0 -0
  190. megadetector/visualization/plot_utils.py +293 -0
  191. megadetector/visualization/render_images_with_thumbnails.py +275 -0
  192. megadetector/visualization/visualization_utils.py +1536 -0
  193. megadetector/visualization/visualize_db.py +550 -0
  194. megadetector/visualization/visualize_detector_output.py +405 -0
  195. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/METADATA +1 -1
  196. megadetector-5.0.12.dist-info/RECORD +199 -0
  197. megadetector-5.0.12.dist-info/top_level.txt +1 -0
  198. megadetector-5.0.11.dist-info/RECORD +0 -5
  199. megadetector-5.0.11.dist-info/top_level.txt +0 -1
  200. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/LICENSE +0 -0
  201. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/WHEEL +0 -0
@@ -0,0 +1,359 @@
1
+ """
2
+
3
+ Prepare a LILA-ready .json file for the NOAA Puget Sound Nearshore Fish dataset.
4
+
5
+ """
6
+
7
+ #%% Constants and imports
8
+
9
+ import os
10
+ import json
11
+ import uuid
12
+ import pandas as pd
13
+
14
+ from megadetector.utils.path_utils import open_file
15
+
16
# Root working folder: the image folders, the metadata spreadsheet and the
# annotation .json files are all read from here, and the output .json file
# is written here.
base_folder = r'G:\temp\noaa'
output_json_fn = os.path.join(base_folder, 'noaa_estuary_fish.json')

# Image folders, relative to base_folder
edited_image_folders = ['edited_clip_2017', 'edited_clip_2018']
jpeg_image_folder = 'JPEGImages'

# Metadata spreadsheet, relative to base_folder
metadata_file = 'MasterDataForMicrosoft.xlsx'
21
+
22
+
23
#%% Enumerate files

# Collect the full path of every file in the "edited" folders, verifying
# that each of those folders contains only .jpg files.
edited_image_files = []

for edited_image_folder in edited_image_folders:
    folder_path = os.path.join(base_folder, edited_image_folder)
    image_files = os.listdir(folder_path)
    assert all(name.endswith('.jpg') for name in image_files)
    edited_image_files += [os.path.join(folder_path, name) for name in image_files]

# The JPEG folder must contain only .jpg files as well
jpeg_image_folder_files = os.listdir(os.path.join(base_folder, jpeg_image_folder))
assert all(name.endswith('.jpg') for name in jpeg_image_folder_files)

# Base names have to be unique across all of the edited folders
relative_edited_image_files_set = set()

for full_path in edited_image_files:
    base_name = os.path.basename(full_path)
    assert base_name not in relative_edited_image_files_set
    relative_edited_image_files_set.add(base_name)

# The JPEG folder and the edited folders must hold exactly the same filenames
jpeg_image_folder_files_set = set(jpeg_image_folder_files)

assert len(jpeg_image_folder_files_set) == len(relative_edited_image_files_set)
assert jpeg_image_folder_files_set == relative_edited_image_files_set
50
+
51
+
52
#%% Read metadata and capture location information

df = pd.read_excel(os.path.join(base_folder, metadata_file))
print('Read {} rows from metadata file'.format(len(df)))

# Maps strings of the form [SD]_[id]_[date] to the value in the 'site' column
id_string_to_site = {}

for i_row, row in df.iterrows():

    sd_value = row['sd']
    numeric_id = row['id']

    assert sd_value.lower().startswith('sd')
    assert isinstance(numeric_id, int) and 0 < numeric_id < 10000

    date_tokens = row['date'].split('_')
    year_token = date_tokens[2]

    # Sometimes '2017' was just '17' in the date column; normalize to four digits
    if len(year_token) == 4:
        assert year_token.startswith('201')
    else:
        assert len(year_token) == 2
        date_tokens[2] = '20' + year_token

    date_string = '_'.join(date_tokens)
    id_string = '_'.join([sd_value.upper(), str(numeric_id), date_string])
    id_string_to_site[id_string] = row['site']

print('Found {} unique locations'.format(len(pd.unique(df['site']))))
80
+
81
+
82
#%% Read the .json files and build output dictionaries

# Every .json file in the base folder, other than this script's own output
# file, is read as an annotation file with one JSON document per line.
output_json_basename = os.path.basename(output_json_fn)
json_files = [os.path.join(base_folder,fn) for fn in os.listdir(base_folder)
              if (fn.endswith('.json') and (fn != output_json_basename))]

# Maps image filenames to the image dicts that are written to the output file
fn_to_image = {}
annotations = []

CATEGORY_ID_EMPTY = 0
CATEGORY_ID_FISH = 1

categories = [{'id':CATEGORY_ID_EMPTY,'name':'empty'},{'id':CATEGORY_ID_FISH,'name':'animal'}]

empty_images = set()
non_empty_images = set()

n_matched_locations = 0

# An image whose location can't be matched is a fatal error (see below), so
# this list is never appended to; it is kept because the summary printed at
# the end of this cell reports its length.
images_with_unmatched_locations = []

# Nothing in this cell draws random numbers; the seed is retained because it
# changes global RNG state.
# NOTE(review): confirm whether any later cell depends on this seed.
import random
random.seed(1)

# Maps site names from the metadata file to generated location IDs
site_to_location_id = {}

# json_fn = json_files[0]
for json_fn in json_files:

    # Read the whole file up front so the handle isn't held open while the
    # lines are processed
    with open(json_fn,'r') as f:
        lines = f.readlines()

    # line = lines[0]
    for line in lines:

        d = json.loads(line)
        image_fn = d['image']

        # Filenames look like:
        #
        # SD29_079_5_14_2018_17_52.85.jpg
        tokens = image_fn.split('_')
        assert len(tokens) == 7
        assert tokens[0].startswith('SD')

        # Re-write two-digit years as four-digit years
        if len(tokens[4]) != 4:
            assert len(tokens[4]) == 2
            tokens[4] = '20' + tokens[4]
        else:
            assert tokens[4].startswith('201')

        # Remove leading zeros from the numeric ID, so it matches the
        # str(int) form used to build the keys of id_string_to_site
        tokens[1] = tokens[1].lstrip('0')
        assert len(tokens[1]) > 0

        id_string = '_'.join(tokens[0:5])

        # Every image is required to map to a known site
        if id_string not in id_string_to_site:
            raise ValueError('Could not match location ID for image {}'.format(image_fn))

        site_id = id_string_to_site[id_string]

        # Have we seen this location already?
        if site_id in site_to_location_id:
            location_id = site_to_location_id[site_id]
        else:
            location_id = 'loc_' + str(uuid.uuid1())
            site_to_location_id[site_id] = location_id
            print('Adding new location ID {} for site {}'.format(
                location_id,site_id))
        n_matched_locations += 1

        assert image_fn in jpeg_image_folder_files_set
        assert d['type'] == 'image/jpg'
        input_ann = d['annotations']
        assert len(input_ann) == 1 and 'object' in input_ann
        input_ann = input_ann['object']

        # All images are expected to be 1920x1080
        img_h = input_ann['metainfo']['image']['height']
        img_w = input_ann['metainfo']['image']['width']
        assert img_h == 1080
        assert img_w == 1920

        if image_fn in fn_to_image:
            # The same image can appear on more than one line; re-use the
            # existing record after checking that it's consistent
            im = fn_to_image[image_fn]
            assert im['file_name'] == image_fn
            assert im['width'] == img_w
            assert im['height'] == img_h
        else:
            im = {}
            im['width'] = img_w
            im['height'] = img_h
            im['file_name'] = image_fn
            im['location'] = location_id
            # The filename doubles as the image ID
            im['id'] = image_fn
            fn_to_image[image_fn] = im

        # Not a typo, it's actually "formateddata"
        formatted_data = input_ann['formateddata']

        if len(formatted_data) == 0:

            # An image shouldn't be annotated as both empty and non-empty
            assert image_fn not in non_empty_images
            empty_images.add(image_fn)

            ann = {}
            ann['id'] = str(uuid.uuid1())
            ann['image_id'] = im['id']
            ann['category_id'] = CATEGORY_ID_EMPTY
            ann['sequence_level_annotation'] = False
            annotations.append(ann)

        else:

            # An image shouldn't be annotated as both empty and non-empty
            assert image_fn not in empty_images
            non_empty_images.add(image_fn)

            # box = formatted_data[0]
            for box in formatted_data:

                attributes = box['attribute']
                assert len(attributes) == 2 and 'occluded' in attributes and 'truncated' in attributes
                coordinates = box['coordinates']
                assert box['object_type'] == 'bbox'
                assert box['class']['type'] == 'Fish'
                assert len(coordinates) == 4
                for coord in coordinates:
                    assert len(coord) == 2 and 'x' in coord and 'y' in coord

                # The four corners are expected in the order (x-min,y-min),
                # (x-max,y-min), (x-max,y-max), (x-min,y-max), forming an
                # axis-aligned rectangle with non-zero width and height
                assert coordinates[0]['y'] == coordinates[1]['y']
                assert coordinates[2]['y'] == coordinates[3]['y']
                assert coordinates[0]['x'] == coordinates[3]['x']
                assert coordinates[1]['x'] == coordinates[2]['x']

                assert coordinates[0]['x'] < coordinates[1]['x']
                assert coordinates[0]['y'] < coordinates[3]['y']

                # Boxes are stored as [x,y,width,height], in the same
                # (un-normalized) units as the input coordinates
                x = coordinates[0]['x']
                y = coordinates[0]['y']
                box_w = (coordinates[1]['x'] - coordinates[0]['x'])
                box_h = (coordinates[3]['y'] - coordinates[0]['y'])

                bbox = [x,y,box_w,box_h]

                ann = {}
                ann['id'] = str(uuid.uuid1())
                ann['image_id'] = im['id']
                ann['category_id'] = CATEGORY_ID_FISH
                ann['sequence_level_annotation'] = False
                ann['bbox'] = bbox

                annotations.append(ann)

            # ...for each box

        # ...if there are boxes on this image

    # ...for each line

# ...for each json file

print('Found annotations for {} images (of {})'.format(len(fn_to_image),
    len(jpeg_image_folder_files_set)))

print('Matched locations for {} images (failed to match {})'.format(
    n_matched_locations,len(images_with_unmatched_locations)))

images = list(fn_to_image.values())
278
+
279
+
280
#%% Prepare the output .json

# Dataset-level metadata
info = {
    'version': '2022.07.31.00',
    'description': 'NOAA Estuary Fish 2022',
    'year': 2022,
    'contributor': 'NOAA Fisheries',
}

# Top-level output dictionary; later cells read this back via "d"
d = {
    'info': info,
    'annotations': annotations,
    'images': images,
    'categories': categories,
}

with open(output_json_fn, 'w') as output_file:
    json.dump(d, output_file, indent=1)
296
+
297
+
298
#%% Check DB integrity

from megadetector.data_management.databases import integrity_check_json_db

# Check the file we just wrote against the images in the JPEG folder:
# image existence and unused images are checked, image sizes are not.
options = integrity_check_json_db.IntegrityCheckOptions()
options.baseDir = os.path.join(base_folder, jpeg_image_folder)
options.bCheckImageExistence = True
options.bFindUnusedImages = True
options.bCheckImageSizes = False

# The three return values aren't needed here
_, _, _ = integrity_check_json_db.integrity_check_json_db(output_json_fn, options)
309
+
310
+
311
#%% Print unique locations

from collections import defaultdict

# Count the number of images at each location
location_to_count = defaultdict(int)
for im in d['images']:
    location_to_count[im['location']] += 1

for loc, image_count in location_to_count.items():
    print(loc + ': ' + str(image_count))

print('{} unique locations'.format(len(location_to_count)))

# No image should have been left with the placeholder location
assert 'unknown' not in location_to_count
322
+
323
+ # SD12_202_6_23_2017_1_31.85.jpg
324
+
325
+
326
#%% Preview some images

from megadetector.visualization import visualize_db

# Rendering options for the HTML preview
viz_options = visualize_db.DbVizOptions()
viz_options.num_to_visualize = 10000
viz_options.parallelize_rendering = True
viz_options.include_filename_links = True
viz_options.trim_to_images_with_bboxes = False
viz_options.add_search_links = False
viz_options.sort_by_filename = False

preview_folder = os.path.join(base_folder, 'preview')
preview_image_base = os.path.join(base_folder, jpeg_image_folder)

html_output_file, _ = visualize_db.visualize_db(db_path=output_json_fn,
                                                output_dir=preview_folder,
                                                image_base_dir=preview_image_base,
                                                options=viz_options)

# Open the rendered preview page
open_file(html_output_file)
343
+
344
+
345
#%% Statistics

print('Empty: {}'.format(len(empty_images)))
print('Non-empty: {}'.format(len(non_empty_images)))

# Annotations without a 'bbox' field are the ones created for empty images;
# every other annotation is a single box.
images_with_no_boxes = 0
n_boxes = 0
for ann in annotations:
    if 'bbox' not in ann:
        images_with_no_boxes += 1
    else:
        # Validate the box that belongs to *this* annotation, not whatever
        # "bbox" variable happens to be left over from the parsing loop
        assert len(ann['bbox']) == 4
        n_boxes += 1

print('N boxes: {}'.format(n_boxes))
@@ -0,0 +1,131 @@
1
+ """
2
+
3
+ prepare_zsl_imerit.py
4
+
5
+ Prepare ZSL Borneo data for annotation (convert input data to iMerit-friendly format).
6
+
7
+ """
8
+
9
+ #%% Imports and constants
10
+
11
+ import json
12
+ import os
13
+
14
+ from tqdm import tqdm
15
+ from operator import itemgetter
16
+ from shutil import copyfile
17
+
18
+ from megadetector.visualization import visualize_db
19
+ from megadetector.data_management.databases import integrity_check_json_db
20
+ from megadetector.data_management.cct_json_utils import IndexedJsonDb
21
+
22
# .json file containing the list of image IDs to send for annotation
annotation_list_filename = r'd:\wildlife_data\zsl_borneo\all_img_ids_to_bbox.json'

# Image database (.json) and the folder that its filenames are relative to
image_json = r'd:\wildlife_data\zsl_borneo\201906cameratraps\0.5\zsl_camera_traps_201906.json'
image_base = r'd:\wildlife_data\zsl_borneo\201906cameratraps\0.5'

# Previews and the output batch folder are written under this folder
output_base = r'd:\wildlife_data\zsl_borneo'

# Images with any of these class names (compared in lowercase) are always
# selected for annotation
human_classes = ['human', 'hunter']
28
+
29
+
30
#%% Load data

# The list of image IDs that were requested for annotation
with open(annotation_list_filename, 'r') as annotation_list_file:
    annotation_list = json.load(annotation_list_file)

# The image database is loaded through IndexedJsonDb rather than as a raw
# dict; the underlying dict is used via indexedData.db in later cells.
indexedData = IndexedJsonDb(image_json)

print('Done loading data')
40
+
41
+
42
#%% Validate data

# Check that every image in the database exists under image_base; image
# sizes and unused images are not checked.
options = integrity_check_json_db.IntegrityCheckOptions()
options.baseDir = image_base
options.bCheckImageSizes = False
options.bCheckImageExistence = True
options.bFindUnusedImages = False

# integrity_check_json_db returns three values; only the first is kept.
# NOTE(review): presumably the first value is the sorted category list, per
# the variable name; confirm against the function's documentation.
sortedCategories, _, _ = integrity_check_json_db.integrity_check_json_db(indexedData.db,options)
51
+
52
+
53
#%% Label previews

# os.startfile() is only available on Windows, so use the package's own
# file-opening helper instead.
from megadetector.utils.path_utils import open_file

# Render an HTML preview of a sample of the database
viz_options = visualize_db.DbVizOptions()
viz_options.num_to_visualize = 500
viz_options.trim_to_images_with_bboxes = False
viz_options.add_search_links = True
viz_options.sort_by_filename = False
html_output_file,image_db = visualize_db.visualize_db(indexedData.db,
                                                      os.path.join(output_base,'preview'),
                                                      image_base,viz_options)
open_file(html_output_file)
64
+
65
+
66
#%% Collect images to annotate

# Use a set for fast membership tests on image IDs
annotation_list = set(annotation_list)

images_to_annotate = []
n_humans = 0

for im in tqdm(indexedData.db['images']):

    # Does any class on this image match one of the human classes?
    b_human = any(cn.lower() in human_classes
                  for cn in indexedData.get_classes_for_image(im))

    # Images with humans are always selected; other images are selected
    # only if their ID is in the annotation list
    if b_human:
        n_humans += 1
        images_to_annotate.append(im)
    elif im['id'] in annotation_list:
        images_to_annotate.append(im)

print('Found {} of {} images ({} humans)'.format(len(images_to_annotate),len(annotation_list),n_humans))
assert len(images_to_annotate) >= len(annotation_list)
89
+
90
+
91
+
92
#%% Sort by sequence and frame

# Order by sequence ID first, then by frame number within each sequence
images_to_annotate.sort(key=itemgetter('seq_id', 'frame_num'))
95
+
96
+
97
#%% Copy to a folder by GUID

# Output filenames have the form:
#
# dataset[dataset_id].seq[sequence_id].frame[frame_number].img[img_id].extension

imerit_output_base = os.path.join(output_base,'imerit_batch_9')
os.makedirs(imerit_output_base,exist_ok=True)

# im = images_to_annotate[0]
for im in tqdm(images_to_annotate):

    relative_path = im['file_name']
    extension = os.path.splitext(relative_path)[1]
    frame_num = im['frame_num']
    seq_id = im['seq_id']

    # Named "image_id" rather than "id" to avoid shadowing the builtin
    image_id = im['id']

    # '.' separates the fields of the output filename, so it can't appear
    # in the ID itself
    assert '.' not in image_id

    input_full_path = os.path.join(image_base,relative_path)
    assert os.path.isfile(input_full_path)

    # Sequence IDs are zero-padded to eight digits, frame numbers to four
    output_filename = 'datasetzslborneo.seq' + '{0:0>8d}'.format(seq_id) + '.frame' + \
        '{0:0>4d}'.format(frame_num) + '.img' + image_id + extension

    # Record the new filename on the image, so it's included when the image
    # list is written out
    im['imerit_filename'] = output_filename

    # Refuse to overwrite a file that has already been copied
    output_full_path = os.path.join(imerit_output_base,output_filename)
    assert not os.path.isfile(output_full_path)
    copyfile(input_full_path,output_full_path)

# ...for each image
123
+
124
+
125
#%% Write out the annotation list

# The selected image records are written next to the copied images
imerit_batch9_json_filename = os.path.join(imerit_output_base, 'imerit_batch_9.json')

with open(imerit_batch9_json_filename, 'w') as output_file:
    json.dump(images_to_annotate, output_file, indent=2)
130
+
131
+