megadetector-5.0.8-py3-none-any.whl → megadetector-5.0.10-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (190)
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +65 -65
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
  20. api/batch_processing/postprocessing/compare_batch_results.py +113 -43
  21. api/batch_processing/postprocessing/convert_output_format.py +41 -16
  22. api/batch_processing/postprocessing/load_api_results.py +16 -17
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +52 -22
  25. api/batch_processing/postprocessing/merge_detections.py +14 -14
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
  27. api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +102 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -263
  71. data_management/coco_to_yolo.py +79 -58
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +62 -24
  76. data_management/databases/subset_json_db.py +24 -15
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -162
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -158
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +7 -7
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +65 -24
  120. data_management/labelme_to_yolo.py +8 -8
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +13 -13
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +44 -110
  128. data_management/lila/generate_lila_per_image_labels.py +55 -42
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +96 -33
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +110 -97
  135. data_management/remap_coco_categories.py +83 -83
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +30 -23
  138. data_management/wi_download_csv_to_coco.py +246 -239
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +300 -60
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +179 -113
  147. detection/run_inference_with_yolov5_val.py +108 -48
  148. detection/run_tiled_inference.py +111 -40
  149. detection/tf_detector.py +51 -29
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +228 -68
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -871
  157. md_utils/path_utils.py +460 -134
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +176 -60
  163. md_utils/write_html_image_list.py +40 -33
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +597 -291
  168. md_visualization/visualize_db.py +76 -48
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/METADATA +13 -7
  171. megadetector-5.0.10.dist-info/RECORD +224 -0
  172. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/top_level.txt +1 -0
  173. taxonomy_mapping/__init__.py +0 -0
  174. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  175. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  176. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  177. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  178. taxonomy_mapping/retrieve_sample_image.py +12 -12
  179. taxonomy_mapping/simple_image_download.py +11 -11
  180. taxonomy_mapping/species_lookup.py +10 -10
  181. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  182. taxonomy_mapping/taxonomy_graph.py +47 -47
  183. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  184. data_management/cct_json_to_filename_json.py +0 -89
  185. data_management/cct_to_csv.py +0 -140
  186. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  187. detection/detector_training/copy_checkpoints.py +0 -43
  188. megadetector-5.0.8.dist-info/RECORD +0 -205
  189. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/LICENSE +0 -0
  190. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/WHEEL +0 -0
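
The table above is a file-level summary of the diff. For orientation, below is a minimal sketch (Python) of how one might reproduce a file-membership comparison between the two wheels locally; the local wheel filenames are assumptions (download them first, e.g. with `pip download megadetector==5.0.8 --no-deps`), and this only lists added and removed files, not the per-file line counts shown above.

```python
# Sketch: compare which files exist in each wheel. Wheels are zip archives,
# so the standard-library zipfile module is enough. Filenames below are
# placeholders for locally downloaded wheels.
import zipfile

def wheel_files(path):
    # List every file entry in the wheel, skipping directory entries
    with zipfile.ZipFile(path) as z:
        return set(n for n in z.namelist() if not n.endswith('/'))

old = wheel_files('megadetector-5.0.8-py3-none-any.whl')
new = wheel_files('megadetector-5.0.10-py3-none-any.whl')

print('Added files:')
for name in sorted(new - old):
    print('  ' + name)

print('Removed files:')
for name in sorted(old - new):
    print('  ' + name)
```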
data_management/resize_coco_dataset.py

@@ -1,11 +1,11 @@
- ########
- #
- # resize_coco_dataset.py
- #
- # Given a COCO-formatted dataset, resize all the images to a target size,
- # scaling bounding boxes accordingly.
- #
- ########
+ """
+
+ resize_coco_dataset.py
+
+ Given a COCO-formatted dataset, resizes all the images to a target size,
+ scaling bounding boxes accordingly.
+
+ """

  #%% Imports and constants

@@ -28,22 +28,29 @@ def resize_coco_dataset(input_folder,input_filename,
  target_size=(-1,-1),
  correct_size_image_handling='copy'):
  """
- Given a COCO-formatted dataset (images in input_folder, data in input_filename), resize
- all the images to a target size (in output_folder) and scale bounding boxes accordingly
- (in output_filename).
-
- target_size should be a tuple/list of ints, length 2. If either dimension is -1, aspect ratio
- will be preserved. If both dimensions are -1, this means "keep the original size". If
- both dimensions are -1 and correct_size_image_handling is copy, this function is basically
- a no-op.
-
- correct_size_image_handling can be 'copy' (in which case the original image is just copied
- to the output folder) or 'rewrite' (in which case the image is opened via PIL and re-written,
- attempting to preserve the same quality). The only reason to do this is the case where
- you're superstitious about biases coming from images in a training set being written
- by different image encoders.
+ Given a COCO-formatted dataset (images in input_folder, data in input_filename), resizes
+ all the images to a target size (in output_folder) and scales bounding boxes accordingly.
+
+ Args:
+ input_folder (str): the folder where images live; filenames in [input_filename] should
+ be relative to [input_folder]
+ input_filename (str): the (input) COCO-formatted .json file containing annotations
+ output_folder (str): the folder to which we should write resized images; can be the
+ same as [input_folder], in which case images are over-written
+ output_filename (str): the COCO-formatted .json file we should generate that refers to
+ the resized images
+ target_size (list or tuple of ints): this should be tuple/list of ints, with length 2 (w,h).
+ If either dimension is -1, aspect ratio will be preserved. If both dimensions are -1, this means
+ "keep the original size". If both dimensions are -1 and correct_size_image_handling is copy, this
+ function is basically a no-op.
+ correct_size_image_handling (str): can be 'copy' (in which case the original image is just copied
+ to the output folder) or 'rewrite' (in which case the image is opened via PIL and re-written,
+ attempting to preserve the same quality). The only reason to do use 'rewrite' 'is the case where
+ you're superstitious about biases coming from images in a training set being written by different
+ image encoders.

- Returns the COCO database with resized images.
+ Returns:
+ dict: the COCO database with resized images, identical to the content of [output_filename]
  """

  # Read input data
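
The new docstring above documents the full argument list for `resize_coco_dataset`. A minimal usage sketch follows; the import path is inferred from the module's location in the file list above (`data_management/resize_coco_dataset.py`), and all folder/file paths and the target size are placeholder values, not values taken from this diff.

```python
# Hypothetical usage sketch for resize_coco_dataset(), following the docstring in the
# diff above. Paths are placeholders; the import path is an assumption based on the
# file's location in the wheel and may differ in your install.
from data_management.resize_coco_dataset import resize_coco_dataset

resized_db = resize_coco_dataset(
    input_folder='/data/camera-traps/images',          # original images
    input_filename='/data/camera-traps/coco.json',     # original COCO .json
    output_folder='/data/camera-traps/images-1280',    # resized images written here
    output_filename='/data/camera-traps/coco-1280.json',
    target_size=(1280, -1),              # fix width at 1280 px, preserve aspect ratio
    correct_size_image_handling='copy')  # copy images that are already the right size

# The return value is the same COCO dict that was written to output_filename
print('Resized dataset contains {} images'.format(len(resized_db['images'])))
```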
data_management/wi_download_csv_to_coco.py

@@ -1,239 +1,246 @@
- ########
- #
- # wi_download_csv_to_coco.py
- #
- # Convert a .csv file from a Wildlife Insights project export to a COCO camera traps .json file.
- #
- # Currently assumes that common names are unique identifiers, which is convenient but unreliable.
- #
- ########
-
- #%% Imports and constants
-
- import os
- import json
- import pandas as pd
- import numpy as np
-
- from tqdm import tqdm
- from collections import defaultdict
-
- from md_visualization import visualization_utils as vis_utils
-
- wi_extra_annotation_columns = \
- ('is_blank','identified_by','wi_taxon_id','class','order','family','genus','species','uncertainty',
- 'number_of_objects','age','sex','animal_recognizable','individual_id','individual_animal_notes',
- 'behavior','highlighted','markings')
-
- wi_extra_image_columns = ('project_id','deployment_id')
-
- def make_location_id(project_id,deployment_id):
- return 'project_' + str(project_id) + '_deployment_' + deployment_id
-
- def isnan(v):
- try:
- return np.isnan(v)
- except Exception:
- return False
-
- default_category_remappings = {
- 'Homo Species':'Human',
- 'Human-Camera Trapper':'Human',
- 'No CV Result':'Unknown'
- }
-
-
- #%%
-
- def wi_download_csv_to_coco(csv_file_in,
- coco_file_out=None,
- image_folder=None,
- validate_images=False,
- gs_prefix=None,
- verbose=True,
- category_remappings=default_category_remappings):
- """
- Convert a .csv file from a Wildlife Insights project export to a COCO
- camera traps .json file.
-
- If [coco_file_out] is None, uses [csv_file_in].json
-
- gs_prefix is a string to remove from GS URLs to convert to path names... for example, if
- your gs:// URLs look like:
-
- gs://11234134_xyz/deployment/55554/dfadfasdfs.jpg
-
- ...and you specify gs_prefix='11234134_xyz/deployment/', the filenames in
- the .json file will look like:
-
- 55554/dfadfasdfs.jpg
-
- exclude_re discards matching images; typically use to omit thumbnail images.
- """
-
- #%% Create COCO dictionaries
-
- category_name_to_id = {}
- category_name_to_id['empty'] = 0
-
- df = pd.read_csv(csv_file_in)
-
- print('Read {} rows from {}'.format(len(df),csv_file_in))
-
- image_id_to_image = {}
- image_id_to_annotations = defaultdict(list)
-
- # i_row = 0; row = df.iloc[i_row]
- for i_row,row in df.iterrows():
-
- image_id = row['image_id']
-
- if image_id not in image_id_to_image:
-
- im = {}
- image_id_to_image[image_id] = im
-
- im['id'] = image_id
-
- gs_url = row['location']
- assert gs_url.startswith('gs://')
-
- file_name = gs_url.replace('gs://','')
- if gs_prefix is not None:
- file_name = file_name.replace(gs_prefix,'')
-
- location_id = make_location_id(row['project_id'],row['deployment_id'])
- im['file_name'] = file_name
- im['location'] = location_id
- im['datetime'] = row['timestamp']
-
- im['wi_image_info'] = {}
- for s in wi_extra_image_columns:
- im['wi_image_info'][s] = str(row[s])
-
- else:
-
- im = image_id_to_image[image_id]
- assert im['datetime'] == row['timestamp']
- location_id = make_location_id(row['project_id'],row['deployment_id'])
- assert im['location'] == location_id
-
- category_name = row['common_name']
- if category_remappings is not None and category_name in category_remappings:
- category_name = category_remappings[category_name]
-
- if category_name == 'Blank':
- category_name = 'empty'
- assert row['is_blank'] == 1
- else:
- assert row['is_blank'] == 0
- assert isinstance(category_name,str)
- if category_name in category_name_to_id:
- category_id = category_name_to_id[category_name]
- else:
- category_id = len(category_name_to_id)
- category_name_to_id[category_name] = category_id
-
- ann = {}
- ann['image_id'] = image_id
- annotations_this_image = image_id_to_annotations[image_id]
- annotation_number = len(annotations_this_image)
- ann['id'] = image_id + '_' + str(annotation_number).zfill(2)
- ann['category_id'] = category_id
- annotations_this_image.append(ann)
-
- extra_info = {}
- for s in wi_extra_annotation_columns:
- v = row[s]
- if not isnan(v):
- extra_info[s] = v
- ann['wi_extra_info'] = extra_info
-
- # ...for each row
-
- images = list(image_id_to_image.values())
- categories = []
- for category_name in category_name_to_id:
- category_id = category_name_to_id[category_name]
- categories.append({'id':category_id,'name':category_name})
- annotations = []
- for image_id in image_id_to_annotations:
- annotations_this_image = image_id_to_annotations[image_id]
- for ann in annotations_this_image:
- annotations.append(ann)
- info = {'version':'1.00','description':'converted from WI export'}
- info['source_file'] = csv_file_in
- coco_data = {}
- coco_data['info'] = info
- coco_data['images'] = images
- coco_data['annotations'] = annotations
- coco_data['categories'] = categories
-
-
- ##%% Validate images, add sizes
-
- if validate_images:
-
- print('Validating images')
- # TODO: trivially parallelizable
-
- assert os.path.isdir(image_folder), \
- 'Must specify a valid image folder if you specify validate_images=True'
-
- # im = images[0]
- for im in tqdm(images):
- file_name_relative = im['file_name']
- file_name_abs = os.path.join(image_folder,file_name_relative)
- assert os.path.isfile(file_name_abs)
-
- im['corrupt'] = False
- try:
- pil_im = vis_utils.load_image(file_name_abs)
- except Exception:
- im['corrupt'] = True
- if not im['corrupt']:
- im['width'] = pil_im.width
- im['height'] = pil_im.height
-
-
- ##%% Write output json
-
- if coco_file_out is None:
-
- coco_file_out = csv_file_in + '.json'
-
- with open(coco_file_out,'w') as f:
- json.dump(coco_data,f,indent=1)
-
-
- ##%% Validate output
-
- from data_management.databases.integrity_check_json_db import \
- IntegrityCheckOptions,integrity_check_json_db
- options = IntegrityCheckOptions()
- options.baseDir = image_folder
- options.bCheckImageExistence = True
- options.verbose = verbose
- _ = integrity_check_json_db(coco_file_out,options)
-
-
-
- #%% Interactive driver
-
- if False:
-
- #%%
-
- base_folder = r'a/b/c'
- csv_file_in = os.path.join(base_folder,'images.csv')
- coco_file_out = None
- gs_prefix = 'a_b_c_main/'
- image_folder = os.path.join(base_folder,'images')
- validate_images = False
- verbose = True
- category_remappings = default_category_remappings
-
-
- #%% Command-line driver
-
- # TODO
+ """
2
+
3
+ wi_download_csv_to_coco.py
4
+
5
+ Converts a .csv file from a Wildlife Insights project export to a COCO camera traps .json file.
6
+
7
+ Currently assumes that common names are unique identifiers, which is convenient but unreliable.
8
+
9
+ """
10
+
11
+ #%% Imports and constants
12
+
13
+ import os
14
+ import json
15
+ import pandas as pd
16
+
17
+ from tqdm import tqdm
18
+ from collections import defaultdict
19
+
20
+ from md_visualization import visualization_utils as vis_utils
21
+ from md_utils.ct_utils import isnan
22
+
23
+ wi_extra_annotation_columns = \
24
+ ('is_blank','identified_by','wi_taxon_id','class','order','family','genus','species','uncertainty',
25
+ 'number_of_objects','age','sex','animal_recognizable','individual_id','individual_animal_notes',
26
+ 'behavior','highlighted','markings')
27
+
28
+ wi_extra_image_columns = ('project_id','deployment_id')
29
+
30
+ def _make_location_id(project_id,deployment_id):
31
+ return 'project_' + str(project_id) + '_deployment_' + deployment_id
32
+
33
+ default_category_remappings = {
34
+ 'Homo Species':'Human',
35
+ 'Human-Camera Trapper':'Human',
36
+ 'No CV Result':'Unknown'
37
+ }
38
+
39
+
40
+ #%% Main function
41
+
42
+ def wi_download_csv_to_coco(csv_file_in,
43
+ coco_file_out=None,
44
+ image_folder=None,
45
+ validate_images=False,
46
+ gs_prefix=None,
47
+ verbose=True,
48
+ category_remappings=default_category_remappings):
49
+ """
50
+ Converts a .csv file from a Wildlife Insights project export to a COCO
51
+ Camera Traps .json file.
52
+
53
+ Args:
54
+ csv_file_in (str): the downloaded .csv file we should convert to COCO
55
+ coco_file_out (str, optional): the .json file we should write; if [coco_file_out] is None,
56
+ uses [csv_file_in].json
57
+ image_folder (str, optional): the folder where images live, only relevant if
58
+ [validate_images] is True
59
+ validate_images (bool, optional): whether to check images for corruption and load
60
+ image sizes; if this is True, [image_folder] must be a valid folder
61
+ gs_prefix (str, optional): a string to remove from GS URLs to convert to path names...
62
+ for example, if your gs:// URLs look like:
63
+
64
+ `gs://11234134_xyz/deployment/55554/dfadfasdfs.jpg`
65
+
66
+ ...and you specify gs_prefix='11234134_xyz/deployment/', the filenames in
67
+ the .json file will look like:
68
+
69
+ `55554/dfadfasdfs.jpg`
70
+ verbose (bool, optional): enable additional debug console output
71
+ category_remappings (dict, optional): str --> str dict that maps any number of
72
+ WI category names to output category names; for example defaults to mapping
73
+ "Homo Species" to "Human", but leaves 99.99% of categories unchanged.
74
+
75
+ Returns:
76
+ dict: COCO-formatted data, identical to what's written to [coco_file_out]
77
+ """
78
+
79
+ ##%% Create COCO dictionaries
80
+
81
+ category_name_to_id = {}
82
+ category_name_to_id['empty'] = 0
83
+
84
+ df = pd.read_csv(csv_file_in)
85
+
86
+ print('Read {} rows from {}'.format(len(df),csv_file_in))
87
+
88
+ image_id_to_image = {}
89
+ image_id_to_annotations = defaultdict(list)
90
+
91
+ # i_row = 0; row = df.iloc[i_row]
92
+ for i_row,row in df.iterrows():
93
+
94
+ image_id = row['image_id']
95
+
96
+ if image_id not in image_id_to_image:
97
+
98
+ im = {}
99
+ image_id_to_image[image_id] = im
100
+
101
+ im['id'] = image_id
102
+
103
+ gs_url = row['location']
104
+ assert gs_url.startswith('gs://')
105
+
106
+ file_name = gs_url.replace('gs://','')
107
+ if gs_prefix is not None:
108
+ file_name = file_name.replace(gs_prefix,'')
109
+
110
+ location_id = _make_location_id(row['project_id'],row['deployment_id'])
111
+ im['file_name'] = file_name
112
+ im['location'] = location_id
113
+ im['datetime'] = row['timestamp']
114
+
115
+ im['wi_image_info'] = {}
116
+ for s in wi_extra_image_columns:
117
+ im['wi_image_info'][s] = str(row[s])
118
+
119
+ else:
120
+
121
+ im = image_id_to_image[image_id]
122
+ assert im['datetime'] == row['timestamp']
123
+ location_id = _make_location_id(row['project_id'],row['deployment_id'])
124
+ assert im['location'] == location_id
125
+
126
+ category_name = row['common_name']
127
+ if category_remappings is not None and category_name in category_remappings:
128
+ category_name = category_remappings[category_name]
129
+
130
+ if category_name == 'Blank':
131
+ category_name = 'empty'
132
+ assert row['is_blank'] == 1
133
+ else:
134
+ assert row['is_blank'] == 0
135
+ assert isinstance(category_name,str)
136
+ if category_name in category_name_to_id:
137
+ category_id = category_name_to_id[category_name]
138
+ else:
139
+ category_id = len(category_name_to_id)
140
+ category_name_to_id[category_name] = category_id
141
+
142
+ ann = {}
143
+ ann['image_id'] = image_id
144
+ annotations_this_image = image_id_to_annotations[image_id]
145
+ annotation_number = len(annotations_this_image)
146
+ ann['id'] = image_id + '_' + str(annotation_number).zfill(2)
147
+ ann['category_id'] = category_id
148
+ annotations_this_image.append(ann)
149
+
150
+ extra_info = {}
151
+ for s in wi_extra_annotation_columns:
152
+ v = row[s]
153
+ if not isnan(v):
154
+ extra_info[s] = v
155
+ ann['wi_extra_info'] = extra_info
156
+
157
+ # ...for each row
158
+
159
+ images = list(image_id_to_image.values())
160
+ categories = []
161
+ for category_name in category_name_to_id:
162
+ category_id = category_name_to_id[category_name]
163
+ categories.append({'id':category_id,'name':category_name})
164
+ annotations = []
165
+ for image_id in image_id_to_annotations:
166
+ annotations_this_image = image_id_to_annotations[image_id]
167
+ for ann in annotations_this_image:
168
+ annotations.append(ann)
169
+ info = {'version':'1.00','description':'converted from WI export'}
170
+ info['source_file'] = csv_file_in
171
+ coco_data = {}
172
+ coco_data['info'] = info
173
+ coco_data['images'] = images
174
+ coco_data['annotations'] = annotations
175
+ coco_data['categories'] = categories
176
+
177
+
178
+ ##%% Validate images, add sizes
179
+
180
+ if validate_images:
181
+
182
+ print('Validating images')
183
+ # TODO: trivially parallelizable
184
+
185
+ assert os.path.isdir(image_folder), \
186
+ 'Must specify a valid image folder if you specify validate_images=True'
187
+
188
+ # im = images[0]
189
+ for im in tqdm(images):
190
+ file_name_relative = im['file_name']
191
+ file_name_abs = os.path.join(image_folder,file_name_relative)
192
+ assert os.path.isfile(file_name_abs)
193
+
194
+ im['corrupt'] = False
195
+ try:
196
+ pil_im = vis_utils.load_image(file_name_abs)
197
+ except Exception:
198
+ im['corrupt'] = True
199
+ if not im['corrupt']:
200
+ im['width'] = pil_im.width
201
+ im['height'] = pil_im.height
202
+
203
+
204
+ ##%% Write output json
205
+
206
+ if coco_file_out is None:
207
+ coco_file_out = csv_file_in + '.json'
208
+
209
+ with open(coco_file_out,'w') as f:
210
+ json.dump(coco_data,f,indent=1)
211
+
212
+
213
+ ##%% Validate output
214
+
215
+ from data_management.databases.integrity_check_json_db import \
216
+ IntegrityCheckOptions,integrity_check_json_db
217
+ options = IntegrityCheckOptions()
218
+ options.baseDir = image_folder
219
+ options.bCheckImageExistence = True
220
+ options.verbose = verbose
221
+ _ = integrity_check_json_db(coco_file_out,options)
222
+
223
+ return coco_data
224
+
225
+ # ...def wi_download_csv_to_coco(...)
226
+
227
+
228
+ #%% Interactive driver
229
+
230
+ if False:
231
+
232
+ #%%
233
+
234
+ base_folder = r'a/b/c'
235
+ csv_file_in = os.path.join(base_folder,'images.csv')
236
+ coco_file_out = None
237
+ gs_prefix = 'a_b_c_main/'
238
+ image_folder = os.path.join(base_folder,'images')
239
+ validate_images = False
240
+ verbose = True
241
+ category_remappings = default_category_remappings
242
+
243
+
244
+ #%% Command-line driver
245
+
246
+ # TODO
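
The new docstring above spells out the arguments to `wi_download_csv_to_coco`, including the gs_prefix behavior and the new return value. A minimal usage sketch follows; the import path is inferred from the module's location in the file list above (`data_management/wi_download_csv_to_coco.py`), and the file paths, gs_prefix value, and folder layout are placeholders, not values taken from this diff.

```python
# Hypothetical usage sketch for wi_download_csv_to_coco(), following the Args section
# in the new docstring above. All paths below are illustrative only.
from data_management.wi_download_csv_to_coco import wi_download_csv_to_coco

# With gs_prefix='11234134_xyz/deployment/', an image stored at
# gs://11234134_xyz/deployment/55554/dfadfasdfs.jpg becomes '55554/dfadfasdfs.jpg'
# in the output .json, i.e. a path relative to the local image folder.
coco_data = wi_download_csv_to_coco(
    csv_file_in='/data/wi-export/images.csv',
    coco_file_out=None,                     # defaults to '/data/wi-export/images.csv.json'
    image_folder='/data/wi-export/images',  # required when validate_images=True
    validate_images=True,                   # flags corrupt files, records width/height
    gs_prefix='11234134_xyz/deployment/',
    verbose=True)

print('Wrote {} images and {} annotations'.format(
    len(coco_data['images']), len(coco_data['annotations'])))
```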