megadetector 5.0.11__py3-none-any.whl → 5.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic; see the registry's release page for more details.

Files changed (201)
  1. megadetector/api/__init__.py +0 -0
  2. megadetector/api/batch_processing/__init__.py +0 -0
  3. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  4. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. megadetector/api/batch_processing/api_core/batch_service/score.py +439 -0
  6. megadetector/api/batch_processing/api_core/server.py +294 -0
  7. megadetector/api/batch_processing/api_core/server_api_config.py +98 -0
  8. megadetector/api/batch_processing/api_core/server_app_config.py +55 -0
  9. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +220 -0
  10. megadetector/api/batch_processing/api_core/server_job_status_table.py +152 -0
  11. megadetector/api/batch_processing/api_core/server_orchestration.py +360 -0
  12. megadetector/api/batch_processing/api_core/server_utils.py +92 -0
  13. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  14. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +46 -0
  15. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  16. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +152 -0
  17. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  18. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  19. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  20. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  21. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +126 -0
  22. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  23. megadetector/api/synchronous/__init__.py +0 -0
  24. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  25. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +152 -0
  26. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -0
  27. megadetector/api/synchronous/api_core/animal_detection_api/config.py +35 -0
  28. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  29. megadetector/api/synchronous/api_core/tests/load_test.py +110 -0
  30. megadetector/classification/__init__.py +0 -0
  31. megadetector/classification/aggregate_classifier_probs.py +108 -0
  32. megadetector/classification/analyze_failed_images.py +227 -0
  33. megadetector/classification/cache_batchapi_outputs.py +198 -0
  34. megadetector/classification/create_classification_dataset.py +627 -0
  35. megadetector/classification/crop_detections.py +516 -0
  36. megadetector/classification/csv_to_json.py +226 -0
  37. megadetector/classification/detect_and_crop.py +855 -0
  38. megadetector/classification/efficientnet/__init__.py +9 -0
  39. megadetector/classification/efficientnet/model.py +415 -0
  40. megadetector/classification/efficientnet/utils.py +610 -0
  41. megadetector/classification/evaluate_model.py +520 -0
  42. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  43. megadetector/classification/json_to_azcopy_list.py +63 -0
  44. megadetector/classification/json_validator.py +699 -0
  45. megadetector/classification/map_classification_categories.py +276 -0
  46. megadetector/classification/merge_classification_detection_output.py +506 -0
  47. megadetector/classification/prepare_classification_script.py +194 -0
  48. megadetector/classification/prepare_classification_script_mc.py +228 -0
  49. megadetector/classification/run_classifier.py +287 -0
  50. megadetector/classification/save_mislabeled.py +110 -0
  51. megadetector/classification/train_classifier.py +827 -0
  52. megadetector/classification/train_classifier_tf.py +725 -0
  53. megadetector/classification/train_utils.py +323 -0
  54. megadetector/data_management/__init__.py +0 -0
  55. megadetector/data_management/annotations/__init__.py +0 -0
  56. megadetector/data_management/annotations/annotation_constants.py +34 -0
  57. megadetector/data_management/camtrap_dp_to_coco.py +239 -0
  58. megadetector/data_management/cct_json_utils.py +395 -0
  59. megadetector/data_management/cct_to_md.py +176 -0
  60. megadetector/data_management/cct_to_wi.py +289 -0
  61. megadetector/data_management/coco_to_labelme.py +272 -0
  62. megadetector/data_management/coco_to_yolo.py +662 -0
  63. megadetector/data_management/databases/__init__.py +0 -0
  64. megadetector/data_management/databases/add_width_and_height_to_db.py +33 -0
  65. megadetector/data_management/databases/combine_coco_camera_traps_files.py +206 -0
  66. megadetector/data_management/databases/integrity_check_json_db.py +477 -0
  67. megadetector/data_management/databases/subset_json_db.py +115 -0
  68. megadetector/data_management/generate_crops_from_cct.py +149 -0
  69. megadetector/data_management/get_image_sizes.py +189 -0
  70. megadetector/data_management/importers/add_nacti_sizes.py +52 -0
  71. megadetector/data_management/importers/add_timestamps_to_icct.py +79 -0
  72. megadetector/data_management/importers/animl_results_to_md_results.py +158 -0
  73. megadetector/data_management/importers/auckland_doc_test_to_json.py +373 -0
  74. megadetector/data_management/importers/auckland_doc_to_json.py +201 -0
  75. megadetector/data_management/importers/awc_to_json.py +191 -0
  76. megadetector/data_management/importers/bellevue_to_json.py +273 -0
  77. megadetector/data_management/importers/cacophony-thermal-importer.py +796 -0
  78. megadetector/data_management/importers/carrizo_shrubfree_2018.py +269 -0
  79. megadetector/data_management/importers/carrizo_trail_cam_2017.py +289 -0
  80. megadetector/data_management/importers/cct_field_adjustments.py +58 -0
  81. megadetector/data_management/importers/channel_islands_to_cct.py +913 -0
  82. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +180 -0
  83. megadetector/data_management/importers/eMammal/eMammal_helpers.py +249 -0
  84. megadetector/data_management/importers/eMammal/make_eMammal_json.py +223 -0
  85. megadetector/data_management/importers/ena24_to_json.py +276 -0
  86. megadetector/data_management/importers/filenames_to_json.py +386 -0
  87. megadetector/data_management/importers/helena_to_cct.py +283 -0
  88. megadetector/data_management/importers/idaho-camera-traps.py +1407 -0
  89. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +294 -0
  90. megadetector/data_management/importers/jb_csv_to_json.py +150 -0
  91. megadetector/data_management/importers/mcgill_to_json.py +250 -0
  92. megadetector/data_management/importers/missouri_to_json.py +490 -0
  93. megadetector/data_management/importers/nacti_fieldname_adjustments.py +79 -0
  94. megadetector/data_management/importers/noaa_seals_2019.py +181 -0
  95. megadetector/data_management/importers/pc_to_json.py +365 -0
  96. megadetector/data_management/importers/plot_wni_giraffes.py +123 -0
  97. megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -0
  98. megadetector/data_management/importers/prepare_zsl_imerit.py +131 -0
  99. megadetector/data_management/importers/rspb_to_json.py +356 -0
  100. megadetector/data_management/importers/save_the_elephants_survey_A.py +320 -0
  101. megadetector/data_management/importers/save_the_elephants_survey_B.py +329 -0
  102. megadetector/data_management/importers/snapshot_safari_importer.py +758 -0
  103. megadetector/data_management/importers/snapshot_safari_importer_reprise.py +665 -0
  104. megadetector/data_management/importers/snapshot_serengeti_lila.py +1067 -0
  105. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +150 -0
  106. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +153 -0
  107. megadetector/data_management/importers/sulross_get_exif.py +65 -0
  108. megadetector/data_management/importers/timelapse_csv_set_to_json.py +490 -0
  109. megadetector/data_management/importers/ubc_to_json.py +399 -0
  110. megadetector/data_management/importers/umn_to_json.py +507 -0
  111. megadetector/data_management/importers/wellington_to_json.py +263 -0
  112. megadetector/data_management/importers/wi_to_json.py +442 -0
  113. megadetector/data_management/importers/zamba_results_to_md_results.py +181 -0
  114. megadetector/data_management/labelme_to_coco.py +547 -0
  115. megadetector/data_management/labelme_to_yolo.py +272 -0
  116. megadetector/data_management/lila/__init__.py +0 -0
  117. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +97 -0
  118. megadetector/data_management/lila/add_locations_to_nacti.py +147 -0
  119. megadetector/data_management/lila/create_lila_blank_set.py +558 -0
  120. megadetector/data_management/lila/create_lila_test_set.py +152 -0
  121. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  122. megadetector/data_management/lila/download_lila_subset.py +178 -0
  123. megadetector/data_management/lila/generate_lila_per_image_labels.py +516 -0
  124. megadetector/data_management/lila/get_lila_annotation_counts.py +170 -0
  125. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  126. megadetector/data_management/lila/lila_common.py +300 -0
  127. megadetector/data_management/lila/test_lila_metadata_urls.py +132 -0
  128. megadetector/data_management/ocr_tools.py +874 -0
  129. megadetector/data_management/read_exif.py +681 -0
  130. megadetector/data_management/remap_coco_categories.py +84 -0
  131. megadetector/data_management/remove_exif.py +66 -0
  132. megadetector/data_management/resize_coco_dataset.py +189 -0
  133. megadetector/data_management/wi_download_csv_to_coco.py +246 -0
  134. megadetector/data_management/yolo_output_to_md_output.py +441 -0
  135. megadetector/data_management/yolo_to_coco.py +676 -0
  136. megadetector/detection/__init__.py +0 -0
  137. megadetector/detection/detector_training/__init__.py +0 -0
  138. megadetector/detection/detector_training/model_main_tf2.py +114 -0
  139. megadetector/detection/process_video.py +702 -0
  140. megadetector/detection/pytorch_detector.py +341 -0
  141. megadetector/detection/run_detector.py +779 -0
  142. megadetector/detection/run_detector_batch.py +1219 -0
  143. megadetector/detection/run_inference_with_yolov5_val.py +917 -0
  144. megadetector/detection/run_tiled_inference.py +934 -0
  145. megadetector/detection/tf_detector.py +189 -0
  146. megadetector/detection/video_utils.py +606 -0
  147. megadetector/postprocessing/__init__.py +0 -0
  148. megadetector/postprocessing/add_max_conf.py +64 -0
  149. megadetector/postprocessing/categorize_detections_by_size.py +163 -0
  150. megadetector/postprocessing/combine_api_outputs.py +249 -0
  151. megadetector/postprocessing/compare_batch_results.py +958 -0
  152. megadetector/postprocessing/convert_output_format.py +396 -0
  153. megadetector/postprocessing/load_api_results.py +195 -0
  154. megadetector/postprocessing/md_to_coco.py +310 -0
  155. megadetector/postprocessing/md_to_labelme.py +330 -0
  156. megadetector/postprocessing/merge_detections.py +401 -0
  157. megadetector/postprocessing/postprocess_batch_results.py +1902 -0
  158. megadetector/postprocessing/remap_detection_categories.py +170 -0
  159. megadetector/postprocessing/render_detection_confusion_matrix.py +660 -0
  160. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +211 -0
  161. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +83 -0
  162. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1631 -0
  163. megadetector/postprocessing/separate_detections_into_folders.py +730 -0
  164. megadetector/postprocessing/subset_json_detector_output.py +696 -0
  165. megadetector/postprocessing/top_folders_to_bottom.py +223 -0
  166. megadetector/taxonomy_mapping/__init__.py +0 -0
  167. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  168. megadetector/taxonomy_mapping/map_new_lila_datasets.py +150 -0
  169. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -0
  170. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +590 -0
  171. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  172. megadetector/taxonomy_mapping/simple_image_download.py +219 -0
  173. megadetector/taxonomy_mapping/species_lookup.py +834 -0
  174. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  175. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  176. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  177. megadetector/utils/__init__.py +0 -0
  178. megadetector/utils/azure_utils.py +178 -0
  179. megadetector/utils/ct_utils.py +612 -0
  180. megadetector/utils/directory_listing.py +246 -0
  181. megadetector/utils/md_tests.py +968 -0
  182. megadetector/utils/path_utils.py +1044 -0
  183. megadetector/utils/process_utils.py +157 -0
  184. megadetector/utils/sas_blob_utils.py +509 -0
  185. megadetector/utils/split_locations_into_train_val.py +228 -0
  186. megadetector/utils/string_utils.py +92 -0
  187. megadetector/utils/url_utils.py +323 -0
  188. megadetector/utils/write_html_image_list.py +225 -0
  189. megadetector/visualization/__init__.py +0 -0
  190. megadetector/visualization/plot_utils.py +293 -0
  191. megadetector/visualization/render_images_with_thumbnails.py +275 -0
  192. megadetector/visualization/visualization_utils.py +1536 -0
  193. megadetector/visualization/visualize_db.py +550 -0
  194. megadetector/visualization/visualize_detector_output.py +405 -0
  195. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/METADATA +1 -1
  196. megadetector-5.0.12.dist-info/RECORD +199 -0
  197. megadetector-5.0.12.dist-info/top_level.txt +1 -0
  198. megadetector-5.0.11.dist-info/RECORD +0 -5
  199. megadetector-5.0.11.dist-info/top_level.txt +0 -1
  200. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/LICENSE +0 -0
  201. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/WHEEL +0 -0
@@ -0,0 +1,396 @@
1
+ """
2
+
3
+ convert_output_format.py
4
+
5
+ Converts between file formats output by our batch processing API. Currently
6
+ supports json <--> csv conversion, but this should be the landing place for any
7
+ conversion - including between hypothetical alternative .json versions - that we support
8
+ in the future.
9
+
10
+ The .csv format is largely obsolete, don't use it unless you're super-duper sure you need it.
11
+
12
+ """
13
+
14
+ #%% Constants and imports
15
+
16
+ import argparse
17
+ import json
18
+ import csv
19
+ import sys
20
+ import os
21
+
22
+ from tqdm import tqdm
23
+
24
+ from megadetector.postprocessing.load_api_results import load_api_results_csv
25
+ from megadetector.data_management.annotations import annotation_constants
26
+ from megadetector.utils import ct_utils
27
+
28
+ CONF_DIGITS = 3
29
+
30
+
31
+ #%% Conversion functions
32
+
33
def convert_json_to_csv(input_path,output_path=None,min_confidence=None,
                        omit_bounding_boxes=False,output_encoding=None,
                        overwrite=True):
    """
    Converts a MD results .json file to a totally non-standard .csv format.

    If [output_path] is None, will convert x.json to x.csv.

    TODO: this function should obviously be using Pandas or some other sensible structured
    representation of tabular data.  Even a list of dicts.  This implementation is quite
    brittle and depends on adding fields to every row in exactly the right order.

    Args:
        input_path (str): the input .json file to convert
        output_path (str, optional): the output .csv file to generate; if this is None, uses
            [input_path].csv
        min_confidence (float, optional): the minimum-confidence detection we should include
            in the "detections" column; has no impact on the other columns
        omit_bounding_boxes (bool): whether to leave out the json-formatted bounding boxes
            that make up the "detections" column, which are not generally useful for someone who
            wants to consume this data as a .csv file
        output_encoding (str, optional): encoding to use for the .csv file
        overwrite (bool): whether to overwrite an existing .csv file; if this is False and the
            output file exists, no-ops and returns
    """

    if output_path is None:
        output_path = os.path.splitext(input_path)[0]+'.csv'

    if os.path.isfile(output_path) and (not overwrite):
        print('File {} exists, skipping json --> csv conversion'.format(output_path))
        return

    print('Loading json results from {}...'.format(input_path))

    # Bug fix: use a context manager rather than json.load(open(...)), which
    # leaked the input file handle
    with open(input_path) as f:
        json_output = json.load(f)

    rows = []

    fixed_columns = ['image_path', 'max_confidence', 'detections']

    # We add an output column for each class other than 'empty',
    # containing the maximum probability of that class for each image
    n_non_empty_detection_categories = annotation_constants.NUM_DETECTOR_CATEGORIES
    detection_category_column_names = []
    assert annotation_constants.detector_bbox_categories[0] == 'empty'
    for cat_id in range(1,n_non_empty_detection_categories+1):
        cat_name = annotation_constants.detector_bbox_categories[cat_id]
        detection_category_column_names.append('max_conf_' + cat_name)

    n_classification_categories = 0

    if 'classification_categories' in json_output.keys():

        classification_category_id_to_name = json_output['classification_categories']
        classification_category_ids = list(classification_category_id_to_name.keys())
        classification_category_id_to_column_number = {}
        classification_category_column_names = []

        for i_category,category_id in enumerate(classification_category_ids):
            # Sanitize category names for use as column headers
            category_name = classification_category_id_to_name[category_id].\
                replace(' ','_').replace(',','')
            classification_category_column_names.append('max_classification_conf_' + category_name)
            classification_category_id_to_column_number[category_id] = i_category

        n_classification_categories = len(classification_category_ids)

    # There are several .json fields for which we add .csv columns; other random bespoke fields
    # will be ignored.
    optional_fields = ['width','height','datetime','exif_metadata']
    optional_fields_present = set()

    # Iterate once over the data to check for optional fields
    print('Looking for optional fields...')

    for im in tqdm(json_output['images']):
        # Which optional fields are present for this image?
        for k in im.keys():
            if k in optional_fields:
                optional_fields_present.add(k)

    optional_fields_present = sorted(list(optional_fields_present))
    if len(optional_fields_present) > 0:
        print('Found {} optional fields'.format(len(optional_fields_present)))

    expected_row_length = len(fixed_columns) + len(detection_category_column_names) + \
        n_classification_categories + len(optional_fields_present)

    print('Formatting results...')

    # i_image = 0; im = json_output['images'][i_image]
    for im in tqdm(json_output['images']):

        image_id = im['file']

        # Failed images get a short three-element row: [filename, 'failure', error string]
        if 'failure' in im and im['failure'] is not None:
            row = [image_id, 'failure', im['failure']]
            rows.append(row)
            continue

        max_conf = ct_utils.get_max_conf(im)
        detections = []
        max_detection_category_probabilities = [None] * n_non_empty_detection_categories
        max_classification_category_probabilities = [0] * n_classification_categories

        # d = im['detections'][0]
        for d in im['detections']:

            # Skip sub-threshold detections
            if (min_confidence is not None) and (d['conf'] < min_confidence):
                continue

            input_bbox = d['bbox']

            # Our .json format is xmin/ymin/w/h
            #
            # Our .csv format was ymin/xmin/ymax/xmax
            xmin = input_bbox[0]
            ymin = input_bbox[1]
            xmax = input_bbox[0] + input_bbox[2]
            ymax = input_bbox[1] + input_bbox[3]
            output_detection = [ymin, xmin, ymax, xmax]

            output_detection.append(d['conf'])

            # Category 0 is empty, for which we don't have a column, so the max
            # confidence for category N goes in column N-1
            detection_category_id = int(d['category'])
            assert detection_category_id > 0 and detection_category_id <= \
                n_non_empty_detection_categories
            detection_category_column = detection_category_id - 1
            detection_category_max = max_detection_category_probabilities[detection_category_column]
            if detection_category_max is None or d['conf'] > detection_category_max:
                max_detection_category_probabilities[detection_category_column] = d['conf']

            output_detection.append(detection_category_id)
            detections.append(output_detection)

            if 'classifications' in d:

                assert n_classification_categories > 0,\
                    'Oops, I have classification results, but no classification metadata'

                for c in d['classifications']:
                    category_id = c[0]
                    p = c[1]
                    category_index = classification_category_id_to_column_number[category_id]
                    if (max_classification_category_probabilities[category_index] < p):
                        max_classification_category_probabilities[category_index] = p

                # ...for each classification

            # ...if we have classification results for this detection

        # ...for each detection

        detection_string = ''
        if not omit_bounding_boxes:
            detection_string = json.dumps(detections)

        row = [image_id, max_conf, detection_string]
        row.extend(max_detection_category_probabilities)
        row.extend(max_classification_category_probabilities)

        for field_name in optional_fields_present:
            if field_name not in im:
                row.append('')
            else:
                row.append(str(im[field_name]))

        assert len(row) == expected_row_length
        rows.append(row)

    # ...for each image

    print('Writing to csv...')

    with open(output_path, 'w', newline='', encoding=output_encoding) as f:
        writer = csv.writer(f, delimiter=',')
        # Bug fix: copy fixed_columns rather than aliasing it; the original code
        # mutated [fixed_columns] when extending the header
        header = list(fixed_columns)
        header.extend(detection_category_column_names)
        if n_classification_categories > 0:
            header.extend(classification_category_column_names)
        for field_name in optional_fields_present:
            header.append(field_name)
        writer.writerow(header)
        writer.writerows(rows)

# ...def convert_json_to_csv(...)
220
+
221
+
222
def convert_csv_to_json(input_path,output_path=None,overwrite=True):
    """
    Convert .csv to .json.  If output_path is None, will convert x.csv to x.json.

    Args:
        input_path (str): .csv filename to convert to .json
        output_path (str, optional): the output .json file to generate; if this is None, uses
            [input_path].json
        overwrite (bool): whether to overwrite an existing .json file; if this is False and the
            output file exists, no-ops and returns
    """

    if output_path is None:
        output_path = os.path.splitext(input_path)[0]+'.json'

    if os.path.isfile(output_path) and (not overwrite):
        print('File {} exists, skipping csv --> json conversion'.format(output_path))
        return

    # Format spec:
    #
    # https://github.com/agentmorris/MegaDetector/tree/main/megadetector/api/batch_processing

    print('Loading csv results...')
    df = load_api_results_csv(input_path)

    # The .csv format carries no metadata, so all "info" fields are unknown
    info = {
        "format_version":"1.2",
        "detector": "unknown",
        "detection_completion_time" : "unknown",
        "classifier": "unknown",
        "classification_completion_time": "unknown"
    }

    classification_categories = {}
    detection_categories = annotation_constants.detector_bbox_categories

    images = []

    # i_file = 0; row = df.iloc[i_file]
    for i_file,row in df.iterrows():

        image = {}
        image['file'] = row['image_path']
        image['max_detection_conf'] = round(row['max_confidence'], CONF_DIGITS)
        src_detections = row['detections']
        out_detections = []

        # Each .csv detection is [ymin, xmin, ymax, xmax, conf, category]
        for detection in src_detections:

            # Our .csv format was ymin/xmin/ymax/xmax
            #
            # Our .json format is xmin/ymin/w/h
            ymin = detection[0]
            xmin = detection[1]
            ymax = detection[2]
            xmax = detection[3]
            bbox = [xmin, ymin, xmax-xmin, ymax-ymin]
            conf = detection[4]
            i_class = detection[5]
            out_detection = {}
            out_detection['category'] = str(i_class)
            out_detection['conf'] = conf
            out_detection['bbox'] = bbox
            out_detections.append(out_detection)

        # ...for each detection

        image['detections'] = out_detections
        images.append(image)

    # ...for each image

    json_out = {}
    json_out['info'] = info
    json_out['detection_categories'] = detection_categories
    json_out['classification_categories'] = classification_categories
    json_out['images'] = images

    # Bug fix: use a context manager rather than json.dump(...,open(...),...),
    # which leaked the output file handle
    with open(output_path,'w') as f:
        json.dump(json_out,f,indent=1)

# ...def convert_csv_to_json(...)
304
+
305
+
306
#%% Interactive driver

# Scratch cells for running conversions interactively in an IDE; this block is
# never executed when the module is imported or run as a script.
if False:

    #%% Convert a single .json file to .csv

    input_path = r'c:\temp\test.json'
    min_confidence = None
    output_path = input_path + '.csv'
    convert_json_to_csv(input_path,output_path,min_confidence=min_confidence,
                        omit_bounding_boxes=False)

    #%% Convert every .json file in a folder to .csv

    base_path = r'c:\temp\json'
    input_paths = os.listdir(base_path)
    input_paths = [os.path.join(base_path,s) for s in input_paths]

    min_confidence = None
    for input_path in input_paths:
        output_path = input_path + '.csv'
        convert_json_to_csv(input_path,output_path,min_confidence=min_confidence,
                            omit_bounding_boxes=True)

    #%% Concatenate .csv files from a folder

    import glob
    csv_files = glob.glob(os.path.join(base_path,'*.json.csv' ))
    master_csv = os.path.join(base_path,'all.csv')

    print('Concatenating {} files to {}'.format(len(csv_files),master_csv))

    # Write the header exactly once (taken from the first file); all files are
    # asserted to share the same header
    header = None
    with open(master_csv, 'w') as fout:

        for filename in tqdm(csv_files):

            with open(filename) as fin:

                lines = fin.readlines()

                if header is not None:
                    assert lines[0] == header
                else:
                    header = lines[0]
                    fout.write(header)

                # Copy non-blank data rows
                for line in lines[1:]:
                    if len(line.strip()) == 0:
                        continue
                    fout.write(line)

        # ...for each .csv file

    # with open(master_csv)
362
+
363
+ #%% Command-line driver
364
+
365
def main():
    """
    Command-line driver: converts a .json results file to .csv, or vice-versa,
    inferring the direction from the file extensions.
    """

    parser = argparse.ArgumentParser()
    parser.add_argument('input_path', type=str,
                        help='Input filename ending in .json or .csv')
    parser.add_argument('--output_path', type=str, default=None,
                        help='Output filename ending in .json or .csv (defaults to '
                             'input file, with .json/.csv replaced by .csv/.json)')

    # With no arguments at all, print usage rather than erroring out
    if not sys.argv[1:]:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    # Default the output name by swapping the input extension
    if args.output_path is None:
        if args.input_path.endswith('.csv'):
            args.output_path = args.input_path[:-4] + '.json'
        elif args.input_path.endswith('.json'):
            args.output_path = args.input_path[:-5] + '.csv'
        else:
            raise ValueError('Illegal input file extension')

    csv_to_json = args.input_path.endswith('.csv') and args.output_path.endswith('.json')
    json_to_csv = args.input_path.endswith('.json') and args.output_path.endswith('.csv')

    if csv_to_json:
        convert_csv_to_json(args.input_path, args.output_path)
    elif json_to_csv:
        convert_json_to_csv(args.input_path, args.output_path)
    else:
        raise ValueError('Illegal format combination')

if __name__ == '__main__':
    main()
@@ -0,0 +1,195 @@
1
+ """
2
+
3
+ load_api_results.py
4
+
5
+ DEPRECATED
6
+
7
+ As of 2023.12, this module is used in postprocessing and RDE. Not recommended
8
+ for new code.
9
+
10
+ Loads the output of the batch processing API (json) into a Pandas dataframe.
11
+
12
+ Includes functions to read/write the (very very old) .csv results format.
13
+
14
+ """
15
+
16
+ #%% Imports
17
+
18
+ import json
19
+ import os
20
+
21
+ from typing import Dict, Mapping, Optional, Tuple
22
+
23
+ import pandas as pd
24
+
25
+ from megadetector.utils import ct_utils
26
+
27
+
28
+ #%% Functions for loading .json results into a Pandas DataFrame, and writing back to .json
29
+
30
def load_api_results(api_output_path: str, normalize_paths: bool = True,
                     filename_replacements: Optional[Mapping[str, str]] = None,
                     force_forward_slashes: bool = True
                     ) -> Tuple[pd.DataFrame, Dict]:
    r"""
    Loads json-formatted MegaDetector results to a Pandas DataFrame.

    Args:
        api_output_path: path to the output json file
        normalize_paths: whether to apply os.path.normpath to the 'file' field
            in each image entry in the output file
        filename_replacements: replace some path tokens to match local paths to
            the original blob structure
        force_forward_slashes: whether to convert backslashes to forward slashes
            in filenames

    Returns:
        detection_results: pd.DataFrame, contains at least the columns ['file', 'detections','failure']
        other_fields: a dict containing fields in the results other than 'images'
    """

    print('Loading results from {}'.format(api_output_path))

    with open(api_output_path) as f:
        results_dict = json.load(f)

    # Sanity-check that this is really a detector output file
    for required_field in ('info', 'detection_categories', 'images'):
        assert required_field in results_dict, \
            'Missing field {} in detection results'.format(required_field)

    # Everything other than 'images' is returned as metadata
    other_fields = {k: v for k, v in results_dict.items() if k != 'images'}

    images = results_dict['images']

    if normalize_paths:
        for image in images:
            image['file'] = os.path.normpath(image['file'])

    if force_forward_slashes:
        for image in images:
            image['file'] = image['file'].replace('\\','/')

    # Optionally map local path tokens back to the original blob structure
    if filename_replacements is not None:
        for token, replacement in filename_replacements.items():
            for image in images:
                image['file'] = image['file'].replace(token, replacement)

    print('Converting results to dataframe')

    # If this is a newer file that doesn't include maximum detection confidence
    # values, add them, because our unofficial internal dataframe format
    # includes this.
    for image in images:
        if 'max_detection_conf' not in image:
            image['max_detection_conf'] = ct_utils.get_max_conf(image)

    # Pack the image records into a Pandas DataFrame
    detection_results = pd.DataFrame(images)

    print('Finished loading MegaDetector results for {} images from {}'.format(
        len(detection_results),api_output_path))

    return detection_results, other_fields
96
+
97
+
98
def write_api_results(detection_results_table, other_fields, out_path):
    """
    Writes a Pandas DataFrame to the MegaDetector .json format.
    """

    print('Writing detection results to {}'.format(out_path))

    # [other_fields] doubles as the output dict; note that this (deliberately)
    # mutates the caller's dict, matching the historical behavior.
    output_dict = other_fields

    # Serialize the image table back to a list of dicts
    serialized_images = detection_results_table.to_json(orient='records',
                                                        double_precision=3)
    image_records = json.loads(serialized_images)
    output_dict['images'] = image_records

    # Convert the 'version' field back to a string as per format convention
    try:
        format_version = output_dict['info']['format_version']
        if not isinstance(format_version, str):
            output_dict['info']['format_version'] = str(format_version)
    except Exception:
        print('Warning: error determining format version')

    # Remove 'max_detection_conf' as per newer file convention (format >= v1.3)
    try:
        if float(output_dict['info']['format_version']) >= 1.3:
            for image_record in image_records:
                image_record.pop('max_detection_conf', None)
    except Exception:
        print('Warning: error removing max_detection_conf from output')

    with open(out_path, 'w') as f:
        json.dump(output_dict, f, indent=1)

    print('Finished writing detection results to {}'.format(out_path))
137
+
138
+
139
def load_api_results_csv(filename, normalize_paths=True, filename_replacements=None, nrows=None):
    """
    [DEPRECATED]

    Loads .csv-formatted MegaDetector results to a pandas table.

    Args:
        filename (str): the .csv file to load
        normalize_paths (bool): whether to apply os.path.normpath to each image path
        filename_replacements (dict, optional): str --> str mapping of path tokens to
            replace, used to match local paths to the original blob structure
        nrows (int, optional): if not None, load only the first [nrows] rows

    Returns:
        pd.DataFrame: the loaded results, containing at least the columns
        ['image_path','max_confidence','detections']
    """

    # Bug fix: avoid a mutable default argument ({}), which is shared across calls
    if filename_replacements is None:
        filename_replacements = {}

    print('Loading MegaDetector results from {}'.format(filename))

    detection_results = pd.read_csv(filename,nrows=nrows)

    print('De-serializing MegaDetector results from {}'.format(filename))

    # Confirm that this is really a detector output file
    for s in ['image_path','max_confidence','detections']:
        assert s in detection_results.columns

    # Normalize paths to simplify comparisons later
    if normalize_paths:
        detection_results['image_path'] = detection_results['image_path'].apply(os.path.normpath)

    # De-serialize detections (stored in the .csv as json strings)
    detection_results['detections'] = detection_results['detections'].apply(json.loads)

    # Optionally replace some path tokens to match local paths to the original blob structure
    for string_to_replace in filename_replacements:

        replacement_string = filename_replacements[string_to_replace]

        # i_row = 0
        for i_row in range(0,len(detection_results)):
            row = detection_results.iloc[i_row]
            fn = row['image_path']
            fn = fn.replace(string_to_replace,replacement_string)
            detection_results.at[i_row,'image_path'] = fn

    print('Finished loading and de-serializing MD results for {} images from {}'.format(
        len(detection_results),filename))

    return detection_results
180
+
181
+
182
def write_api_results_csv(detection_results, filename):
    """
    [DEPRECATED]

    Writes a Pandas table to csv in a way that's compatible with the .csv output
    format.  Currently just a wrapper around to_csv that forces output writing
    to go through a common code path.
    """

    print('Writing detection results to {}'.format(filename))

    # All .csv result writing funnels through this single call site
    detection_results.to_csv(filename, index=False)

    print('Finished writing detection results to {}'.format(filename))