megadetector 5.0.11__py3-none-any.whl → 5.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (201) hide show
  1. megadetector/api/__init__.py +0 -0
  2. megadetector/api/batch_processing/__init__.py +0 -0
  3. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  4. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. megadetector/api/batch_processing/api_core/batch_service/score.py +439 -0
  6. megadetector/api/batch_processing/api_core/server.py +294 -0
  7. megadetector/api/batch_processing/api_core/server_api_config.py +98 -0
  8. megadetector/api/batch_processing/api_core/server_app_config.py +55 -0
  9. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +220 -0
  10. megadetector/api/batch_processing/api_core/server_job_status_table.py +152 -0
  11. megadetector/api/batch_processing/api_core/server_orchestration.py +360 -0
  12. megadetector/api/batch_processing/api_core/server_utils.py +92 -0
  13. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  14. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +46 -0
  15. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  16. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +152 -0
  17. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  18. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  19. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  20. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  21. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +126 -0
  22. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  23. megadetector/api/synchronous/__init__.py +0 -0
  24. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  25. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +152 -0
  26. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -0
  27. megadetector/api/synchronous/api_core/animal_detection_api/config.py +35 -0
  28. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  29. megadetector/api/synchronous/api_core/tests/load_test.py +110 -0
  30. megadetector/classification/__init__.py +0 -0
  31. megadetector/classification/aggregate_classifier_probs.py +108 -0
  32. megadetector/classification/analyze_failed_images.py +227 -0
  33. megadetector/classification/cache_batchapi_outputs.py +198 -0
  34. megadetector/classification/create_classification_dataset.py +627 -0
  35. megadetector/classification/crop_detections.py +516 -0
  36. megadetector/classification/csv_to_json.py +226 -0
  37. megadetector/classification/detect_and_crop.py +855 -0
  38. megadetector/classification/efficientnet/__init__.py +9 -0
  39. megadetector/classification/efficientnet/model.py +415 -0
  40. megadetector/classification/efficientnet/utils.py +610 -0
  41. megadetector/classification/evaluate_model.py +520 -0
  42. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  43. megadetector/classification/json_to_azcopy_list.py +63 -0
  44. megadetector/classification/json_validator.py +699 -0
  45. megadetector/classification/map_classification_categories.py +276 -0
  46. megadetector/classification/merge_classification_detection_output.py +506 -0
  47. megadetector/classification/prepare_classification_script.py +194 -0
  48. megadetector/classification/prepare_classification_script_mc.py +228 -0
  49. megadetector/classification/run_classifier.py +287 -0
  50. megadetector/classification/save_mislabeled.py +110 -0
  51. megadetector/classification/train_classifier.py +827 -0
  52. megadetector/classification/train_classifier_tf.py +725 -0
  53. megadetector/classification/train_utils.py +323 -0
  54. megadetector/data_management/__init__.py +0 -0
  55. megadetector/data_management/annotations/__init__.py +0 -0
  56. megadetector/data_management/annotations/annotation_constants.py +34 -0
  57. megadetector/data_management/camtrap_dp_to_coco.py +239 -0
  58. megadetector/data_management/cct_json_utils.py +395 -0
  59. megadetector/data_management/cct_to_md.py +176 -0
  60. megadetector/data_management/cct_to_wi.py +289 -0
  61. megadetector/data_management/coco_to_labelme.py +272 -0
  62. megadetector/data_management/coco_to_yolo.py +662 -0
  63. megadetector/data_management/databases/__init__.py +0 -0
  64. megadetector/data_management/databases/add_width_and_height_to_db.py +33 -0
  65. megadetector/data_management/databases/combine_coco_camera_traps_files.py +206 -0
  66. megadetector/data_management/databases/integrity_check_json_db.py +477 -0
  67. megadetector/data_management/databases/subset_json_db.py +115 -0
  68. megadetector/data_management/generate_crops_from_cct.py +149 -0
  69. megadetector/data_management/get_image_sizes.py +189 -0
  70. megadetector/data_management/importers/add_nacti_sizes.py +52 -0
  71. megadetector/data_management/importers/add_timestamps_to_icct.py +79 -0
  72. megadetector/data_management/importers/animl_results_to_md_results.py +158 -0
  73. megadetector/data_management/importers/auckland_doc_test_to_json.py +373 -0
  74. megadetector/data_management/importers/auckland_doc_to_json.py +201 -0
  75. megadetector/data_management/importers/awc_to_json.py +191 -0
  76. megadetector/data_management/importers/bellevue_to_json.py +273 -0
  77. megadetector/data_management/importers/cacophony-thermal-importer.py +796 -0
  78. megadetector/data_management/importers/carrizo_shrubfree_2018.py +269 -0
  79. megadetector/data_management/importers/carrizo_trail_cam_2017.py +289 -0
  80. megadetector/data_management/importers/cct_field_adjustments.py +58 -0
  81. megadetector/data_management/importers/channel_islands_to_cct.py +913 -0
  82. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +180 -0
  83. megadetector/data_management/importers/eMammal/eMammal_helpers.py +249 -0
  84. megadetector/data_management/importers/eMammal/make_eMammal_json.py +223 -0
  85. megadetector/data_management/importers/ena24_to_json.py +276 -0
  86. megadetector/data_management/importers/filenames_to_json.py +386 -0
  87. megadetector/data_management/importers/helena_to_cct.py +283 -0
  88. megadetector/data_management/importers/idaho-camera-traps.py +1407 -0
  89. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +294 -0
  90. megadetector/data_management/importers/jb_csv_to_json.py +150 -0
  91. megadetector/data_management/importers/mcgill_to_json.py +250 -0
  92. megadetector/data_management/importers/missouri_to_json.py +490 -0
  93. megadetector/data_management/importers/nacti_fieldname_adjustments.py +79 -0
  94. megadetector/data_management/importers/noaa_seals_2019.py +181 -0
  95. megadetector/data_management/importers/pc_to_json.py +365 -0
  96. megadetector/data_management/importers/plot_wni_giraffes.py +123 -0
  97. megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -0
  98. megadetector/data_management/importers/prepare_zsl_imerit.py +131 -0
  99. megadetector/data_management/importers/rspb_to_json.py +356 -0
  100. megadetector/data_management/importers/save_the_elephants_survey_A.py +320 -0
  101. megadetector/data_management/importers/save_the_elephants_survey_B.py +329 -0
  102. megadetector/data_management/importers/snapshot_safari_importer.py +758 -0
  103. megadetector/data_management/importers/snapshot_safari_importer_reprise.py +665 -0
  104. megadetector/data_management/importers/snapshot_serengeti_lila.py +1067 -0
  105. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +150 -0
  106. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +153 -0
  107. megadetector/data_management/importers/sulross_get_exif.py +65 -0
  108. megadetector/data_management/importers/timelapse_csv_set_to_json.py +490 -0
  109. megadetector/data_management/importers/ubc_to_json.py +399 -0
  110. megadetector/data_management/importers/umn_to_json.py +507 -0
  111. megadetector/data_management/importers/wellington_to_json.py +263 -0
  112. megadetector/data_management/importers/wi_to_json.py +442 -0
  113. megadetector/data_management/importers/zamba_results_to_md_results.py +181 -0
  114. megadetector/data_management/labelme_to_coco.py +547 -0
  115. megadetector/data_management/labelme_to_yolo.py +272 -0
  116. megadetector/data_management/lila/__init__.py +0 -0
  117. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +97 -0
  118. megadetector/data_management/lila/add_locations_to_nacti.py +147 -0
  119. megadetector/data_management/lila/create_lila_blank_set.py +558 -0
  120. megadetector/data_management/lila/create_lila_test_set.py +152 -0
  121. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  122. megadetector/data_management/lila/download_lila_subset.py +178 -0
  123. megadetector/data_management/lila/generate_lila_per_image_labels.py +516 -0
  124. megadetector/data_management/lila/get_lila_annotation_counts.py +170 -0
  125. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  126. megadetector/data_management/lila/lila_common.py +300 -0
  127. megadetector/data_management/lila/test_lila_metadata_urls.py +132 -0
  128. megadetector/data_management/ocr_tools.py +874 -0
  129. megadetector/data_management/read_exif.py +681 -0
  130. megadetector/data_management/remap_coco_categories.py +84 -0
  131. megadetector/data_management/remove_exif.py +66 -0
  132. megadetector/data_management/resize_coco_dataset.py +189 -0
  133. megadetector/data_management/wi_download_csv_to_coco.py +246 -0
  134. megadetector/data_management/yolo_output_to_md_output.py +441 -0
  135. megadetector/data_management/yolo_to_coco.py +676 -0
  136. megadetector/detection/__init__.py +0 -0
  137. megadetector/detection/detector_training/__init__.py +0 -0
  138. megadetector/detection/detector_training/model_main_tf2.py +114 -0
  139. megadetector/detection/process_video.py +702 -0
  140. megadetector/detection/pytorch_detector.py +341 -0
  141. megadetector/detection/run_detector.py +779 -0
  142. megadetector/detection/run_detector_batch.py +1219 -0
  143. megadetector/detection/run_inference_with_yolov5_val.py +917 -0
  144. megadetector/detection/run_tiled_inference.py +934 -0
  145. megadetector/detection/tf_detector.py +189 -0
  146. megadetector/detection/video_utils.py +606 -0
  147. megadetector/postprocessing/__init__.py +0 -0
  148. megadetector/postprocessing/add_max_conf.py +64 -0
  149. megadetector/postprocessing/categorize_detections_by_size.py +163 -0
  150. megadetector/postprocessing/combine_api_outputs.py +249 -0
  151. megadetector/postprocessing/compare_batch_results.py +958 -0
  152. megadetector/postprocessing/convert_output_format.py +396 -0
  153. megadetector/postprocessing/load_api_results.py +195 -0
  154. megadetector/postprocessing/md_to_coco.py +310 -0
  155. megadetector/postprocessing/md_to_labelme.py +330 -0
  156. megadetector/postprocessing/merge_detections.py +401 -0
  157. megadetector/postprocessing/postprocess_batch_results.py +1902 -0
  158. megadetector/postprocessing/remap_detection_categories.py +170 -0
  159. megadetector/postprocessing/render_detection_confusion_matrix.py +660 -0
  160. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +211 -0
  161. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +83 -0
  162. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1631 -0
  163. megadetector/postprocessing/separate_detections_into_folders.py +730 -0
  164. megadetector/postprocessing/subset_json_detector_output.py +696 -0
  165. megadetector/postprocessing/top_folders_to_bottom.py +223 -0
  166. megadetector/taxonomy_mapping/__init__.py +0 -0
  167. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  168. megadetector/taxonomy_mapping/map_new_lila_datasets.py +150 -0
  169. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -0
  170. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +590 -0
  171. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  172. megadetector/taxonomy_mapping/simple_image_download.py +219 -0
  173. megadetector/taxonomy_mapping/species_lookup.py +834 -0
  174. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  175. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  176. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  177. megadetector/utils/__init__.py +0 -0
  178. megadetector/utils/azure_utils.py +178 -0
  179. megadetector/utils/ct_utils.py +612 -0
  180. megadetector/utils/directory_listing.py +246 -0
  181. megadetector/utils/md_tests.py +968 -0
  182. megadetector/utils/path_utils.py +1044 -0
  183. megadetector/utils/process_utils.py +157 -0
  184. megadetector/utils/sas_blob_utils.py +509 -0
  185. megadetector/utils/split_locations_into_train_val.py +228 -0
  186. megadetector/utils/string_utils.py +92 -0
  187. megadetector/utils/url_utils.py +323 -0
  188. megadetector/utils/write_html_image_list.py +225 -0
  189. megadetector/visualization/__init__.py +0 -0
  190. megadetector/visualization/plot_utils.py +293 -0
  191. megadetector/visualization/render_images_with_thumbnails.py +275 -0
  192. megadetector/visualization/visualization_utils.py +1536 -0
  193. megadetector/visualization/visualize_db.py +550 -0
  194. megadetector/visualization/visualize_detector_output.py +405 -0
  195. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/METADATA +1 -1
  196. megadetector-5.0.12.dist-info/RECORD +199 -0
  197. megadetector-5.0.12.dist-info/top_level.txt +1 -0
  198. megadetector-5.0.11.dist-info/RECORD +0 -5
  199. megadetector-5.0.11.dist-info/top_level.txt +0 -1
  200. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/LICENSE +0 -0
  201. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/WHEEL +0 -0
@@ -0,0 +1,163 @@
1
+ """
2
+
3
+ categorize_detections_by_size.py
4
+
5
+ Given a MegaDetector .json file, creates a separate category for bounding boxes
6
+ above one or more size thresholds.
7
+
8
+ """
9
+
10
+ #%% Constants and imports
11
+
12
+ import json
13
+
14
+ from collections import defaultdict
15
+ from tqdm import tqdm
16
+
17
+
18
+ #%% Support classes
19
+
20
class SizeCategorizationOptions:
    """
    Options used to parameterize categorize_detections_by_size().

    Defaults are declared as class attributes.  Each instance receives its own
    copy of the list-valued defaults, so modifying one instance's lists in place
    does not leak into other instances or into the class-level defaults.
    """

    #: Thresholds to use for separation, as a fraction of the image size.
    #:
    #: Does not need to be pre-sorted; categorize_detections_by_size() sorts
    #: thresholds together with their category names.
    size_thresholds = [0.95]

    #: List of category numbers to use in separation; uses all categories if None
    categories_to_separate = None

    #: Dimension to use for thresholding; can be "size", "width", or "height"
    measurement = 'size'

    #: Categories to assign to thresholded ranges; should have the same length as
    #: "size_thresholds".
    size_category_names = ['large_detection']

    def __init__(self):

        # At this point the attribute lookups below resolve to the class-level
        # defaults (or a subclass's overrides); copy the lists so that in-place
        # edits on this instance stay local to this instance.
        self.size_thresholds = list(self.size_thresholds)
        self.size_category_names = list(self.size_category_names)
        if self.categories_to_separate is not None:
            self.categories_to_separate = list(self.categories_to_separate)
39
+
40
+
41
+ #%% Main functions
42
+
43
def categorize_detections_by_size(input_file,output_file=None,options=None):
    """
    Given a MegaDetector .json file, creates a separate category for bounding boxes
    above one or more size thresholds, optionally writing results to [output_file].

    Each detection is compared against the thresholds from largest to smallest and
    is re-assigned to the category of the first threshold it meets or exceeds.
    Detections that meet no threshold keep their original category.

    The [options] object is only read; thresholds and category names are sorted
    into local copies, so the caller's options are left untouched.

    Args:
        input_file (str): file to process
        output_file (str, optional): optional output file
        options (SizeCategorizationOptions): categorization parameters; defaults
            are used if this is None

    Returns:
        dict: data loaded from [input_file], with the new size-based categories.
        Identical to what's written to [output_file], if [output_file] is not None.
    """

    if options is None:
        options = SizeCategorizationOptions()

    # Validate once up front, rather than once per detection
    assert options.measurement in ('size','width','height'), \
        'Unrecognized measurement metric'

    assert len(options.size_thresholds) == len(options.size_category_names), \
        'Options struct should have the same number of category names and size thresholds'

    # Category IDs are strings in the results file; normalize whatever the caller
    # supplied (e.g. ints) so membership tests against d['category'] work.
    categories_to_separate = None
    if options.categories_to_separate is not None:
        categories_to_separate = set(str(c) for c in options.categories_to_separate)

    # Sort size thresholds and names together, from largest to smallest, so the
    # first threshold a detection meets is the largest one it meets.
    threshold_name_pairs = sorted(zip(options.size_thresholds,
                                      options.size_category_names),reverse=True)
    size_thresholds = [threshold for threshold,_ in threshold_name_pairs]
    size_category_names = [name for _,name in threshold_name_pairs]

    with open(input_file,'r',encoding='utf-8') as f:
        data = json.load(f)

    detection_categories = data['detection_categories']

    # New category IDs are allocated after the largest existing numeric ID; an
    # empty category map starts numbering at 1.
    max_key = max((int(k) for k in detection_categories),default=0)

    threshold_to_category_id = {}
    for i_threshold,category_name in enumerate(size_category_names):

        max_key += 1
        category_id = str(max_key)
        detection_categories[category_id] = category_name
        threshold_to_category_id[i_threshold] = category_id

        print('Creating category for {} with ID {}'.format(
            category_name,category_id))

    images = data['images']

    print('Loaded {} images'.format(len(images)))

    category_id_to_count = defaultdict(int)

    # For each image...
    #
    # im = images[0]
    for im in tqdm(images):

        # Failed images may have a null 'detections' field or omit it entirely
        detections = im.get('detections')
        if detections is None:
            assert im.get('failure'), \
                'Image {} has neither detections nor a failure string'.format(
                    im.get('file'))
            continue

        # d = detections[0]
        for d in detections:

            # Are there really any detections here?
            if (d is None) or (d.get('bbox') is None):
                continue

            # Is this a category we're supposed to process?
            if (categories_to_separate is not None) and \
               (d['category'] not in categories_to_separate):
                continue

            # https://github.com/agentmorris/MegaDetector/tree/main/megadetector/api/batch_processing#detector-outputs
            #
            # Elements 2 and 3 of the box are its width and height
            w = d['bbox'][2]
            h = d['bbox'][3]

            if options.measurement == 'size':
                metric = w*h
            elif options.measurement == 'width':
                metric = w
            else:
                metric = h

            for i_threshold,threshold in enumerate(size_thresholds):

                if metric >= threshold:

                    category_id = threshold_to_category_id[i_threshold]
                    category_id_to_count[category_id] += 1
                    d['category'] = category_id

                    # Thresholds are in descending order, so stop at the first match
                    break

            # ...for each threshold

        # ...for each detection

    # ...for each image

    for i_threshold,category_name in enumerate(size_category_names):
        category_id = threshold_to_category_id[i_threshold]
        category_count = category_id_to_count[category_id]
        print('Found {} detections in category {}'.format(category_count,category_name))

    if output_file is not None:
        with open(output_file,'w',encoding='utf-8') as f:
            json.dump(data,f,indent=1)

    return data

# ...def categorize_detections_by_size()
@@ -0,0 +1,249 @@
1
+ """
2
+
3
+ combine_api_outputs.py
4
+
5
+ Merges two or more .json files in batch API output format, optionally
6
+ writing the results to another .json file.
7
+
8
+ * Concatenates image lists, erroring if images are not unique.
9
+ * Errors if class lists are conflicting; errors on unrecognized fields.
10
+ * Checks compatibility in info structs, within reason.
11
+
12
+ File format:
13
+
14
+ https://github.com/agentmorris/MegaDetector/tree/main/megadetector/api/batch_processing#batch-processing-api-output-format
15
+
16
+ Command-line use:
17
+
18
+ combine_api_outputs input1.json input2.json ... inputN.json output.json
19
+
20
+ Also see combine_api_shard_files() (not exposed via the command line yet) to
21
+ combine the intermediate files created by the API.
22
+
23
+ This does no checking for redundancy; if you are looking to ensemble
24
+ the results of multiple model versions, see merge_detections.py.
25
+
26
+ """
27
+
28
+ #%% Constants and imports
29
+
30
+ import argparse
31
+ import sys
32
+ import json
33
+
34
+
35
+ #%% Merge functions
36
+
37
def combine_api_output_files(input_files,
                             output_file=None,
                             require_uniqueness=True,
                             verbose=True):
    """
    Merges the list of MD results files [input_files] into a single
    dictionary, optionally writing the result to [output_file].

    Args:
        input_files (list of str): paths to JSON detection files
        output_file (str, optional): path to write merged JSON; nothing is
            written if this is None
        require_uniqueness (bool): whether to require that the images in
            each list of images be unique
        verbose (bool): whether to print progress messages

    Returns:
        dict: merged dictionaries loaded from [input_files], identical to what's
        written to [output_file] if [output_file] is not None
    """

    def print_if_verbose(s):
        if verbose:
            print(s)

    input_dicts = []
    for fn in input_files:
        print_if_verbose('Loading results from {}'.format(fn))
        with open(fn, 'r', encoding='utf-8') as f:
            input_dicts.append(json.load(f))

    print_if_verbose('Merging results')
    merged_dict = combine_api_output_dictionaries(
        input_dicts, require_uniqueness=require_uniqueness)

    # Only announce the write when a write actually happens
    if output_file is not None:
        print_if_verbose('Writing output to {}'.format(output_file))
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(merged_dict, f, indent=1)

    return merged_dict
76
+
77
+
78
def combine_api_output_dictionaries(input_dicts, require_uniqueness=True):
    """
    Merges the list of MD results dictionaries [input_dicts] into a single dict.
    See module header comment for details on merge rules.

    Image dictionaries are shared with the inputs (not copied), and their 'file'
    fields are normalized in place to use forward slashes.  The merged 'info'
    dict is a copy, so the inputs' 'info' dicts are not modified.

    Args:
        input_dicts (list of dicts): list of dicts in which each dict represents the
            contents of a MD output file
        require_uniqueness (bool): whether to require that the images in
            each input dict be unique; if this is True and image filenames are
            not unique, an error is raised.  If this is False, the first record
            for each filename is kept, unless it has no detections and a later
            record does.

    Returns:
        dict: merged MD results

    Raises:
        ValueError: if an input dict contains an unrecognized top-level field
        AssertionError: on conflicting categories, incompatible info structs, or
            (if [require_uniqueness] is True) duplicate image filenames
    """

    def merge_categories(merged, new, mismatch_message):
        # Add every ID in [new] to [merged], insisting that IDs already present
        # map to the same name.
        for cat_id, cat_name in new.items():
            if cat_id in merged:
                assert merged[cat_id] == cat_name, mismatch_message
            else:
                merged[cat_id] = cat_name

    # Map image filenames to detections, we'll convert to a list later
    images = {}
    info = {}
    detection_categories = {}
    classification_categories = {}
    n_redundant_images = 0
    n_images = 0

    known_fields = ('info', 'detection_categories', 'classification_categories',
                    'images')

    for input_dict in input_dicts:

        for k in input_dict:
            if k not in known_fields:
                raise ValueError(f'Unrecognized API output field: {k}')

        # Check compatibility of detection categories
        merge_categories(detection_categories,
                         input_dict['detection_categories'],
                         'Detection category mismatch')

        # Check compatibility of classification categories
        if 'classification_categories' in input_dict:
            merge_categories(classification_categories,
                             input_dict['classification_categories'],
                             'Classification category mismatch')

        # Merge image lists, checking uniqueness
        for im in input_dict['images']:

            # Normalize path separators so we don't treat images as different if they
            # were processed on different OS's
            im['file'] = im['file'].replace('\\','/')
            im_file = im['file']

            if im_file not in images:
                images[im_file] = im
                n_images += 1
                continue

            assert not require_uniqueness, f'Duplicate image: {im_file}'

            n_redundant_images += 1
            previous_im = images[im_file]

            # Replace a previous failure with a success.  A failure may either
            # omit 'detections' or store it as None.
            if (im.get('detections') is not None) and \
               (previous_im.get('detections') is None):
                images[im_file] = im
                print(f'Replacing previous failure for image: {im_file}')

        # ...for each image

        # Merge info dicts, don't check completion time fields
        info_compare = input_dict['info']
        if len(info) == 0:
            # Copy, so that adding a classifier below doesn't modify the input
            info = dict(info_compare)
        else:
            assert info_compare.get('detector') == info.get('detector'), (
                'Incompatible detection versions in merging')
            assert info_compare.get('format_version') == info.get('format_version'), (
                'Incompatible API output versions in merging')
            if 'classifier' in info_compare:
                if 'classifier' in info:
                    assert info['classifier'] == info_compare['classifier'], (
                        'Incompatible classifiers in merging')
                else:
                    info['classifier'] = info_compare['classifier']

    # ...for each dictionary

    if n_redundant_images > 0:
        print(f'Warning: found {n_redundant_images} redundant images '
              f'(out of {n_images} total) during merge')

    # Convert merged image dictionaries to a sorted list
    sorted_images = sorted(images.values(), key=lambda im: im['file'])

    merged_dict = {'info': info,
                   'detection_categories': detection_categories,
                   'classification_categories': classification_categories,
                   'images': sorted_images}
    return merged_dict

# ...combine_api_output_dictionaries()
183
+
184
+
185
def combine_api_shard_files(input_files, output_file=None):
    """
    Merges the list of .json-formatted API shard files [input_files] into a single
    list of dictionaries, optionally writing the result to [output_file].

    This operates on mostly-deprecated API shard files, not MegaDetector results files.
    If you don't know what an API shard file is, you don't want this function.

    Args:
        input_files (list of str): files to merge; each must contain a JSON list
            of dicts that each have 'file', 'max_detection_conf', and 'detections'
        output_file (str, optional): file to which we should write merged results

    Returns:
        list: merged results, i.e. the concatenation of all input lists, in order

    :meta private:
    """

    input_lists = []
    print('Loading input files')
    for fn in input_files:
        # Use a context manager so each file handle is closed promptly
        with open(fn, 'r', encoding='utf-8') as f:
            input_lists.append(json.load(f))

    detections = []
    # detection_list = input_lists[0]
    for detection_list in input_lists:
        assert isinstance(detection_list, list)
        # d = detection_list[0]
        for d in detection_list:
            assert 'file' in d
            assert 'max_detection_conf' in d
            assert 'detections' in d
            detections.append(d)

    # Only announce the write when a write actually happens
    if output_file is not None:
        print('Writing output')
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(detections, f, indent=1)

    return detections

# ...combine_api_shard_files()
227
+
228
+
229
+ #%% Command-line driver
230
+
231
def main():
    """
    Command-line entry point: merges the input .json files named on the command
    line and writes the merged results to the final path on the command line.

    Prints help and exits if no arguments are supplied.
    """

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('input_paths', nargs='+',
                            help='List of input .json files')
    arg_parser.add_argument('output_path',
                            help='Output .json file')

    # With no arguments at all, show usage rather than an argparse error
    cli_args = sys.argv[1:]
    if not cli_args:
        arg_parser.print_help()
        arg_parser.exit()

    parsed = arg_parser.parse_args()
    combine_api_output_files(parsed.input_paths, parsed.output_path)


if __name__ == '__main__':
    main()