megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (191) hide show
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +93 -79
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
  20. api/batch_processing/postprocessing/compare_batch_results.py +114 -44
  21. api/batch_processing/postprocessing/convert_output_format.py +62 -19
  22. api/batch_processing/postprocessing/load_api_results.py +17 -20
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +165 -68
  25. api/batch_processing/postprocessing/merge_detections.py +40 -15
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
  27. api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +107 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -0
  71. data_management/coco_to_yolo.py +86 -62
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +130 -83
  76. data_management/databases/subset_json_db.py +25 -16
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -144
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -160
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +8 -8
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +309 -159
  120. data_management/labelme_to_yolo.py +103 -60
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +114 -31
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +92 -90
  128. data_management/lila/generate_lila_per_image_labels.py +56 -43
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +103 -70
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +161 -99
  135. data_management/remap_coco_categories.py +84 -0
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +32 -44
  138. data_management/wi_download_csv_to_coco.py +246 -0
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +535 -95
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +189 -114
  147. detection/run_inference_with_yolov5_val.py +118 -51
  148. detection/run_tiled_inference.py +113 -42
  149. detection/tf_detector.py +51 -28
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +249 -70
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -862
  157. md_utils/path_utils.py +655 -155
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +208 -27
  163. md_utils/write_html_image_list.py +51 -35
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +908 -311
  168. md_visualization/visualize_db.py +109 -58
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
  173. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  174. taxonomy_mapping/__init__.py +0 -0
  175. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  176. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  177. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  178. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  179. taxonomy_mapping/retrieve_sample_image.py +12 -12
  180. taxonomy_mapping/simple_image_download.py +11 -11
  181. taxonomy_mapping/species_lookup.py +10 -10
  182. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  183. taxonomy_mapping/taxonomy_graph.py +47 -47
  184. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  185. data_management/cct_json_to_filename_json.py +0 -89
  186. data_management/cct_to_csv.py +0 -140
  187. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  188. detection/detector_training/copy_checkpoints.py +0 -43
  189. md_visualization/visualize_megadb.py +0 -183
  190. megadetector-5.0.7.dist-info/RECORD +0 -202
  191. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
@@ -1,19 +1,19 @@
1
- ########
2
- #
3
- # compare_batch_results.py
4
- #
5
- # Compare sets of batch results; typically used to compare:
6
- #
7
- # * Results from different MegaDetector versions
8
- # * Results before/after RDE
9
- # * Results with/without augmentation
10
- #
11
- # Makes pairwise comparisons, but can take lists of results files (will perform
12
- # all pairwise comparisons). Results are written to an HTML page that shows the number
13
- # and nature of disagreements (in the sense of each image being a detection or non-detection),
14
- # with sample images for each category.
15
- #
16
- ########
1
+ """
2
+
3
+ compare_batch_results.py
4
+
5
+ Compare sets of batch results; typically used to compare:
6
+
7
+ * Results from different MegaDetector versions
8
+ * Results before/after RDE
9
+ * Results with/without augmentation
10
+
11
+ Makes pairwise comparisons, but can take lists of results files (will perform
12
+ all pairwise comparisons). Results are written to an HTML page that shows the number
13
+ and nature of disagreements (in the sense of each image being a detection or non-detection),
14
+ with sample images for each category.
15
+
16
+ """
17
17
 
18
18
  #%% Imports
19
19
 
@@ -43,16 +43,28 @@ class PairwiseBatchComparisonOptions:
43
43
  pairwise options sets is stored in the BatchComparisonsOptions class.
44
44
  """
45
45
 
46
+ #: First filename to compare
46
47
  results_filename_a = None
48
+
49
+ #: Second filename to compare
47
50
  results_filename_b = None
48
51
 
52
+ #: Description to use in the output HTML for filename A
49
53
  results_description_a = None
54
+
55
+ #: Description to use in the output HTML for filename B
50
56
  results_description_b = None
51
57
 
58
+ #: Per-class detection thresholds to use for filename A (including a 'default' threshold)
52
59
  detection_thresholds_a = {'animal':0.15,'person':0.15,'vehicle':0.15,'default':0.15}
60
+
61
+ #: Per-class detection thresholds to use for filename B (including a 'default' threshold)
53
62
  detection_thresholds_b = {'animal':0.15,'person':0.15,'vehicle':0.15,'default':0.15}
54
63
 
64
+ #: Rendering threshold to use for all categories for filename A
55
65
  rendering_confidence_threshold_a = 0.1
66
+
67
+ #: Rendering threshold to use for all categories for filename B
56
68
  rendering_confidence_threshold_b = 0.1
57
69
 
58
70
  # ...class PairwiseBatchComparisonOptions
@@ -63,33 +75,56 @@ class BatchComparisonOptions:
63
75
  Defines the options for a set of (possibly many) pairwise comparisons.
64
76
  """
65
77
 
78
+ #: Folder to which we should write HTML output
66
79
  output_folder = None
80
+
81
+ #: Base folder for images (which are specified as relative files)
67
82
  image_folder = None
83
+
84
+ #: Job name to use in the HTML output file
68
85
  job_name = ''
69
86
 
87
+ #: Maximum number of images to render for each category, where a "category" here is
88
+ #: "detections_a_only", "detections_b_only", etc., or None to render all images.
70
89
  max_images_per_category = 1000
90
+
91
+ #: Maximum number of images per HTML page (paginates if a category page goes beyond this),
92
+ #: or None to disable pagination.
71
93
  max_images_per_page = None
94
+
95
+ #: Colormap to use for detections in file A (maps detection categories to colors)
72
96
  colormap_a = ['Red']
97
+
98
+ #: Colormap to use for detections in file B (maps detection categories to colors)
73
99
  colormap_b = ['RoyalBlue']
74
100
 
75
- # Process-based parallelization isn't supported yet; this must be "True"
101
+ #: Process-based parallelization isn't supported yet; this must be "True"
76
102
  parallelize_rendering_with_threads = True
77
103
 
78
- # List of filenames to include in the comparison, or None to use all files
104
+ #: List of filenames to include in the comparison, or None to use all files
79
105
  filenames_to_include = None
80
106
 
81
- # Compare only detections/non-detections, ignore categories (still renders categories)
107
+ #: Compare only detections/non-detections, ignore categories (still renders categories)
82
108
  class_agnostic_comparison = False
83
109
 
110
+ #: Width of images to render in the output HTML
84
111
  target_width = 800
112
+
113
+ #: Number of workers to use for rendering, or <=1 to disable parallelization
85
114
  n_rendering_workers = 20
115
+
116
+ #: Random seed for image sampling (not used if max_images_per_category is None)
86
117
  random_seed = 0
87
118
 
88
- # Default to sorting by filename
119
+ #: Whether to sort results by confidence; if this is False, sorts by filename
89
120
  sort_by_confidence = False
90
121
 
122
+ #: The expectation is that all results sets being compared will refer to the same images; if this
123
+ #: is True (default), we'll error if that's not the case, otherwise non-matching lists will just be
124
+ #: a warning.
91
125
  error_on_non_matching_lists = True
92
126
 
127
+ #: List of PairwiseBatchComparisonOptions that defines the comparisons we'll render.
93
128
  pairwise_options = []
94
129
 
95
130
  # ...class BatchComparisonOptions
@@ -100,18 +135,21 @@ class PairwiseBatchComparisonResults:
100
135
  The results from a single pairwise comparison.
101
136
  """
102
137
 
138
+ #: String of HTML content suitable for rendering to an HTML file
103
139
  html_content = None
140
+
141
+ #: Possibly-modified version of the PairwiseBatchComparisonOptions supplied as input.
104
142
  pairwise_options = None
105
143
 
106
- # A dictionary with keys including:
107
- #
108
- # common_detections
109
- # common_non_detections
110
- # detections_a_only
111
- # detections_b_only
112
- # class_transitions
144
+ #: A dictionary with keys including:
145
+ #:
146
+ #: common_detections
147
+ #: common_non_detections
148
+ #: detections_a_only
149
+ #: detections_b_only
150
+ #: class_transitions
113
151
  #
114
- # Each of these maps a filename to a two-element list (the image in set A, the image in set B).
152
+ #: Each of these maps a filename to a two-element list (the image in set A, the image in set B).
115
153
  categories_to_image_pairs = None
116
154
 
117
155
  # ...class PairwiseBatchComparisonResults
@@ -122,9 +160,10 @@ class BatchComparisonResults:
122
160
  The results from a set of pairwise comparisons
123
161
  """
124
162
 
163
+ #: Filename containing HTML output
125
164
  html_output_file = None
126
165
 
127
- # An list of PairwiseBatchComparisonResults
166
+ #: A list of PairwiseBatchComparisonResults
128
167
  pairwise_results = None
129
168
 
130
169
  # ...class BatchComparisonResults
@@ -144,9 +183,20 @@ main_page_footer = '<br/><br/><br/></body></html>\n'
144
183
 
145
184
  #%% Comparison functions
146
185
 
147
- def render_image_pair(fn,image_pairs,category_folder,options,pairwise_options):
186
+ def _render_image_pair(fn,image_pairs,category_folder,options,pairwise_options):
148
187
  """
149
188
  Render two sets of results (i.e., a comparison) for a single image.
189
+
190
+ Args:
191
+ fn (str): image filename
192
+ image_pairs (dict): dict mapping filenames to pairs of image dicts
193
+ category_folder (str): folder to which to render this image, typically
194
+ "detections_a_only", "detections_b_only", etc.
195
+ options (BatchComparisonOptions): job options
196
+ pairwise_options (PairwiseBatchComparisonOptions): pairwise comparison options
197
+
198
+ Returns:
199
+ str: rendered image filename
150
200
  """
151
201
 
152
202
  input_image_path = os.path.join(options.image_folder,fn)
@@ -194,20 +244,22 @@ def render_image_pair(fn,image_pairs,category_folder,options,pairwise_options):
194
244
  im.save(output_image_path)
195
245
  return output_image_path
196
246
 
197
- # ...def render_image_pair()
247
+ # ...def _render_image_pair()
198
248
 
199
249
 
200
- def pairwise_compare_batch_results(options,output_index,pairwise_options):
250
+ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
201
251
  """
202
252
  The main entry point for this module is compare_batch_results(), which calls
203
253
  this function for each pair of comparisons the caller has requested. Generates an
204
254
  HTML page for this comparison. Returns a PairwiseBatchComparisonResults object.
205
255
 
206
- options: an instance of BatchComparisonOptions
207
-
208
- output_index: a numeric index used for generating HTML titles
209
-
210
- pairwise_options: an instance of PairwiseBatchComparisonOptions
256
+ Args:
257
+ options (BatchComparisonOptions): overall job options for this comparison group
258
+ output_index (int): a numeric index used for generating HTML titles
259
+ pairwise_options (PairwiseBatchComparisonOptions): job options for this comparison
260
+
261
+ Returns:
262
+ PairwiseBatchComparisonResults: the results of this pairwise comparison
211
263
  """
212
264
 
213
265
  # pairwise_options is passed as a parameter here, and should not be specified
@@ -291,7 +343,7 @@ def pairwise_compare_batch_results(options,output_index,pairwise_options):
291
343
  filenames_b_set = set([im['file'] for im in images_b])
292
344
 
293
345
  if len(images_a) != len(images_b):
294
- s = 'set A has {} iamges, set B has {}'.format(len(images_a),len(images_b))
346
+ s = 'set A has {} images, set B has {}'.format(len(images_a),len(images_b))
295
347
  if options.error_on_non_matching_lists:
296
348
  raise ValueError(s)
297
349
  else:
@@ -463,11 +515,11 @@ def pairwise_compare_batch_results(options,output_index,pairwise_options):
463
515
  if options.n_rendering_workers <= 1:
464
516
  output_image_paths = []
465
517
  for fn in tqdm(image_filenames):
466
- output_image_paths.append(render_image_pair(fn,image_pairs,category_folder,
518
+ output_image_paths.append(_render_image_pair(fn,image_pairs,category_folder,
467
519
  options,pairwise_options))
468
520
  else:
469
521
  output_image_paths = list(tqdm(pool.imap(
470
- partial(render_image_pair, image_pairs=image_pairs,
522
+ partial(_render_image_pair, image_pairs=image_pairs,
471
523
  category_folder=category_folder,options=options,
472
524
  pairwise_options=pairwise_options),
473
525
  image_filenames),
@@ -644,14 +696,20 @@ def pairwise_compare_batch_results(options,output_index,pairwise_options):
644
696
 
645
697
  return pairwise_results
646
698
 
647
- # ...def pairwise_compare_batch_results()
699
+ # ...def _pairwise_compare_batch_results()
648
700
 
649
701
 
650
702
  def compare_batch_results(options):
651
703
  """
652
704
  The main entry point for this module. Runs one or more batch results comparisons,
653
- writing results to an html page. Most of the work is deferred to
654
- pairwise_compare_batch_results().
705
+ writing results to an html page. Most of the work is deferred to _pairwise_compare_batch_results().
706
+
707
+ Args:
708
+ options (BatchComparisonOptions): job options to use for this comparison task, including the
709
+ list of specific pairwise comparisons to make (in the pairwise_options field)
710
+
711
+ Returns:
712
+ BatchComparisonResults: the results of this comparison task
655
713
  """
656
714
 
657
715
  assert options.output_folder is not None
@@ -675,7 +733,7 @@ def compare_batch_results(options):
675
733
  for i_comparison,pairwise_options in enumerate(pairwise_options_list):
676
734
  print('Running comparison {} of {}'.format(i_comparison,n_comparisons))
677
735
  pairwise_results = \
678
- pairwise_compare_batch_results(options,i_comparison,pairwise_options)
736
+ _pairwise_compare_batch_results(options,i_comparison,pairwise_options)
679
737
  html_content += pairwise_results.html_content
680
738
  all_pairwise_results.append(pairwise_results)
681
739
 
@@ -702,6 +760,18 @@ def n_way_comparison(filenames,options,detection_thresholds=None,rendering_thres
702
760
  """
703
761
  Performs N pairwise comparisons for the list of results files in [filenames], by generating
704
762
  sets of pairwise options and calling compare_batch_results.
763
+
764
+ Args:
765
+ filenames (list): list of MD results filenames to compare
766
+ options (BatchComparisonOptions): task options set in which pairwise_options is still
767
+ empty; that will get populated from [filenames]
768
+ detection_thresholds (list, optional): list of detection thresholds with the same length
769
+ as [filenames], or None to use sensible defaults
770
+ rendering_thresholds (list, optional): list of rendering thresholds with the same length
771
+ as [filenames], or None to use sensible defaults
772
+
773
+ Returns:
774
+ BatchComparisonResults: the results of this comparison task
705
775
  """
706
776
 
707
777
  if detection_thresholds is None:
@@ -1,13 +1,15 @@
1
- ########
2
- #
3
- # convert_output_format.py
4
- #
5
- # Converts between file formats output by our batch processing API. Currently
6
- # supports json <--> csv conversion, but this should be the landing place for any
7
- # conversion - including between future .json versions - that we support in the
8
- # future.
9
- #
10
- ########
1
+ """
2
+
3
+ convert_output_format.py
4
+
5
+ Converts between file formats output by our batch processing API. Currently
6
+ supports json <--> csv conversion, but this should be the landing place for any
7
+ conversion - including between hypothetical alternative .json versions - that we support
8
+ in the future.
9
+
10
+ The .csv format is largely obsolete; don't use it unless you're super-duper sure you need it.
11
+
12
+ """
11
13
 
12
14
  #%% Constants and imports
13
15
 
@@ -30,18 +32,39 @@ CONF_DIGITS = 3
30
32
  #%% Conversion functions
31
33
 
32
34
  def convert_json_to_csv(input_path,output_path=None,min_confidence=None,
33
- omit_bounding_boxes=False,output_encoding=None):
35
+ omit_bounding_boxes=False,output_encoding=None,
36
+ overwrite=True):
34
37
  """
35
- Convert .json to .csv
38
+ Converts a MD results .json file to a totally non-standard .csv format.
39
+
40
+ If [output_path] is None, will convert x.json to x.csv.
36
41
 
37
42
  TODO: this function should obviously be using Pandas or some other sensible structured
38
43
  representation of tabular data. Even a list of dicts. This implementation is quite
39
44
  brittle and depends on adding fields to every row in exactly the right order.
45
+
46
+ Args:
47
+ input_path (str): the input .json file to convert
48
+ output_path (str, optional): the output .csv file to generate; if this is None, uses
49
+ [input_path] with its extension replaced by .csv
50
+ min_confidence (float, optional): the minimum-confidence detection we should include
51
+ in the "detections" column; has no impact on the other columns
52
+ omit_bounding_boxes (bool): whether to leave out the json-formatted bounding boxes
53
+ that make up the "detections" column, which are not generally useful for someone who
54
+ wants to consume this data as a .csv file
55
+ output_encoding (str, optional): encoding to use for the .csv file
56
+ overwrite (bool): whether to overwrite an existing .csv file; if this is False and the
57
+ output file exists, no-ops and returns
58
+
40
59
  """
41
60
 
42
61
  if output_path is None:
43
62
  output_path = os.path.splitext(input_path)[0]+'.csv'
44
63
 
64
+ if os.path.isfile(output_path) and (not overwrite):
65
+ print('File {} exists, skipping json --> csv conversion'.format(output_path))
66
+ return
67
+
45
68
  print('Loading json results from {}...'.format(input_path))
46
69
  json_output = json.load(open(input_path))
47
70
 
@@ -51,11 +74,12 @@ def convert_json_to_csv(input_path,output_path=None,min_confidence=None,
51
74
 
52
75
  # We add an output column for each class other than 'empty',
53
76
  # containing the maximum probability of that class for each image
54
- n_non_empty_detection_categories = len(annotation_constants.annotation_bbox_categories) - 1
77
+ # n_non_empty_detection_categories = len(annotation_constants.annotation_bbox_categories) - 1
78
+ n_non_empty_detection_categories = annotation_constants.NUM_DETECTOR_CATEGORIES
55
79
  detection_category_column_names = []
56
- assert annotation_constants.annotation_bbox_category_id_to_name[0] == 'empty'
80
+ assert annotation_constants.detector_bbox_categories[0] == 'empty'
57
81
  for cat_id in range(1,n_non_empty_detection_categories+1):
58
- cat_name = annotation_constants.annotation_bbox_category_id_to_name[cat_id]
82
+ cat_name = annotation_constants.detector_bbox_categories[cat_id]
59
83
  detection_category_column_names.append('max_conf_' + cat_name)
60
84
 
61
85
  n_classification_categories = 0
@@ -73,7 +97,7 @@ def convert_json_to_csv(input_path,output_path=None,min_confidence=None,
73
97
 
74
98
  n_classification_categories = len(classification_category_ids)
75
99
 
76
- # There are several fields for which we add columns, other random bespoke fields
100
+ # There are several .json fields for which we add .csv columns; other random bespoke fields
77
101
  # will be ignored.
78
102
  optional_fields = ['width','height','datetime','exif_metadata']
79
103
  optional_fields_present = set()
@@ -104,7 +128,7 @@ def convert_json_to_csv(input_path,output_path=None,min_confidence=None,
104
128
  if 'failure' in im and im['failure'] is not None:
105
129
  row = [image_id, 'failure', im['failure']]
106
130
  rows.append(row)
107
- print('Skipping failed image {} ({})'.format(im['file'],im['failure']))
131
+ # print('Skipping failed image {} ({})'.format(im['file'],im['failure']))
108
132
  continue
109
133
 
110
134
  max_conf = ct_utils.get_max_conf(im)
@@ -193,12 +217,29 @@ def convert_json_to_csv(input_path,output_path=None,min_confidence=None,
193
217
  writer.writerow(header)
194
218
  writer.writerows(rows)
195
219
 
220
+ # ...def convert_json_to_csv(...)
221
+
196
222
 
197
- def convert_csv_to_json(input_path,output_path=None):
223
+ def convert_csv_to_json(input_path,output_path=None,overwrite=True):
224
+ """
225
+ Convert .csv to .json. If output_path is None, will convert x.csv to x.json.
226
+
227
+ Args:
228
+ input_path (str): .csv filename to convert to .json
229
+ output_path (str, optional): the output .json file to generate; if this is None, uses
230
+ [input_path] with its extension replaced by .json
231
+ overwrite (bool): whether to overwrite an existing .json file; if this is False and the
232
+ output file exists, no-ops and returns
233
+
234
+ """
198
235
 
199
236
  if output_path is None:
200
237
  output_path = os.path.splitext(input_path)[0]+'.json'
201
238
 
239
+ if os.path.isfile(output_path) and (not overwrite):
240
+ print('File {} exists, skipping csv --> json conversion'.format(output_path))
241
+ return
242
+
202
243
  # Format spec:
203
244
  #
204
245
  # https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing
@@ -215,7 +256,7 @@ def convert_csv_to_json(input_path,output_path=None):
215
256
  }
216
257
 
217
258
  classification_categories = {}
218
- detection_categories = annotation_constants.annotation_bbox_category_id_to_name
259
+ detection_categories = annotation_constants.detector_bbox_categories
219
260
 
220
261
  images = []
221
262
 
@@ -259,6 +300,8 @@ def convert_csv_to_json(input_path,output_path=None):
259
300
  json_out['images'] = images
260
301
 
261
302
  json.dump(json_out,open(output_path,'w'),indent=1)
303
+
304
+ # ...def convert_csv_to_json(...)
262
305
 
263
306
 
264
307
  #%% Interactive driver
@@ -1,17 +1,17 @@
1
- ########
2
- #
3
- # load_api_results.py
4
- #
5
- # DEPRECATED
6
- #
7
- # As of 2023.12, this module is used in postprocessing and RDE. Not recommended
8
- # for new code.
9
- #
10
- # Loads the output of the batch processing API (json) into a Pandas dataframe.
11
- #
12
- # Includes functions to read/write the (very very old) .csv results format.
13
- #
14
- ########
1
+ """
2
+
3
+ load_api_results.py
4
+
5
+ DEPRECATED
6
+
7
+ As of 2023.12, this module is used in postprocessing and RDE. Not recommended
8
+ for new code.
9
+
10
+ Loads the output of the batch processing API (json) into a Pandas dataframe.
11
+
12
+ Includes functions to read/write the (very very old) .csv results format.
13
+
14
+ """
15
15
 
16
16
  #%% Imports
17
17
 
@@ -31,7 +31,7 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
31
31
  filename_replacements: Optional[Mapping[str, str]] = None,
32
32
  force_forward_slashes: bool = True
33
33
  ) -> Tuple[pd.DataFrame, Dict]:
34
- """
34
+ r"""
35
35
  Loads json-formatted MegaDetector results to a Pandas DataFrame.
36
36
 
37
37
  Args:
@@ -44,8 +44,7 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
44
44
  in filenames
45
45
 
46
46
  Returns:
47
- detection_results: pd.DataFrame, contains at least the columns:
48
- ['file', 'detections','failure']
47
+ detection_results: pd.DataFrame, contains at least the columns ['file', 'detections','failure']
49
48
  other_fields: a dict containing fields in the results other than 'images'
50
49
  """
51
50
 
@@ -64,11 +63,9 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
64
63
  if k != 'images':
65
64
  other_fields[k] = v
66
65
 
67
- # Normalize paths to simplify comparisons later
68
66
  if normalize_paths:
69
67
  for image in detection_results['images']:
70
- image['file'] = os.path.normpath(image['file'])
71
- # image['file'] = image['file'].replace('\\','/')
68
+ image['file'] = os.path.normpath(image['file'])
72
69
 
73
70
  if force_forward_slashes:
74
71
  for image in detection_results['images']:
@@ -1,13 +1,13 @@
1
- ########
2
- #
3
- # md_to_coco.py
4
- #
5
- # "Converts" MegaDetector output files to COCO format. "Converts" is in quotes because
6
- # this is an opinionated transformation that requires a confidence threshold.
7
- #
8
- # Does not currently handle classification information.
9
- #
10
- ########
1
+ """
2
+
3
+ md_to_coco.py
4
+
5
+ "Converts" MegaDetector output files to COCO format. "Converts" is in quotes because
6
+ this is an opinionated transformation that requires a confidence threshold.
7
+
8
+ Does not currently handle classification information.
9
+
10
+ """
11
11
 
12
12
  #%% Constants and imports
13
13
 
@@ -38,18 +38,28 @@ def md_to_coco(md_results_file,
38
38
 
39
39
  A folder of images is required if width and height information are not available
40
40
  in the MD results file.
41
+
42
+ Args:
43
+ md_results_file (str): MD results .json file to convert to COCO format
44
+ coco_output_file (str, optional): COCO .json file to write; if this is None, we'll return
45
+ a COCO-formatted dict, but won't write it to disk
46
+ image_folder (str, optional): folder of images, required if 'width' and 'height' are not
47
+ present in the MD results file (they are not required by the format)
48
+ confidence_threshold (float, optional): boxes below this confidence threshold will not be
49
+ included in the output data
50
+ validate_image_sizes (bool, optional): if this is True, we'll check the image sizes
51
+ regardless of whether "width" and "height" are present in the MD results file.
52
+ info (dict, optional): arbitrary metadata to include in an "info" field in the COCO-formatted
53
+ output
54
+ preserve_nonstandard_metadata (bool, optional): if this is True, confidence will be preserved in a
55
+ non-standard "conf" field in each annotation, and any random fields present in each image's data
56
+ (e.g. EXIF metadata) will be propagated to COCO output
57
+ include_failed_images (bool, optional): if this is True, failed images will be propagated to COCO output
58
+ with a non-empty "failure" field and no other fields, otherwise failed images will be skipped.
41
59
 
42
- If validate_image_sizes is True, we'll check the image sizes regardless of whether width
43
- and height are present in the MD results file.
44
-
45
- If preserve_nonstandard_metadata is True, confidence will be preserved in a non-standard
46
- "conf" field in each annotation, and any random fields present in each image's data (e.g.
47
- EXIF metadata) will be propagated to COCO output.
48
-
49
- If include_failed_images is True, failed images will be propagated to COCO output with
50
- a non-empty "failure" field and no other fields, otherwise failed images will be skipped.
51
-
52
- Returns the COCO json dict.
60
+ Returns:
61
+ dict: the COCO data dict, identical to what's written to [coco_output_file] if [coco_output_file]
62
+ is not None.
53
63
  """
54
64
 
55
65
  with open(md_results_file,'r') as f: