megadetector 5.0.8__py3-none-any.whl → 5.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (190) hide show
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +65 -65
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
  20. api/batch_processing/postprocessing/compare_batch_results.py +113 -43
  21. api/batch_processing/postprocessing/convert_output_format.py +41 -16
  22. api/batch_processing/postprocessing/load_api_results.py +16 -17
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +52 -22
  25. api/batch_processing/postprocessing/merge_detections.py +14 -14
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
  27. api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +102 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -263
  71. data_management/coco_to_yolo.py +79 -58
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +62 -24
  76. data_management/databases/subset_json_db.py +24 -15
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -162
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -158
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +7 -7
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +65 -24
  120. data_management/labelme_to_yolo.py +8 -8
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +13 -13
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +44 -110
  128. data_management/lila/generate_lila_per_image_labels.py +55 -42
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +96 -33
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +110 -97
  135. data_management/remap_coco_categories.py +83 -83
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +30 -23
  138. data_management/wi_download_csv_to_coco.py +246 -239
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +300 -60
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +179 -113
  147. detection/run_inference_with_yolov5_val.py +108 -48
  148. detection/run_tiled_inference.py +111 -40
  149. detection/tf_detector.py +51 -29
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +228 -68
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -871
  157. md_utils/path_utils.py +460 -134
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +176 -60
  163. md_utils/write_html_image_list.py +40 -33
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +597 -291
  168. md_visualization/visualize_db.py +76 -48
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/METADATA +13 -7
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  173. taxonomy_mapping/__init__.py +0 -0
  174. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  175. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  176. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  177. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  178. taxonomy_mapping/retrieve_sample_image.py +12 -12
  179. taxonomy_mapping/simple_image_download.py +11 -11
  180. taxonomy_mapping/species_lookup.py +10 -10
  181. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  182. taxonomy_mapping/taxonomy_graph.py +47 -47
  183. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  184. data_management/cct_json_to_filename_json.py +0 -89
  185. data_management/cct_to_csv.py +0 -140
  186. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  187. detection/detector_training/copy_checkpoints.py +0 -43
  188. megadetector-5.0.8.dist-info/RECORD +0 -205
  189. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
  190. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/WHEEL +0 -0
@@ -1,19 +1,19 @@
1
- ########
2
- #
3
- # compare_batch_results.py
4
- #
5
- # Compare sets of batch results; typically used to compare:
6
- #
7
- # * Results from different MegaDetector versions
8
- # * Results before/after RDE
9
- # * Results with/without augmentation
10
- #
11
- # Makes pairwise comparisons, but can take lists of results files (will perform
12
- # all pairwise comparisons). Results are written to an HTML page that shows the number
13
- # and nature of disagreements (in the sense of each image being a detection or non-detection),
14
- # with sample images for each category.
15
- #
16
- ########
1
+ """
2
+
3
+ compare_batch_results.py
4
+
5
+ Compare sets of batch results; typically used to compare:
6
+
7
+ * Results from different MegaDetector versions
8
+ * Results before/after RDE
9
+ * Results with/without augmentation
10
+
11
+ Makes pairwise comparisons, but can take lists of results files (will perform
12
+ all pairwise comparisons). Results are written to an HTML page that shows the number
13
+ and nature of disagreements (in the sense of each image being a detection or non-detection),
14
+ with sample images for each category.
15
+
16
+ """
17
17
 
18
18
  #%% Imports
19
19
 
@@ -43,16 +43,28 @@ class PairwiseBatchComparisonOptions:
43
43
  pairwise options sets is stored in the BatchComparisonsOptions class.
44
44
  """
45
45
 
46
+ #: First filename to compare
46
47
  results_filename_a = None
48
+
49
+ #: Second filename to compare
47
50
  results_filename_b = None
48
51
 
52
+ #: Description to use in the output HTML for filename A
49
53
  results_description_a = None
54
+
55
+ #: Description to use in the output HTML for filename B
50
56
  results_description_b = None
51
57
 
58
+ #: Per-class detection thresholds to use for filename A (including a 'default' threshold)
52
59
  detection_thresholds_a = {'animal':0.15,'person':0.15,'vehicle':0.15,'default':0.15}
60
+
61
+ #: Per-class detection thresholds to use for filename B (including a 'default' threshold)
53
62
  detection_thresholds_b = {'animal':0.15,'person':0.15,'vehicle':0.15,'default':0.15}
54
63
 
64
+ #: Rendering threshold to use for all categories for filename A
55
65
  rendering_confidence_threshold_a = 0.1
66
+
67
+ #: Rendering threshold to use for all categories for filename B
56
68
  rendering_confidence_threshold_b = 0.1
57
69
 
58
70
  # ...class PairwiseBatchComparisonOptions
@@ -63,33 +75,56 @@ class BatchComparisonOptions:
63
75
  Defines the options for a set of (possibly many) pairwise comparisons.
64
76
  """
65
77
 
78
+ #: Folder to which we should write HTML output
66
79
  output_folder = None
80
+
81
+ #: Base folder for images (which are specified as relative files)
67
82
  image_folder = None
83
+
84
+ #: Job name to use in the HTML output file
68
85
  job_name = ''
69
86
 
87
+ #: Maximum number of images to render for each category, where a "category" here is
88
+ #: "detections_a_only", "detections_b_only", etc., or None to render all images.
70
89
  max_images_per_category = 1000
90
+
91
+ #: Maximum number of images per HTML page (paginates if a category page goes beyond this),
92
+ #: or None to disable pagination.
71
93
  max_images_per_page = None
94
+
95
+ #: Colormap to use for detections in file A (maps detection categories to colors)
72
96
  colormap_a = ['Red']
97
+
98
+ #: Colormap to use for detections in file B (maps detection categories to colors)
73
99
  colormap_b = ['RoyalBlue']
74
100
 
75
- # Process-based parallelization isn't supported yet; this must be "True"
101
+ #: Process-based parallelization isn't supported yet; this must be "True"
76
102
  parallelize_rendering_with_threads = True
77
103
 
78
- # List of filenames to include in the comparison, or None to use all files
104
+ #: List of filenames to include in the comparison, or None to use all files
79
105
  filenames_to_include = None
80
106
 
81
- # Compare only detections/non-detections, ignore categories (still renders categories)
107
+ #: Compare only detections/non-detections, ignore categories (still renders categories)
82
108
  class_agnostic_comparison = False
83
109
 
110
+ #: Width of images to render in the output HTML
84
111
  target_width = 800
112
+
113
+ #: Number of workers to use for rendering, or <=1 to disable parallelization
85
114
  n_rendering_workers = 20
115
+
116
+ #: Random seed for image sampling (not used if max_images_per_category is None)
86
117
  random_seed = 0
87
118
 
88
- # Default to sorting by filename
119
+ #: Whether to sort results by confidence; if this is False, sorts by filename
89
120
  sort_by_confidence = False
90
121
 
122
+ #: The expectation is that all results sets being compared will refer to the same images; if this
123
+ #: is True (default), we'll error if that's not the case, otherwise non-matching lists will just be
124
+ #: a warning.
91
125
  error_on_non_matching_lists = True
92
126
 
127
+ #: List of PairwiseBatchComparisonOptions that defines the comparisons we'll render.
93
128
  pairwise_options = []
94
129
 
95
130
  # ...class BatchComparisonOptions
@@ -100,18 +135,21 @@ class PairwiseBatchComparisonResults:
100
135
  The results from a single pairwise comparison.
101
136
  """
102
137
 
138
+ #: String of HTML content suitable for rendering to an HTML file
103
139
  html_content = None
140
+
141
+ #: Possibly-modified version of the PairwiseBatchComparisonOptions supplied as input.
104
142
  pairwise_options = None
105
143
 
106
- # A dictionary with keys including:
107
- #
108
- # common_detections
109
- # common_non_detections
110
- # detections_a_only
111
- # detections_b_only
112
- # class_transitions
144
+ #: A dictionary with keys including:
145
+ #:
146
+ #: common_detections
147
+ #: common_non_detections
148
+ #: detections_a_only
149
+ #: detections_b_only
150
+ #: class_transitions
113
151
  #
114
- # Each of these maps a filename to a two-element list (the image in set A, the image in set B).
152
+ #: Each of these maps a filename to a two-element list (the image in set A, the image in set B).
115
153
  categories_to_image_pairs = None
116
154
 
117
155
  # ...class PairwiseBatchComparisonResults
@@ -122,9 +160,10 @@ class BatchComparisonResults:
122
160
  The results from a set of pairwise comparisons
123
161
  """
124
162
 
163
+ #: Filename containing HTML output
125
164
  html_output_file = None
126
165
 
127
- # An list of PairwiseBatchComparisonResults
166
+ #: A list of PairwiseBatchComparisonResults
128
167
  pairwise_results = None
129
168
 
130
169
  # ...class BatchComparisonResults
@@ -144,9 +183,20 @@ main_page_footer = '<br/><br/><br/></body></html>\n'
144
183
 
145
184
  #%% Comparison functions
146
185
 
147
- def render_image_pair(fn,image_pairs,category_folder,options,pairwise_options):
186
+ def _render_image_pair(fn,image_pairs,category_folder,options,pairwise_options):
148
187
  """
149
188
  Render two sets of results (i.e., a comparison) for a single image.
189
+
190
+ Args:
191
+ fn (str): image filename
192
+ image_pairs (dict): dict mapping filenames to pairs of image dicts
193
+ category_folder (str): folder to which to render this image, typically
194
+ "detections_a_only", "detections_b_only", etc.
195
+ options (BatchComparisonOptions): job options
196
+ pairwise_options (PairwiseBatchComparisonOptions): pairwise comparison options
197
+
198
+ Returns:
199
+ str: rendered image filename
150
200
  """
151
201
 
152
202
  input_image_path = os.path.join(options.image_folder,fn)
@@ -194,20 +244,22 @@ def render_image_pair(fn,image_pairs,category_folder,options,pairwise_options):
194
244
  im.save(output_image_path)
195
245
  return output_image_path
196
246
 
197
- # ...def render_image_pair()
247
+ # ...def _render_image_pair()
198
248
 
199
249
 
200
- def pairwise_compare_batch_results(options,output_index,pairwise_options):
250
+ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
201
251
  """
202
252
  The main entry point for this module is compare_batch_results(), which calls
203
253
  this function for each pair of comparisons the caller has requested. Generates an
204
254
  HTML page for this comparison. Returns a PairwiseBatchComparisonResults object.
205
255
 
206
- options: an instance of BatchComparisonOptions
207
-
208
- output_index: a numeric index used for generating HTML titles
209
-
210
- pairwise_options: an instance of PairwiseBatchComparisonOptions
256
+ Args:
257
+ options (BatchComparisonOptions): overall job options for this comparison group
258
+ output_index (int): a numeric index used for generating HTML titles
259
+ pairwise_options (PairwiseBatchComparisonOptions): job options for this comparison
260
+
261
+ Returns:
262
+ PairwiseBatchComparisonResults: the results of this pairwise comparison
211
263
  """
212
264
 
213
265
  # pairwise_options is passed as a parameter here, and should not be specified
@@ -463,11 +515,11 @@ def pairwise_compare_batch_results(options,output_index,pairwise_options):
463
515
  if options.n_rendering_workers <= 1:
464
516
  output_image_paths = []
465
517
  for fn in tqdm(image_filenames):
466
- output_image_paths.append(render_image_pair(fn,image_pairs,category_folder,
518
+ output_image_paths.append(_render_image_pair(fn,image_pairs,category_folder,
467
519
  options,pairwise_options))
468
520
  else:
469
521
  output_image_paths = list(tqdm(pool.imap(
470
- partial(render_image_pair, image_pairs=image_pairs,
522
+ partial(_render_image_pair, image_pairs=image_pairs,
471
523
  category_folder=category_folder,options=options,
472
524
  pairwise_options=pairwise_options),
473
525
  image_filenames),
@@ -644,14 +696,20 @@ def pairwise_compare_batch_results(options,output_index,pairwise_options):
644
696
 
645
697
  return pairwise_results
646
698
 
647
- # ...def pairwise_compare_batch_results()
699
+ # ...def _pairwise_compare_batch_results()
648
700
 
649
701
 
650
702
  def compare_batch_results(options):
651
703
  """
652
704
  The main entry point for this module. Runs one or more batch results comparisons,
653
- writing results to an html page. Most of the work is deferred to
654
- pairwise_compare_batch_results().
705
+ writing results to an html page. Most of the work is deferred to _pairwise_compare_batch_results().
706
+
707
+ Args:
708
+ options (BatchComparisonOptions): job options to use for this comparison task, including the
709
+ list of specific pairwise comparisons to make (in the pairwise_options field)
710
+
711
+ Returns:
712
+ BatchComparisonResults: the results of this comparison task
655
713
  """
656
714
 
657
715
  assert options.output_folder is not None
@@ -675,7 +733,7 @@ def compare_batch_results(options):
675
733
  for i_comparison,pairwise_options in enumerate(pairwise_options_list):
676
734
  print('Running comparison {} of {}'.format(i_comparison,n_comparisons))
677
735
  pairwise_results = \
678
- pairwise_compare_batch_results(options,i_comparison,pairwise_options)
736
+ _pairwise_compare_batch_results(options,i_comparison,pairwise_options)
679
737
  html_content += pairwise_results.html_content
680
738
  all_pairwise_results.append(pairwise_results)
681
739
 
@@ -702,6 +760,18 @@ def n_way_comparison(filenames,options,detection_thresholds=None,rendering_thres
702
760
  """
703
761
  Performs N pairwise comparisons for the list of results files in [filenames], by generating
704
762
  sets of pairwise options and calling compare_batch_results.
763
+
764
+ Args:
765
+ filenames (list): list of MD results filenames to compare
766
+ options (BatchComparisonOptions): task options set in which pairwise_options is still
767
+ empty; that will get populated from [filenames]
768
+ detection_thresholds (list, optional): list of detection thresholds with the same length
769
+ as [filenames], or None to use sensible defaults
770
+ rendering_thresholds (list, optional): list of rendering thresholds with the same length
771
+ as [filenames], or None to use sensible defaults
772
+
773
+ Returns:
774
+ BatchComparisonResults: the results of this comparison task
705
775
  """
706
776
 
707
777
  if detection_thresholds is None:
@@ -1,13 +1,15 @@
1
- ########
2
- #
3
- # convert_output_format.py
4
- #
5
- # Converts between file formats output by our batch processing API. Currently
6
- # supports json <--> csv conversion, but this should be the landing place for any
7
- # conversion - including between hypothetical alternative .json versions - that we support
8
- # in the future.
9
- #
10
- ########
1
+ """
2
+
3
+ convert_output_format.py
4
+
5
+ Converts between file formats output by our batch processing API. Currently
6
+ supports json <--> csv conversion, but this should be the landing place for any
7
+ conversion - including between hypothetical alternative .json versions - that we support
8
+ in the future.
9
+
10
+ The .csv format is largely obsolete, don't use it unless you're super-duper sure you need it.
11
+
12
+ """
11
13
 
12
14
  #%% Constants and imports
13
15
 
@@ -33,13 +35,27 @@ def convert_json_to_csv(input_path,output_path=None,min_confidence=None,
33
35
  omit_bounding_boxes=False,output_encoding=None,
34
36
  overwrite=True):
35
37
  """
36
- Convert .json to .csv
38
+ Converts a MD results .json file to a totally non-standard .csv format.
37
39
 
38
- If output_path is None, will convert x.json to x.csv.
40
+ If [output_path] is None, will convert x.json to x.csv.
39
41
 
40
42
  TODO: this function should obviously be using Pandas or some other sensible structured
41
43
  representation of tabular data. Even a list of dicts. This implementation is quite
42
44
  brittle and depends on adding fields to every row in exactly the right order.
45
+
46
+ Args:
47
+ input_path (str): the input .json file to convert
48
+ output_path (str, optional): the output .csv file to generate; if this is None, uses
49
+ [input_path].csv
50
+ min_confidence (float, optional): the minimum-confidence detection we should include
51
+ in the "detections" column; has no impact on the other columns
52
+ omit_bounding_boxes (bool): whether to leave out the json-formatted bounding boxes
53
+ that make up the "detections" column, which are not generally useful for someone who
54
+ wants to consume this data as a .csv file
55
+ output_encoding (str, optional): encoding to use for the .csv file
56
+ overwrite (bool): whether to overwrite an existing .csv file; if this is False and the
57
+ output file exists, no-ops and returns
58
+
43
59
  """
44
60
 
45
61
  if output_path is None:
@@ -58,11 +74,12 @@ def convert_json_to_csv(input_path,output_path=None,min_confidence=None,
58
74
 
59
75
  # We add an output column for each class other than 'empty',
60
76
  # containing the maximum probability of that class for each image
61
- n_non_empty_detection_categories = len(annotation_constants.annotation_bbox_categories) - 1
77
+ # n_non_empty_detection_categories = len(annotation_constants.annotation_bbox_categories) - 1
78
+ n_non_empty_detection_categories = annotation_constants.NUM_DETECTOR_CATEGORIES
62
79
  detection_category_column_names = []
63
- assert annotation_constants.annotation_bbox_category_id_to_name[0] == 'empty'
80
+ assert annotation_constants.detector_bbox_categories[0] == 'empty'
64
81
  for cat_id in range(1,n_non_empty_detection_categories+1):
65
- cat_name = annotation_constants.annotation_bbox_category_id_to_name[cat_id]
82
+ cat_name = annotation_constants.detector_bbox_categories[cat_id]
66
83
  detection_category_column_names.append('max_conf_' + cat_name)
67
84
 
68
85
  n_classification_categories = 0
@@ -206,6 +223,14 @@ def convert_json_to_csv(input_path,output_path=None,min_confidence=None,
206
223
  def convert_csv_to_json(input_path,output_path=None,overwrite=True):
207
224
  """
208
225
  Convert .csv to .json. If output_path is None, will convert x.csv to x.json.
226
+
227
+ Args:
228
+ input_path (str): .csv filename to convert to .json
229
+ output_path (str, optional): the output .json file to generate; if this is None, uses
230
+ [input_path].json
231
+ overwrite (bool): whether to overwrite an existing .json file; if this is False and the
232
+ output file exists, no-ops and returns
233
+
209
234
  """
210
235
 
211
236
  if output_path is None:
@@ -231,7 +256,7 @@ def convert_csv_to_json(input_path,output_path=None,overwrite=True):
231
256
  }
232
257
 
233
258
  classification_categories = {}
234
- detection_categories = annotation_constants.annotation_bbox_category_id_to_name
259
+ detection_categories = annotation_constants.detector_bbox_categories
235
260
 
236
261
  images = []
237
262
 
@@ -1,17 +1,17 @@
1
- ########
2
- #
3
- # load_api_results.py
4
- #
5
- # DEPRECATED
6
- #
7
- # As of 2023.12, this module is used in postprocessing and RDE. Not recommended
8
- # for new code.
9
- #
10
- # Loads the output of the batch processing API (json) into a Pandas dataframe.
11
- #
12
- # Includes functions to read/write the (very very old) .csv results format.
13
- #
14
- ########
1
+ """
2
+
3
+ load_api_results.py
4
+
5
+ DEPRECATED
6
+
7
+ As of 2023.12, this module is used in postprocessing and RDE. Not recommended
8
+ for new code.
9
+
10
+ Loads the output of the batch processing API (json) into a Pandas dataframe.
11
+
12
+ Includes functions to read/write the (very very old) .csv results format.
13
+
14
+ """
15
15
 
16
16
  #%% Imports
17
17
 
@@ -31,7 +31,7 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
31
31
  filename_replacements: Optional[Mapping[str, str]] = None,
32
32
  force_forward_slashes: bool = True
33
33
  ) -> Tuple[pd.DataFrame, Dict]:
34
- """
34
+ r"""
35
35
  Loads json-formatted MegaDetector results to a Pandas DataFrame.
36
36
 
37
37
  Args:
@@ -44,8 +44,7 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
44
44
  in filenames
45
45
 
46
46
  Returns:
47
- detection_results: pd.DataFrame, contains at least the columns:
48
- ['file', 'detections','failure']
47
+ detection_results: pd.DataFrame, contains at least the columns ['file', 'detections','failure']
49
48
  other_fields: a dict containing fields in the results other than 'images'
50
49
  """
51
50
 
@@ -1,13 +1,13 @@
1
- ########
2
- #
3
- # md_to_coco.py
4
- #
5
- # "Converts" MegaDetector output files to COCO format. "Converts" is in quotes because
6
- # this is an opinionated transformation that requires a confidence threshold.
7
- #
8
- # Does not currently handle classification information.
9
- #
10
- ########
1
+ """
2
+
3
+ md_to_coco.py
4
+
5
+ "Converts" MegaDetector output files to COCO format. "Converts" is in quotes because
6
+ this is an opinionated transformation that requires a confidence threshold.
7
+
8
+ Does not currently handle classification information.
9
+
10
+ """
11
11
 
12
12
  #%% Constants and imports
13
13
 
@@ -38,18 +38,28 @@ def md_to_coco(md_results_file,
38
38
 
39
39
  A folder of images is required if width and height information are not available
40
40
  in the MD results file.
41
+
42
+ Args:
43
+ md_results_file (str): MD results .json file to convert to COCO format
44
+ coco_output_file (str, optional): COCO .json file to write; if this is None, we'll return
45
+ a COCO-formatted dict, but won't write it to disk
46
+ image_folder (str, optional): folder of images, required if 'width' and 'height' are not
47
+ present in the MD results file (they are not required by the format)
48
+ confidence_threshold (float, optional): boxes below this confidence threshold will not be
49
+ included in the output data
50
+ validate_image_sizes (bool, optional): if this is True, we'll check the image sizes
51
+ regardless of whether "width" and "height" are present in the MD results file.
52
+ info (dict, optional): arbitrary metadata to include in an "info" field in the COCO-formatted
53
+ output
54
+ preserve_nonstandard_metadata (bool, optional): if this is True, confidence will be preserved in a
55
+ non-standard "conf" field in each annotation, and any random fields present in each image's data
56
+ (e.g. EXIF metadata) will be propagated to COCO output
57
+ include_failed_images (bool, optional): if this is True, failed images will be propagated to COCO output
58
+ with a non-empty "failure" field and no other fields, otherwise failed images will be skipped.
41
59
 
42
- If validate_image_sizes is True, we'll check the image sizes regardless of whether width
43
- and height are present in the MD results file.
44
-
45
- If preserve_nonstandard_metadata is True, confidence will be preserved in a non-standard
46
- "conf" field in each annotation, and any random fields present in each image's data (e.g.
47
- EXIF metadata) will be propagated to COCO output.
48
-
49
- If include_failed_images is True, failed images will be propagated to COCO output with
50
- a non-empty "failure" field and no other fields, otherwise failed images will be skipped.
51
-
52
- Returns the COCO json dict.
60
+ Returns:
61
+ dict: the COCO data dict, identical to what's written to [coco_output_file] if [coco_output_file]
62
+ is not None.
53
63
  """
54
64
 
55
65
  with open(md_results_file,'r') as f:
@@ -1,17 +1,17 @@
1
- ########
2
- #
3
- # md_to_labelme.py
4
- #
5
- # "Converts" a MegaDetector output .json file to labelme format (one .json per image
6
- # file). "Convert" is in quotes because this is an opinionated transformation that
7
- # requires a confidence threshold.
8
- #
9
- # TODO:
10
- #
11
- # * support variable confidence thresholds across classes
12
- # * support classification data
13
- #
14
- ########
1
+ """
2
+
3
+ md_to_labelme.py
4
+
5
+ "Converts" a MegaDetector output .json file to labelme format (one .json per image
6
+ file). "Convert" is in quotes because this is an opinionated transformation that
7
+ requires a confidence threshold.
8
+
9
+ TODO:
10
+
11
+ * support variable confidence thresholds across classes
12
+ * support classification data
13
+
14
+ """
15
15
 
16
16
  #%% Imports and constants
17
17
 
@@ -26,6 +26,7 @@ from functools import partial
26
26
 
27
27
  from md_visualization.visualization_utils import open_image
28
28
  from md_utils.ct_utils import truncate_float
29
+ from detection.run_detector import DEFAULT_DETECTOR_LABEL_MAP
29
30
 
30
31
  output_precision = 3
31
32
  default_confidence_threshold = 0.15
@@ -33,18 +34,33 @@ default_confidence_threshold = 0.15
33
34
 
34
35
  #%% Functions
35
36
 
36
- def get_labelme_dict_for_image(im,image_base_name,category_id_to_name,
37
+ def get_labelme_dict_for_image(im,image_base_name=None,category_id_to_name=None,
37
38
  info=None,confidence_threshold=None):
38
39
  """
39
40
  For the given image struct in MD results format, reformat the detections into
40
- labelme format. Returns a dict.
41
+ labelme format.
41
42
 
42
- 'height' and 'width' are required in [im].
43
+ Args:
44
+ im (dict): MegaDetector-formatted results dict, must include 'height' and 'width' fields
45
+ image_base_name (str, optional): written directly to the 'imagePath' field in the output;
46
+ defaults to os.path.basename(im['file']).
47
+ category_id_to_name (dict, optional): maps string-int category IDs to category names, defaults
48
+ to the standard MD categories
49
+ info (dict, optional): arbitrary metadata to write to the "detector_info" field in the output
50
+ dict
51
+ confidence_threshold (float, optional): only detections at or above this confidence threshold
52
+ will be included in the output dict
43
53
 
44
- image_base_name is written directly to the 'imagePath' field in the output; it should generally be
45
- os.path.basename(your_image_file).
54
+ Returns:
55
+ dict: labelme-formatted dictionary, suitable for writing directly to a labelme-formatted .json file
46
56
  """
47
57
 
58
+ if image_base_name is None:
59
+ image_base_name = os.path.basename(im['file'])
60
+
61
+ if category_id_to_name:
62
+ category_id_to_name = DEFAULT_DETECTOR_LABEL_MAP
63
+
48
64
  if confidence_threshold is None:
49
65
  confidence_threshold = -1.0
50
66
 
@@ -130,7 +146,22 @@ def md_to_labelme(results_file,image_base,confidence_threshold=None,
130
146
  For all the images in [results_file], write a .json file in labelme format alongside the
131
147
  corresponding relative path within image_base.
132
148
 
133
- If non-empty, "extension_prefix" will be inserted before the .json extension.
149
+ Args:
150
+ results_file (str): MD results .json file to convert to Labelme format
151
+ image_base (str): folder of images; filenames in [results_file] should be relative to
152
+ this folder
153
+ confidence_threshold (float, optional): only detections at or above this confidence threshold
154
+ will be included in the output dict
155
+ overwrite (bool, optional): whether to overwrite existing output files; if this is False
156
+ and the output file for an image exists, we'll skip that image
157
+ extension_prefix (str, optional): if non-empty, "extension_prefix" will be inserted before the .json
158
+ extension
159
+ n_workers (int, optional): enables multiprocessing if > 1
160
+ use_threads (bool, optional): if [n_workers] > 1, determines whether we parallelize via threads (True)
161
+ or processes (False)
162
+ bypass_image_size_read (bool, optional): if True, skips reading image sizes and trusts whatever is in
163
+ the MD results file (don't set this to "True" if your MD results file doesn't contain image sizes)
164
+ verbose (bool, optional): enables additional debug output
134
165
  """
135
166
 
136
167
  if extension_prefix is None:
@@ -294,7 +325,6 @@ def main():
294
325
  args = parser.parse_args()
295
326
 
296
327
  md_to_labelme(args.results_file,args.image_base,args.confidence_threshold,args.overwrite)
297
-
298
-
328
+
299
329
  if __name__ == '__main__':
300
330
  main()
@@ -1,17 +1,17 @@
1
- ########
2
- #
3
- # merge_detections.py
4
- #
5
- # Merge high-confidence detections from one or more results files into another
6
- # file. Typically used to combine results from MDv5b and/or MDv4 into a "primary"
7
- # results file from MDv5a.
8
- #
9
- # Detection categories must be the same in both files; if you want to first remap
10
- # one file's category mapping to be the same as another's, see remap_detection_categories.
11
- #
12
- # If you want to literally merge two .json files, see combine_api_outputs.py.
13
- #
14
- ########
1
+ """
2
+
3
+ merge_detections.py
4
+
5
+ Merge high-confidence detections from one or more results files into another
6
+ file. Typically used to combine results from MDv5b and/or MDv4 into a "primary"
7
+ results file from MDv5a.
8
+
9
+ Detection categories must be the same in both files; if you want to first remap
10
+ one file's category mapping to be the same as another's, see remap_detection_categories.
11
+
12
+ If you want to literally merge two .json files, see combine_api_outputs.py.
13
+
14
+ """
15
15
 
16
16
  #%% Constants and imports
17
17