megadetector 10.0.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. megadetector/__init__.py +0 -0
  2. megadetector/api/__init__.py +0 -0
  3. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
  7. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  8. megadetector/classification/__init__.py +0 -0
  9. megadetector/classification/aggregate_classifier_probs.py +108 -0
  10. megadetector/classification/analyze_failed_images.py +227 -0
  11. megadetector/classification/cache_batchapi_outputs.py +198 -0
  12. megadetector/classification/create_classification_dataset.py +626 -0
  13. megadetector/classification/crop_detections.py +516 -0
  14. megadetector/classification/csv_to_json.py +226 -0
  15. megadetector/classification/detect_and_crop.py +853 -0
  16. megadetector/classification/efficientnet/__init__.py +9 -0
  17. megadetector/classification/efficientnet/model.py +415 -0
  18. megadetector/classification/efficientnet/utils.py +608 -0
  19. megadetector/classification/evaluate_model.py +520 -0
  20. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  21. megadetector/classification/json_to_azcopy_list.py +63 -0
  22. megadetector/classification/json_validator.py +696 -0
  23. megadetector/classification/map_classification_categories.py +276 -0
  24. megadetector/classification/merge_classification_detection_output.py +509 -0
  25. megadetector/classification/prepare_classification_script.py +194 -0
  26. megadetector/classification/prepare_classification_script_mc.py +228 -0
  27. megadetector/classification/run_classifier.py +287 -0
  28. megadetector/classification/save_mislabeled.py +110 -0
  29. megadetector/classification/train_classifier.py +827 -0
  30. megadetector/classification/train_classifier_tf.py +725 -0
  31. megadetector/classification/train_utils.py +323 -0
  32. megadetector/data_management/__init__.py +0 -0
  33. megadetector/data_management/animl_to_md.py +161 -0
  34. megadetector/data_management/annotations/__init__.py +0 -0
  35. megadetector/data_management/annotations/annotation_constants.py +33 -0
  36. megadetector/data_management/camtrap_dp_to_coco.py +270 -0
  37. megadetector/data_management/cct_json_utils.py +566 -0
  38. megadetector/data_management/cct_to_md.py +184 -0
  39. megadetector/data_management/cct_to_wi.py +293 -0
  40. megadetector/data_management/coco_to_labelme.py +284 -0
  41. megadetector/data_management/coco_to_yolo.py +701 -0
  42. megadetector/data_management/databases/__init__.py +0 -0
  43. megadetector/data_management/databases/add_width_and_height_to_db.py +107 -0
  44. megadetector/data_management/databases/combine_coco_camera_traps_files.py +210 -0
  45. megadetector/data_management/databases/integrity_check_json_db.py +563 -0
  46. megadetector/data_management/databases/subset_json_db.py +195 -0
  47. megadetector/data_management/generate_crops_from_cct.py +200 -0
  48. megadetector/data_management/get_image_sizes.py +164 -0
  49. megadetector/data_management/labelme_to_coco.py +559 -0
  50. megadetector/data_management/labelme_to_yolo.py +349 -0
  51. megadetector/data_management/lila/__init__.py +0 -0
  52. megadetector/data_management/lila/create_lila_blank_set.py +556 -0
  53. megadetector/data_management/lila/create_lila_test_set.py +192 -0
  54. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  55. megadetector/data_management/lila/download_lila_subset.py +182 -0
  56. megadetector/data_management/lila/generate_lila_per_image_labels.py +777 -0
  57. megadetector/data_management/lila/get_lila_annotation_counts.py +174 -0
  58. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  59. megadetector/data_management/lila/lila_common.py +319 -0
  60. megadetector/data_management/lila/test_lila_metadata_urls.py +164 -0
  61. megadetector/data_management/mewc_to_md.py +344 -0
  62. megadetector/data_management/ocr_tools.py +873 -0
  63. megadetector/data_management/read_exif.py +964 -0
  64. megadetector/data_management/remap_coco_categories.py +195 -0
  65. megadetector/data_management/remove_exif.py +156 -0
  66. megadetector/data_management/rename_images.py +194 -0
  67. megadetector/data_management/resize_coco_dataset.py +665 -0
  68. megadetector/data_management/speciesnet_to_md.py +41 -0
  69. megadetector/data_management/wi_download_csv_to_coco.py +247 -0
  70. megadetector/data_management/yolo_output_to_md_output.py +594 -0
  71. megadetector/data_management/yolo_to_coco.py +984 -0
  72. megadetector/data_management/zamba_to_md.py +188 -0
  73. megadetector/detection/__init__.py +0 -0
  74. megadetector/detection/change_detection.py +840 -0
  75. megadetector/detection/process_video.py +479 -0
  76. megadetector/detection/pytorch_detector.py +1451 -0
  77. megadetector/detection/run_detector.py +1267 -0
  78. megadetector/detection/run_detector_batch.py +2172 -0
  79. megadetector/detection/run_inference_with_yolov5_val.py +1314 -0
  80. megadetector/detection/run_md_and_speciesnet.py +1604 -0
  81. megadetector/detection/run_tiled_inference.py +1044 -0
  82. megadetector/detection/tf_detector.py +209 -0
  83. megadetector/detection/video_utils.py +1379 -0
  84. megadetector/postprocessing/__init__.py +0 -0
  85. megadetector/postprocessing/add_max_conf.py +72 -0
  86. megadetector/postprocessing/categorize_detections_by_size.py +166 -0
  87. megadetector/postprocessing/classification_postprocessing.py +1943 -0
  88. megadetector/postprocessing/combine_batch_outputs.py +249 -0
  89. megadetector/postprocessing/compare_batch_results.py +2110 -0
  90. megadetector/postprocessing/convert_output_format.py +403 -0
  91. megadetector/postprocessing/create_crop_folder.py +629 -0
  92. megadetector/postprocessing/detector_calibration.py +570 -0
  93. megadetector/postprocessing/generate_csv_report.py +522 -0
  94. megadetector/postprocessing/load_api_results.py +223 -0
  95. megadetector/postprocessing/md_to_coco.py +428 -0
  96. megadetector/postprocessing/md_to_labelme.py +351 -0
  97. megadetector/postprocessing/md_to_wi.py +41 -0
  98. megadetector/postprocessing/merge_detections.py +392 -0
  99. megadetector/postprocessing/postprocess_batch_results.py +2140 -0
  100. megadetector/postprocessing/remap_detection_categories.py +226 -0
  101. megadetector/postprocessing/render_detection_confusion_matrix.py +677 -0
  102. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +206 -0
  103. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +82 -0
  104. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1665 -0
  105. megadetector/postprocessing/separate_detections_into_folders.py +795 -0
  106. megadetector/postprocessing/subset_json_detector_output.py +964 -0
  107. megadetector/postprocessing/top_folders_to_bottom.py +238 -0
  108. megadetector/postprocessing/validate_batch_results.py +332 -0
  109. megadetector/taxonomy_mapping/__init__.py +0 -0
  110. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  111. megadetector/taxonomy_mapping/map_new_lila_datasets.py +211 -0
  112. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +165 -0
  113. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +543 -0
  114. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  115. megadetector/taxonomy_mapping/simple_image_download.py +231 -0
  116. megadetector/taxonomy_mapping/species_lookup.py +1008 -0
  117. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  118. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  119. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  120. megadetector/tests/__init__.py +0 -0
  121. megadetector/tests/test_nms_synthetic.py +335 -0
  122. megadetector/utils/__init__.py +0 -0
  123. megadetector/utils/ct_utils.py +1857 -0
  124. megadetector/utils/directory_listing.py +199 -0
  125. megadetector/utils/extract_frames_from_video.py +307 -0
  126. megadetector/utils/gpu_test.py +125 -0
  127. megadetector/utils/md_tests.py +2072 -0
  128. megadetector/utils/path_utils.py +2872 -0
  129. megadetector/utils/process_utils.py +172 -0
  130. megadetector/utils/split_locations_into_train_val.py +237 -0
  131. megadetector/utils/string_utils.py +234 -0
  132. megadetector/utils/url_utils.py +825 -0
  133. megadetector/utils/wi_platform_utils.py +968 -0
  134. megadetector/utils/wi_taxonomy_utils.py +1766 -0
  135. megadetector/utils/write_html_image_list.py +239 -0
  136. megadetector/visualization/__init__.py +0 -0
  137. megadetector/visualization/plot_utils.py +309 -0
  138. megadetector/visualization/render_images_with_thumbnails.py +243 -0
  139. megadetector/visualization/visualization_utils.py +1973 -0
  140. megadetector/visualization/visualize_db.py +630 -0
  141. megadetector/visualization/visualize_detector_output.py +498 -0
  142. megadetector/visualization/visualize_video_output.py +705 -0
  143. megadetector-10.0.15.dist-info/METADATA +115 -0
  144. megadetector-10.0.15.dist-info/RECORD +147 -0
  145. megadetector-10.0.15.dist-info/WHEEL +5 -0
  146. megadetector-10.0.15.dist-info/licenses/LICENSE +19 -0
  147. megadetector-10.0.15.dist-info/top_level.txt +1 -0
@@ -0,0 +1,2140 @@
1
+ """
2
+
3
+ postprocess_batch_results.py
4
+
5
+ Given a .json or .csv file containing MD results, do one or more of the following:
6
+
7
+ * Sample detections/non-detections and render to HTML (when ground truth isn't
8
+ available) (this is 99.9% of what this module is for)
9
+ * Evaluate detector precision/recall, optionally rendering results (requires
10
+ ground truth)
11
+ * Sample true/false positives/negatives and render to HTML (requires ground
12
+ truth)
13
+
14
+ Ground truth, if available, must be in COCO Camera Traps format:
15
+
16
+ https://github.com/agentmorris/MegaDetector/blob/main/megadetector/data_management/README.md#coco-camera-traps-format
17
+
18
+ """
19
+
20
+ #%% Constants and imports
21
+
22
+ import argparse
23
+ import collections
24
+ import copy
25
+ import errno
26
+ import io
27
+ import os
28
+ import sys
29
+ import time
30
+ import uuid
31
+ import warnings
32
+ import random
33
+
34
+ from enum import IntEnum
35
+ from multiprocessing.pool import ThreadPool
36
+ from multiprocessing.pool import Pool
37
+ from functools import partial
38
+ from collections import defaultdict
39
+
40
+ import matplotlib.pyplot as plt
41
+ import numpy as np
42
+ import humanfriendly
43
+ import pandas as pd
44
+
45
+ from sklearn.metrics import precision_recall_curve, confusion_matrix, average_precision_score
46
+ from tqdm import tqdm
47
+
48
+ from megadetector.visualization import visualization_utils as vis_utils
49
+ from megadetector.visualization import plot_utils
50
+ from megadetector.utils.write_html_image_list import write_html_image_list
51
+ from megadetector.utils.wi_taxonomy_utils import load_md_or_speciesnet_file
52
+ from megadetector.utils import path_utils
53
+ from megadetector.utils.ct_utils import args_to_object
54
+ from megadetector.utils.ct_utils import sets_overlap
55
+ from megadetector.utils.ct_utils import sort_dictionary_by_value
56
+ from megadetector.utils.ct_utils import sort_dictionary_by_key
57
+ from megadetector.data_management.cct_json_utils import CameraTrapJsonUtils
58
+ from megadetector.data_management.cct_json_utils import IndexedJsonDb
59
+ from megadetector.postprocessing.load_api_results import load_api_results
60
+ from megadetector.detection.run_detector import get_typical_confidence_threshold_from_results
61
+
62
# Suppress UserWarnings about corrupt EXIF data
warnings.filterwarnings(action='ignore',
                        message='(Possibly )?corrupt EXIF data',
                        category=UserWarning)


#%% Options

# Ground-truth class names that are treated as "negative" unless the caller
# says otherwise
DEFAULT_NEGATIVE_CLASSES = ['empty']

# Ground-truth class names that are treated as neither positive nor negative
# unless the caller says otherwise
DEFAULT_UNKNOWN_CLASSES = ['unknown', 'unlabeled', 'ambiguous']

# A class name must never appear in both default lists; downstream code
# assumes the two sets are disjoint.
assert not sets_overlap(DEFAULT_NEGATIVE_CLASSES, DEFAULT_UNKNOWN_CLASSES), \
    'Default negative and unknown classes cannot overlap.'
74
+
75
+
76
class PostProcessingOptions:
    """
    Options used to parameterize process_batch_results().
    """

    def __init__(self):

        ### Required inputs

        #: MD results .json file to process
        self.md_results_file = ''

        #: Folder to which we should write HTML output
        self.output_dir = ''

        ### Options

        #: Folder where images live (filenames in [md_results_file] should be relative to this folder)
        #:
        #: Can be '' if [md_results_file] uses absolute paths.
        self.image_base_dir = ''

        ## These apply only when we're doing ground-truth comparisons

        #: Optional .json file containing ground truth information
        self.ground_truth_json_file = ''

        #: List of classes we'll treat as negative (defaults to "empty", typically includes
        #: classes like "blank", "misfire", etc.).
        #:
        #: Include the token "#NO_LABELS#" to indicate that an image with no annotations
        #: should be considered empty.
        #:
        #: This is a per-instance copy of the module-level default, so modifying this list
        #: in place does not change the default for other options objects.
        self.negative_classes = list(DEFAULT_NEGATIVE_CLASSES)

        #: List of classes we'll treat as neither positive nor negative (defaults to
        #: "unknown", "unlabeled", and "ambiguous"; typically includes classes like
        #: "unidentifiable").
        #:
        #: This is a per-instance copy of the module-level default, so modifying this list
        #: in place does not change the default for other options objects.
        self.unlabeled_classes = list(DEFAULT_UNKNOWN_CLASSES)

        #: List of output sets that we should count, but not render images for.
        #:
        #: Typically used to preview sets with lots of empties, where you don't want to
        #: subset but also don't want to render 100,000 empty images.
        #:
        #: Example strings that are valid for this option:
        #:
        #: detections, non_detections
        #: detections_animal, detections_person, detections_vehicle
        #: tn, tp, fn, fp
        self.rendering_bypass_sets = []

        #: If this is None, choose a confidence threshold based on the detector version.
        #:
        #: This can either be a float or a dictionary mapping category names (not IDs) to
        #: thresholds.  The category "default" can be used to specify thresholds for
        #: other categories.  Currently the use of a dict here is not supported when
        #: ground truth is supplied.
        self.confidence_threshold = None

        #: Confidence threshold to apply to classification (not detection) results
        #:
        #: Only a float is supported here (unlike the "confidence_threshold" parameter, which
        #: can be a dict).
        self.classification_confidence_threshold = 0.5

        #: Used for summary statistics only
        self.target_recall = 0.9

        #: Number of images to sample, -1 for "all images"
        self.num_images_to_sample = 500

        #: Random seed for sampling, or None
        self.sample_seed = 0 # None

        #: Image width for images in the HTML output
        self.viz_target_width = 800

        #: Line width (in pixels) for rendering detections
        self.line_thickness = 4

        #: Box expansion (in pixels) for rendering detections
        self.box_expansion = 0

        #: Job name to include in big letters in the output HTML
        self.job_name_string = None

        #: Model version string to include in the output HTML
        self.model_version_string = None

        #: Sort order for the output, should be one of "filename", "confidence", or "random"
        self.html_sort_order = 'filename'

        #: If True, images in the output HTML will be links back to the original images
        self.link_images_to_originals = True

        #: Optionally separate detections into categories (animal/vehicle/human)
        #:
        #: Currently only supported when ground truth is unavailable
        self.separate_detections_by_category = True

        #: Optionally replace one or more strings in filenames with other strings;
        #: useful for taking a set of results generated for one folder structure
        #: and applying them to a slightly different folder structure.
        self.api_output_filename_replacements = {}

        #: Optionally replace one or more strings in filenames with other strings;
        #: useful for taking a set of results generated for one folder structure
        #: and applying them to a slightly different folder structure.
        self.ground_truth_filename_replacements = {}

        #: Allow bypassing API output loading when operating on previously-loaded
        #: results.  If present, this is a Pandas DataFrame.  Almost never useful.
        self.api_detection_results = None

        #: Allow bypassing API output loading when operating on previously-loaded
        #: results.  If present, this is a str --> obj dict.  Almost never useful.
        self.api_other_fields = None

        #: Should we also split out a separate report about the detections that were
        #: just below our main confidence threshold?
        #:
        #: Currently only supported when ground truth is unavailable.
        self.include_almost_detections = False

        #: Only a float is supported here (unlike the "confidence_threshold" parameter, which
        #: can be a dict).
        self.almost_detection_confidence_threshold = None

        #: Enable/disable rendering parallelization
        self.parallelize_rendering = False

        #: Number of threads/processes to use for rendering parallelization
        self.parallelize_rendering_n_cores = 16

        #: Whether to use threads (True) or processes (False) for rendering parallelization
        self.parallelize_rendering_with_threads = True

        #: When classification results are present, should we sort alphabetically by class
        #: name (False) or in descending order by frequency (True)?
        self.sort_classification_results_by_count = False

        #: When classification results are present, use this dictionary to push some
        #: categories to the bottom of the list.  Larger numbers == later groups.
        #: Default sort weight is zero.  Line breaks will separate equal sort weights.
        #: Sort weights must be integers.
        #:
        #: In practice this is used to push generic categories like "blank", "animal",
        #: and "unreliable" to the bottom of the list, like:
        #:
        #: options.category_name_to_sort_weight = \
        #: {'animal':1,'blank':1,'unknown':1,'unreliable':1,'mammal':1,'no cv result':1}
        self.category_name_to_sort_weight = {}

        #: Should we split individual pages up into smaller pages if there are more than
        #: N images?
        self.max_figures_per_html_file = None

        #: Footer text for the index page
        # self.footer_text = \
        #    '<br/><p style="font-size:80%;">Preview page created with the ' + \
        #    '<a href="{}">MegaDetector Python package</a>.</p>'.\
        #    format('https://megadetector.readthedocs.io')
        self.footer_text = ''

        #: Character encoding to use when writing the index HTML file
        self.output_html_encoding = None

        #: Additional image fields to display in image headers.  If this is a list,
        #: we'll include those fields; if this is a dict, we'll use that dict to choose
        #: alternative display names for each field.
        self.additional_image_fields_to_display = None

        #: If classification results are present, should we include a summary of
        #: classification categories?
        self.include_classification_category_report = True

    # ...__init__()

# ...PostProcessingOptions
254
+
255
+
256
class PostProcessingResults:
    """
    Container for the values returned by process_batch_results.
    """

    def __init__(self):

        #: Path of the HTML file that the preview information was written to
        self.output_html_file = ''

        #: Detection results, as a Pandas DataFrame
        self.api_detection_results = None

        #: Everything else that was loaded from the results file, as a
        #: str --> obj dictionary
        self.api_other_fields = None
271
+
272
+
273
+ #%% Helper classes and functions
274
+
275
class DetectionStatus(IntEnum):
    """
    Per-image flags used in P/R analysis to record whether an image counts as
    positive or negative (according to ground truth and/or detector output).

    :meta private:
    """

    DS_NEGATIVE = 0
    DS_POSITIVE = 1

    # Alias for the largest "definitive" value; any status greater than this is
    # not clearly positive or negative
    DS_MAX_DEFINITIVE_VALUE = DS_POSITIVE

    # The image's annotations suggest both negative and positive
    DS_AMBIGUOUS = 2

    # The image is not annotated, or is annotated with 'unknown', 'unlabeled', etc.
    DS_UNKNOWN = 3

    # No state has been assigned to the image yet
    DS_UNASSIGNED = 4

    # Extra class used by some analyses to look at detections that fall just
    # below the main confidence threshold
    DS_ALMOST = 5
301
+
302
+
303
def _mark_detection_status(indexed_db,
                           negative_classes=DEFAULT_NEGATIVE_CLASSES,
                           unknown_classes=DEFAULT_UNKNOWN_CLASSES):
    """
    Tag every image in indexed_db.db['images'] with a '_detection_status' field
    saying whether the image should be treated as positive, negative, ambiguous,
    or unknown.  Images that are positive with exactly one category name also
    get an '_unambiguous_category' field.

    The image dicts are modified in place.

    returns (n_negative, n_positive, n_unknown, n_ambiguous)
    """

    negative_classes = set(negative_classes)
    unknown_classes = set(unknown_classes)

    # Anything outside this union counts as a positive label
    non_positive_classes = unknown_classes | negative_classes

    # Number of images assigned to each status
    status_counts = {
        DetectionStatus.DS_NEGATIVE: 0,
        DetectionStatus.DS_POSITIVE: 0,
        DetectionStatus.DS_UNKNOWN: 0,
        DetectionStatus.DS_AMBIGUOUS: 0
    }

    print('Preparing ground-truth annotations')
    for im in tqdm(indexed_db.db['images']):

        image_annotations = indexed_db.image_id_to_annotations[im['id']]
        category_ids = [ann['category_id'] for ann in image_annotations]
        category_names = {indexed_db.cat_id_to_name[cat_id] for cat_id in category_ids}

        # Which kinds of label does this image carry?
        #
        # - unknown / unassigned-type labels
        # - negative-type labels
        # - positive labels (i.e., labels that are neither unknown nor negative)
        has_unknown_labels = sets_overlap(category_names, unknown_classes)
        has_negative_labels = sets_overlap(category_names, negative_classes)
        has_positive_labels = len(category_names - non_positive_classes) > 0

        if len(category_ids) == 0:

            # No annotations at all: negative only if the caller opted in via
            # the '#NO_LABELS#' token, otherwise unknown
            if '#NO_LABELS#' in negative_classes:
                status = DetectionStatus.DS_NEGATIVE
            else:
                status = DetectionStatus.DS_UNKNOWN

        elif (has_unknown_labels + has_negative_labels + has_positive_labels) > 1:

            # More than one kind of label (bools sum as 0/1)
            status = DetectionStatus.DS_AMBIGUOUS

        # From here on, exactly one kind of label is present.
        #
        # Important: this 'unknown' case is deliberately kept separate from
        # the no-annotations case above.
        elif has_unknown_labels:
            status = DetectionStatus.DS_UNKNOWN

        elif has_negative_labels:
            status = DetectionStatus.DS_NEGATIVE

        elif has_positive_labels:
            status = DetectionStatus.DS_POSITIVE

        else:
            raise Exception('Invalid detection state')

        status_counts[status] += 1
        im['_detection_status'] = status

        # Record the category when a positive image has exactly one
        if (status == DetectionStatus.DS_POSITIVE) and (len(category_names) == 1):
            im['_unambiguous_category'] = next(iter(category_names))

    # ...for each image

    return (status_counts[DetectionStatus.DS_NEGATIVE],
            status_counts[DetectionStatus.DS_POSITIVE],
            status_counts[DetectionStatus.DS_UNKNOWN],
            status_counts[DetectionStatus.DS_AMBIGUOUS])

# ..._mark_detection_status()
394
+
395
+
396
def is_sas_url(s) -> bool:
    """
    Heuristic check for whether a string looks like a SAS URL: it has to be an
    http(s) URL, mention 'core.windows.net', and contain a '?'.

    This is a stand-in for a more robust test, but is good enough for how it is
    currently used.

    :meta private:
    """

    if not s.startswith(('http://', 'https://')):
        return False
    if 'core.windows.net' not in s:
        return False
    return '?' in s
406
+
407
+
408
def relative_sas_url(folder_url, relative_path):
    """
    Given a container-level or folder-level SAS URL, create a SAS URL to the
    specified relative path.

    Args:
        folder_url (str): SAS URL for a container or folder, i.e. a base URL
            followed by '?' and a query string
        relative_path (str): path to append to the base URL; '%', '#', and
            ' ' are percent-encoded, and one leading '/' is dropped

    Returns:
        str: [base URL]/[escaped relative path]?[query string], or None if
        [folder_url] doesn't look like a SAS URL (per is_sas_url)

    :meta private:
    """

    # '%' has to be escaped first, otherwise the escapes added for '#' and ' '
    # would themselves get re-escaped
    for char, escaped in (('%', '%25'), ('#', '%23'), (' ', '%20')):
        relative_path = relative_path.replace(char, escaped)

    if not is_sas_url(folder_url):
        return None

    # Split on the first '?' only: everything after it is the query string,
    # which may itself legally contain '?'.  is_sas_url() has already
    # established that at least one '?' is present.
    base_url, _, query_string = folder_url.partition('?')

    if not base_url.endswith('/'):
        base_url = base_url + '/'
    if relative_path.startswith('/'):
        relative_path = relative_path[1:]

    return base_url + relative_path + '?' + query_string
429
+
430
+
431
def _render_bounding_boxes(
        image_base_dir,
        image_relative_path,
        display_name,
        detections,
        res,
        ground_truth_boxes=None,
        detection_categories=None,
        classification_categories=None,
        options=None):
    """
    Renders detection bounding boxes on a single image.

    This is an internal function; if you want tools for rendering boxes on images, see
    visualization.visualization_utils.

    The image is read from:

    image_base_dir / image_relative_path

    ...and the rendered copy is written to, for example:

    [options.output_dir] /
    ['detections' or 'non_detections'] /
    [filename with slashes turned into tildes]

    "res" is a result type, e.g. "detections", "non-detections"; it selects the
    output folder for the rendered image.  If "res" is listed in
    options.rendering_bypass_sets, nothing is opened or rendered, but an info
    struct is still returned.

    Only very preliminary support is provided for ground truth box rendering.

    Returns the html info struct for this image in the format that's used for
    write_html_image_list.

    :meta private:
    """

    if options is None:
        options = PostProcessingOptions()

    bypass_rendering = res in options.rendering_bypass_sets

    # Stays None when rendering is bypassed, which also suppresses the link
    # back to the original image
    image_full_path = None

    if bypass_rendering:

        sample_name = res + '_' + path_utils.flatten_path(image_relative_path)

    else:

        if is_sas_url(image_base_dir):
            image_full_path = relative_sas_url(image_base_dir, image_relative_path)
        else:
            image_full_path = os.path.join(image_base_dir, image_relative_path)

        # os.path.isfile() is slow when mounting remote directories, so just
        # attempt the open and treat any failure as "no image"
        try:
            image = vis_utils.open_image(image_full_path)
        except Exception as e:
            print(f'Warning: could not open image file {image_full_path}: {str(e)}')
            image = None

        # All rendered images for a result type go into one flat folder
        sample_name = res + '_' + path_utils.flatten_path(image_relative_path)
        fullpath = os.path.join(options.output_dir, res, sample_name)

        if image is not None:

            original_size = image.size

            # Resize the image if necessary
            if options.viz_target_width is not None:
                image = vis_utils.resize_image(image, options.viz_target_width)

            # Render ground truth boxes if necessary
            if (ground_truth_boxes is not None) and (len(ground_truth_boxes) > 0):

                # Every ground truth box gets the same class, labeled "ground truth"
                gt_label_map = {0:'ground truth'}
                gt_classes = [0] * len(ground_truth_boxes)
                vis_utils.render_db_bounding_boxes(ground_truth_boxes, gt_classes, image,
                                                   original_size=original_size,
                                                   label_map=gt_label_map,
                                                   thickness=4,
                                                   expansion=4)

            # Use a single threshold if one float was supplied, otherwise look
            # up a threshold for each category ID that appears in this image
            if isinstance(options.confidence_threshold,float):
                rendering_confidence_threshold = options.confidence_threshold
            else:
                category_ids_this_image = {d['category'] for d in detections}
                rendering_confidence_threshold = {
                    category_id: _get_threshold_for_category_id(
                        category_id, options, detection_categories)
                    for category_id in category_ids_this_image
                }

            # Render detection boxes
            vis_utils.render_detection_bounding_boxes(
                detections, image,
                label_map=detection_categories,
                classification_label_map=classification_categories,
                confidence_threshold=rendering_confidence_threshold,
                classification_confidence_threshold=options.classification_confidence_threshold,
                thickness=options.line_thickness,
                expansion=options.box_expansion)

            try:
                image.save(fullpath)
            except OSError as e:
                # errno.ENAMETOOLONG doesn't get thrown properly on Windows, so
                # we awkwardly check against a hard-coded limit
                if (e.errno != errno.ENAMETOOLONG) and (len(fullpath) < 259):
                    raise
                # Fall back to a short, unique filename with the same extension
                extension = os.path.splitext(sample_name)[1]
                sample_name = res + '_' + str(uuid.uuid4()) + extension
                image.save(os.path.join(options.output_dir, res, sample_name))

        # ...if we opened the image

    # ...if we are/aren't bypassing rendering

    info = {
        # Use slashes regardless of os
        'filename': f'{res}/{sample_name}',
        'title': display_name,
        'textStyle':\
         'font-family:verdana,arial,calibri;font-size:80%;text-align:left;margin-top:20;margin-bottom:5'
    }

    # Optionally add links back to the original images
    #
    # Special characters in the link are handled downstream, in
    # write_html_image_list, so the path is passed through as-is.
    if options.link_images_to_originals and (image_full_path is not None):
        info['linkTarget'] = image_full_path

    return info

# ..._render_bounding_boxes
575
+
576
+
577
def _prepare_html_subpages(images_html, output_dir, options=None):
    """
    Write out a series of html image lists, e.g. the "detections" or "non-detections"
    pages.

    Args:
        images_html (dict): maps an html page name (e.g. "detections_animal") to
            a list of image structs friendly to write_html_image_list
        output_dir (str): folder to which [page name].html files are written
        options (PostProcessingOptions, optional): only html_sort_order and
            max_figures_per_html_file are used here; defaults are used if this
            is None

    Returns:
        dict: maps page names to image counts (including pages that are empty,
        and therefore not written)
    """

    if options is None:
        options = PostProcessingOptions()

    # Count items in each category
    image_counts = {res: len(array) for res, array in images_html.items()}

    # Optionally sort by filename before writing to html
    if options.html_sort_order == 'filename':

        images_html = {res: sorted(array, key=lambda x: x['filename'])
                       for res, array in images_html.items()}

    # Optionally sort by confidence before writing to html
    elif options.html_sort_order == 'confidence':

        images_html_sorted = {}
        for res, array in images_html.items():

            if all('max_conf' in d for d in array):
                images_html_sorted[res] = \
                    sorted(array, key=lambda x: x['max_conf'], reverse=True)
            else:
                print(f"Warning: some elements in the {res} page don't have confidence " + \
                      "values, can't sort by confidence")
                # Keep the page in its existing order; leaving it out of the
                # sorted dict would mean the page never gets written
                images_html_sorted[res] = array

        images_html = images_html_sorted

    else:

        assert options.html_sort_order == 'random',\
            'Unrecognized sort order {}'.format(options.html_sort_order)
        images_html = {res: random.sample(array,len(array))
                       for res, array in images_html.items()}

    # Write the individual HTML files
    for res, array in images_html.items():

        # Don't write empty pages
        if len(array) == 0:
            continue

        html_image_list_options = {
            'maxFiguresPerHtmlFile': options.max_figures_per_html_file,
            'headerHtml': '<h1>{}</h1>'.format(res.upper()),
            'pageTitle': '{}'.format(res.lower())
        }

        write_html_image_list(
            filename=os.path.join(output_dir, '{}.html'.format(res)),
            images=array,
            options=html_image_list_options)

    return image_counts

# ..._prepare_html_subpages()
646
+
647
+
648
def _get_threshold_for_category_name(category_name,options):
    """
    Determines the confidence threshold we should use for a specific category name.

    Args:
        category_name (str): the detection category name to look up
        options: object whose confidence_threshold attribute is either a single
            number (applied to every category) or a dict mapping category names to
            thresholds, optionally including a 'default' entry used for categories
            that are not listed explicitly.

    Returns:
        the confidence threshold to use for [category_name]
    """

    threshold_spec = options.confidence_threshold

    # A single number applies to every category. Integer thresholds (e.g. 0 or 1)
    # are as valid as floats; bool is excluded explicitly because it is a subclass
    # of int.
    if isinstance(threshold_spec,(int,float)) and not isinstance(threshold_spec,bool):
        return threshold_spec

    assert isinstance(threshold_spec,dict), \
        'confidence_threshold must either be a float or a dict'

    if category_name in threshold_spec:
        return threshold_spec[category_name]

    assert 'default' in threshold_spec, \
        'category {} not in confidence_threshold dict, and no default supplied'.format(
            category_name)

    return threshold_spec['default']
668
+
669
+
670
def _get_threshold_for_category_id(category_id,options,detection_categories):
    """
    Determines the confidence threshold we should use for a specific category ID.

    Args:
        category_id: the detection category ID to look up
        options: object whose confidence_threshold attribute is either a single
            number or a dict mapping category names to thresholds
        detection_categories (dict): maps category IDs to names

    Returns:
        the confidence threshold to use for [category_id]
    """

    threshold_spec = options.confidence_threshold

    # A single number applies to every category, so the ID doesn't need to be
    # resolved (or validated). Integer thresholds are accepted alongside floats;
    # bool is excluded explicitly because it is a subclass of int.
    if isinstance(threshold_spec,(int,float)) and not isinstance(threshold_spec,bool):
        return threshold_spec

    assert category_id in detection_categories, \
        'Invalid category ID {}'.format(category_id)

    category_name = detection_categories[category_id]

    return _get_threshold_for_category_name(category_name,options)
686
+
687
+
688
def _get_positive_categories(detections,options,detection_categories):
    """
    Collects the category IDs for which this image has at least one detection at or
    above that category's confidence threshold.

    Args:
        detections (list): detection dicts, each with 'category' and 'conf' fields
        options: options object, passed through to _get_threshold_for_category_id
        detection_categories (dict): maps category IDs to names

    Returns:
        list: sorted list of unique category IDs
    """

    def _is_above_threshold(det):
        cutoff = _get_threshold_for_category_id(det['category'], options, detection_categories)
        return det['conf'] >= cutoff

    return sorted({det['category'] for det in detections if _is_above_threshold(det)})
701
+
702
+
703
def _has_positive_detection(detections,options,detection_categories):
    """
    Determines whether the detection list [detections] contains at least one
    detection at or above the confidence threshold for its category.

    Args:
        detections (list): detection dicts, each with 'category' and 'conf' fields
        options: options object, passed through to _get_threshold_for_category_id
        detection_categories (dict): maps category IDs to names

    Returns:
        bool: True if any detection meets its threshold, otherwise False
    """

    def _is_above_threshold(det):
        cutoff = _get_threshold_for_category_id(det['category'], options, detection_categories)
        return det['conf'] >= cutoff

    # any() stops at the first positive detection
    return any(_is_above_threshold(det) for det in detections)
716
+
717
+
718
def _render_image_no_gt(file_info,
                        detection_categories_to_results_name,
                        detection_categories,
                        classification_categories,
                        options):
    r"""
    Renders a single image for which no ground truth information is available.

    The image is assigned to one primary page ("detections" or a per-category
    detections page, "almost_detections", or "non_detections"), rendered via
    _render_bounding_boxes, and then additionally assigned to one "class_*" page
    per distinct top-1 classification among its above-threshold detections.

    Returns a list of rendering structs, where the first item of each struct is a
    page name (e.g. "detections_animal"), and the second is a dict of information
    needed for rendering. E.g.:

    [['detections_animal',
        {
            'filename': 'detections_animal/detections_animal_blah~01060415.JPG',
            'title': '<b>Result type</b>: detections_animal,
                    <b>image</b>: blah\\01060415.JPG,
                    <b>max conf</b>: 0.897',
            'textStyle': 'font-family:verdana,arial,calibri;font-size:80%;text-align:left;margin-top:20;margin-bottom:5',
            'linkTarget': 'full_path_to_%5C01060415.JPG'
        }]]

    When no classification data is present, this list will always be length-1. When
    classification data is present, an image may appear in multiple pages; every
    entry refers to the same info dict.

    The 'max_conf' field of the info dict is set to the highest detection confidence
    in the image (0 if there are no detections).

    Returns None if _render_bounding_boxes returns an empty struct. Raises ValueError
    if category separation is enabled and this image's combination of positive
    categories is missing from [detection_categories_to_results_name].
    """

    image_relative_path = file_info['file']
    max_conf = file_info['max_detection_conf']
    detections = file_info['detections']

    # Decide which primary page this image belongs on; thresholds may vary by category
    if _has_positive_detection(detections,options,detection_categories):

        detection_status = DetectionStatus.DS_POSITIVE

        if options.separate_detections_by_category:
            positive_categories = tuple(
                _get_positive_categories(detections,options,detection_categories))
            if positive_categories not in detection_categories_to_results_name:
                raise ValueError('Error: {} not in category mapping (file {})'.format(
                    str(positive_categories),image_relative_path))
            res = detection_categories_to_results_name[positive_categories]
        else:
            res = 'detections'

    elif options.include_almost_detections and \
        (max_conf >= options.almost_detection_confidence_threshold):

        detection_status = DetectionStatus.DS_ALMOST
        res = 'almost_detections'

    else:

        detection_status = DetectionStatus.DS_NEGATIVE
        res = 'non_detections'

    display_name = '<b>Result type</b>: {}, <b>image</b>: {}, <b>max conf</b>: {:0.3f}'.format(
        res, image_relative_path, max_conf)

    # Append any additional per-image fields the caller asked us to show
    extra_fields = options.additional_image_fields_to_display

    if extra_fields is not None:

        # A dict maps field names to display names; any other iterable is just a
        # collection of field names
        has_display_names = isinstance(extra_fields,dict)

        for field_name in extra_fields:

            if field_name not in file_info:
                continue

            field_value = file_info[field_name]

            # Skip empty values
            if field_value is None:
                continue
            if isinstance(field_value,float) and np.isnan(field_value):
                continue

            field_display_name = extra_fields[field_name] if has_display_names else field_name
            field_string = '<b>{}</b>: {}'.format(field_display_name,field_value)
            display_name += ', {}'.format(field_string)

    # Render against a shallow copy of the options, so the threshold can be lowered
    # for almost-detections without modifying the caller's options
    rendering_options = copy.copy(options)
    if detection_status == DetectionStatus.DS_ALMOST:
        rendering_options.confidence_threshold = \
            rendering_options.almost_detection_confidence_threshold

    rendered_image_html_info = _render_bounding_boxes(
        image_base_dir=options.image_base_dir,
        image_relative_path=image_relative_path,
        display_name=display_name,
        detections=detections,
        res=res,
        ground_truth_boxes=None,
        detection_categories=detection_categories,
        classification_categories=classification_categories,
        options=rendering_options)

    # An empty struct means there is nothing to add to any page
    if len(rendered_image_html_info) == 0:
        return None

    image_result = [[res, rendered_image_html_info]]
    classes_rendered_this_image = set()
    classification_pages_allowed = (res != 'non_detections')
    max_conf = 0

    for det in detections:

        det_conf = det['conf']
        max_conf = max(max_conf, det_conf)

        # We make the decision here that only "detections" (not "almost-detections")
        # will appear on the classification category pages
        detection_threshold = \
            _get_threshold_for_category_id(det['category'], options, detection_categories)
        if det_conf < detection_threshold:
            continue

        if 'classifications' not in det:
            continue

        # This is a list of [class,confidence] pairs, sorted by classification confidence
        classifications = det['classifications']
        if (len(classifications) == 0) or (not classification_pages_allowed):
            continue

        top1_class_id = classifications[0][0]
        top1_class_name = classification_categories[top1_class_id]
        top1_class_score = classifications[0][1]

        # A negative classification threshold means "no threshold"
        if (options.classification_confidence_threshold < 0) or \
            (top1_class_score >= options.classification_confidence_threshold):
            class_string = 'class_{}'.format(top1_class_name)
        else:
            class_string = 'class_unreliable'

        # Add this image to each classification page at most once
        if class_string in classes_rendered_this_image:
            continue

        image_result.append([class_string, rendered_image_html_info])
        classes_rendered_this_image.add(class_string)

    # ...for each detection

    image_result[0][1]['max_conf'] = max_conf

    return image_result
881
+
882
+ # ...def _render_image_no_gt()
883
+
884
+
885
def _render_image_with_gt(file_info,ground_truth_indexed_db,
                          detection_categories,classification_categories,options):
    """
    Render an image with ground truth information. See _render_image_no_gt for return
    data format.

    The primary page is one of 'tp', 'tpc', 'tpi', 'fp', 'fn', or 'tn': 'tp' is used for
    true positives with no '_classification_accuracy' recorded on the ground truth
    image, 'tpc' when that accuracy is (close to) 1, and 'tpi' otherwise. The image is
    also added to a 'class_[name]' page for each of its ground truth classes.

    As in _render_image_no_gt, the 'max_conf' field of the rendering info is set to
    the highest detection confidence in the image (0 if there are no detections).

    Args:
        file_info (dict): a results row with 'file', 'max_detection_conf', and
            'detections' fields
        ground_truth_indexed_db: indexed ground truth; this function reads
            filename_to_id, image_id_to_image, image_id_to_annotations, and
            cat_id_to_name
        detection_categories (dict): maps detection category IDs to names
        classification_categories (dict): passed through to _render_bounding_boxes
        options (PostProcessingOptions): rendering options

    Returns:
        list: rendering structs, or None if the image has no ground truth entry, if
        its ground truth status is not definitive, or if _render_bounding_boxes
        returns an empty struct.
    """

    image_relative_path = file_info['file']
    max_conf = file_info['max_detection_conf']
    detections = file_info['detections']

    # This should already have been normalized to either '/' or '\'

    image_id = ground_truth_indexed_db.filename_to_id.get(image_relative_path, None)
    if image_id is None:
        # Use double quotes here: 'couldn''t' is adjacent-literal concatenation in
        # Python (not an escaped apostrophe), and would print "couldnt".
        print("Warning: couldn't find ground truth for image {}".format(image_relative_path))
        return None

    image = ground_truth_indexed_db.image_id_to_image[image_id]
    annotations = ground_truth_indexed_db.image_id_to_annotations[image_id]

    # Each ground truth box is the annotation's bbox with the category ID appended
    ground_truth_boxes = []
    for ann in annotations:
        if 'bbox' in ann:
            ground_truth_box = list(ann['bbox'])
            ground_truth_box.append(ann['category_id'])
            ground_truth_boxes.append(ground_truth_box)

    gt_status = image['_detection_status']

    gt_presence = bool(gt_status)

    gt_classes = CameraTrapJsonUtils.annotations_to_class_names(
        annotations, ground_truth_indexed_db.cat_id_to_name)
    gt_class_summary = ','.join(gt_classes)

    if gt_status > DetectionStatus.DS_MAX_DEFINITIVE_VALUE:
        print(f'Skipping image {image_id}, does not have a definitive '
              f'ground truth status (status: {gt_status}, classes: {gt_class_summary})')
        return None

    detected = _has_positive_detection(detections, options, detection_categories)

    if gt_presence and detected:
        if '_classification_accuracy' not in image.keys():
            res = 'tp'
        elif np.isclose(1, image['_classification_accuracy']):
            res = 'tpc'
        else:
            res = 'tpi'
    elif not gt_presence and detected:
        res = 'fp'
    elif gt_presence and not detected:
        res = 'fn'
    else:
        res = 'tn'

    display_name = ('<b>Result type</b>: {}, <b>Presence</b>: {}, <b>Class</b>: {}, '
                    '<b>Max conf</b>: {:0.3f}%, <b>Image</b>: {}').format(
        res.upper(), str(gt_presence), gt_class_summary,
        max_conf * 100, image_relative_path)

    rendered_image_html_info = _render_bounding_boxes(
        image_base_dir=options.image_base_dir,
        image_relative_path=image_relative_path,
        display_name=display_name,
        detections=detections,
        res=res,
        ground_truth_boxes=ground_truth_boxes,
        detection_categories=detection_categories,
        classification_categories=classification_categories,
        options=options)

    image_result = None

    if len(rendered_image_html_info) > 0:

        # Record the maximum detection confidence, matching _render_image_no_gt, so
        # pages built from these structs can be sorted by confidence.
        rendered_image_html_info['max_conf'] = \
            max([0] + [det['conf'] for det in detections])

        image_result = [[res, rendered_image_html_info]]
        for gt_class in gt_classes:
            image_result.append(['class_{}'.format(gt_class), rendered_image_html_info])

    return image_result
964
+
965
+ # ...def _render_image_with_gt()
966
+
967
+
968
+ #%% Main function
969
+
970
+ def process_batch_results(options):
971
+ """
972
+ Given a .json or .csv file containing MD results, do one or more of the following:
973
+
974
+ * Sample detections/non-detections and render to HTML (when ground truth isn't
975
+ available) (this is 99.9% of what this module is for)
976
+ * Evaluate detector precision/recall, optionally rendering results (requires
977
+ ground truth)
978
+ * Sample true/false positives/negatives and render to HTML (requires ground
979
+ truth)
980
+
981
+ Ground truth, if available, must be in COCO Camera Traps format:
982
+
983
+ https://github.com/agentmorris/MegaDetector/blob/main/megadetector/data_management/README.md#coco-camera-traps-format
984
+
985
+ Args:
986
+ options (PostProcessingOptions): everything we need to render a preview/analysis for
987
+ this set of results; see the PostProcessingOptions class for details.
988
+
989
+ Returns:
990
+ PostProcessingResults: information about the results/preview, most importantly the
991
+ HTML filename of the output. See the PostProcessingResults class for details.
992
+ """
993
+ ppresults = PostProcessingResults()
994
+
995
+ ##%% Expand some options for convenience
996
+
997
+ output_dir = options.output_dir
998
+
999
+
1000
+ ##%% Prepare output dir
1001
+
1002
+ os.makedirs(output_dir, exist_ok=True)
1003
+
1004
+
1005
+ ##%% Load ground truth if available
1006
+
1007
+ ground_truth_indexed_db = None
1008
+
1009
+ if (options.ground_truth_json_file is not None) and (len(options.ground_truth_json_file) > 0):
1010
+ assert (options.confidence_threshold is None) or (isinstance(options.confidence_threshold,float)), \
1011
+ 'Variable confidence thresholds are not supported when supplying ground truth'
1012
+
1013
+ if (options.ground_truth_json_file is not None) and (len(options.ground_truth_json_file) > 0):
1014
+
1015
+ if options.separate_detections_by_category:
1016
+ print("Warning: I don't know how to separate categories yet when doing " + \
1017
+ "a P/R analysis, disabling category separation")
1018
+ options.separate_detections_by_category = False
1019
+
1020
+ ground_truth_indexed_db = IndexedJsonDb(
1021
+ options.ground_truth_json_file,
1022
+ b_normalize_paths=True,
1023
+ filename_replacements=options.ground_truth_filename_replacements)
1024
+
1025
+ # Mark images in the ground truth as positive or negative
1026
+ n_negative, n_positive, n_unknown, n_ambiguous = _mark_detection_status(
1027
+ ground_truth_indexed_db,
1028
+ negative_classes=options.negative_classes,
1029
+ unknown_classes=options.unlabeled_classes)
1030
+
1031
+ print(f'Finished loading and indexing ground truth: {n_negative} '
1032
+ f'negative, {n_positive} positive, {n_unknown} unknown, '
1033
+ f'{n_ambiguous} ambiguous')
1034
+
1035
+ if n_positive == 0:
1036
+ print('\n*** Warning: no positives found in ground truth, analysis won\'t be very meaningful ***\n')
1037
+ if n_negative == 0:
1038
+ print('\n*** Warning: no negatives found in ground truth, analysis won\'t be very meaningful ***\n')
1039
+ if n_ambiguous > 0:
1040
+ print('\n*** Warning: {} images with ambiguous positive/negative status found in ground truth ***\n'.format(
1041
+ n_ambiguous))
1042
+
1043
+
1044
+ ##%% Load detection (and possibly classification) results
1045
+
1046
+ # If the caller hasn't supplied results, load them
1047
+ if options.api_detection_results is None:
1048
+ detections_df, other_fields = load_api_results(
1049
+ options.md_results_file, force_forward_slashes=True,
1050
+ filename_replacements=options.api_output_filename_replacements)
1051
+ ppresults.api_detection_results = detections_df
1052
+ ppresults.api_other_fields = other_fields
1053
+
1054
+ else:
1055
+ print('Bypassing detection results loading...')
1056
+ assert options.api_other_fields is not None
1057
+ detections_df = options.api_detection_results
1058
+ other_fields = options.api_other_fields
1059
+
1060
+ # Determine confidence thresholds if necessary
1061
+
1062
+ if options.confidence_threshold is None:
1063
+ options.confidence_threshold = \
1064
+ get_typical_confidence_threshold_from_results(other_fields)
1065
+ print('Choosing default confidence threshold of {} based on MD version'.format(
1066
+ options.confidence_threshold))
1067
+
1068
+ if options.almost_detection_confidence_threshold is None and options.include_almost_detections:
1069
+ assert isinstance(options.confidence_threshold,float), \
1070
+ 'If you are using a dictionary of confidence thresholds and almost-detections are enabled, ' + \
1071
+ 'you need to supply a threshold for almost detections.'
1072
+ options.almost_detection_confidence_threshold = options.confidence_threshold - 0.05
1073
+ if options.almost_detection_confidence_threshold < 0:
1074
+ options.almost_detection_confidence_threshold = 0
1075
+
1076
+ # Remove rows with inference failures (typically due to corrupt images)
1077
+ n_failures = 0
1078
+ if 'failure' in detections_df.columns:
1079
+ n_failures = detections_df['failure'].count()
1080
+ print('Ignoring {} failed images'.format(n_failures))
1081
+ # Explicitly forcing a copy() operation here to suppress "trying to be set
1082
+ # on a copy" warnings (and associated risks) below.
1083
+ detections_df = detections_df[detections_df['failure'].isna()].copy()
1084
+
1085
+ assert other_fields is not None
1086
+
1087
+ detection_categories = other_fields['detection_categories']
1088
+
1089
+ # Convert keys and values to lowercase
1090
+ classification_categories = other_fields.get('classification_categories', {})
1091
+ if classification_categories is not None:
1092
+ classification_categories = {
1093
+ k.lower(): v.lower()
1094
+ for k, v in classification_categories.items()
1095
+ }
1096
+
1097
+ # Count detections and almost-detections for reporting purposes
1098
+ n_positives = 0
1099
+ n_almosts = 0
1100
+
1101
+ print('Assigning images to rendering categories')
1102
+
1103
+ for i_row,row in tqdm(detections_df.iterrows(),total=len(detections_df)):
1104
+
1105
+ detections = row['detections']
1106
+ max_conf = row['max_detection_conf']
1107
+ if _has_positive_detection(detections, options, detection_categories):
1108
+ n_positives += 1
1109
+ elif (options.almost_detection_confidence_threshold is not None) and \
1110
+ (max_conf >= options.almost_detection_confidence_threshold):
1111
+ n_almosts += 1
1112
+
1113
+ print(f'Finished loading and preprocessing {len(detections_df)} rows '
1114
+ f'from detector output, predicted {n_positives} positives.')
1115
+
1116
+ if options.include_almost_detections:
1117
+ print('...and {} almost-positives'.format(n_almosts))
1118
+
1119
+
1120
+ ##%% Find descriptive metadata to include at the top of the page
1121
+
1122
+ if options.job_name_string is not None:
1123
+ job_name_string = options.job_name_string
1124
+ else:
1125
+ # This is rare; it only happens during debugging when the caller
1126
+ # is supplying already-loaded MD results.
1127
+ if options.md_results_file is None:
1128
+ job_name_string = 'unknown'
1129
+ else:
1130
+ job_name_string = os.path.basename(options.md_results_file)
1131
+
1132
+ if options.model_version_string is not None:
1133
+ model_version_string = options.model_version_string
1134
+ else:
1135
+
1136
+ if 'info' not in other_fields or 'detector' not in other_fields['info']:
1137
+ print('No model metadata supplied, assuming MDv4')
1138
+ model_version_string = 'MDv4 (assumed)'
1139
+ else:
1140
+ model_version_string = other_fields['info']['detector']
1141
+
1142
+
1143
+ ##%% If we have ground truth, remove images we can't match to ground truth
1144
+
1145
+ if ground_truth_indexed_db is not None:
1146
+
1147
+ b_match = detections_df['file'].isin(
1148
+ ground_truth_indexed_db.filename_to_id)
1149
+ print(f'Confirmed filename matches to ground truth for {sum(b_match)} '
1150
+ f'of {len(detections_df)} files')
1151
+
1152
+ detections_df = detections_df[b_match]
1153
+ detector_files = detections_df['file'].tolist()
1154
+
1155
+ assert len(detector_files) > 0, (
1156
+ 'No detection files available, possible path issue?')
1157
+
1158
+ print('Trimmed detection results to {} files'.format(len(detector_files)))
1159
+
1160
+
1161
+ ##%% (Optionally) sample from the full set of images
1162
+
1163
+ images_to_visualize = detections_df
1164
+
1165
+ if (options.num_images_to_sample is not None) and (options.num_images_to_sample > 0):
1166
+ images_to_visualize = images_to_visualize.sample(
1167
+ n=min(options.num_images_to_sample, len(images_to_visualize)),
1168
+ random_state=options.sample_seed)
1169
+
1170
+ output_html_file = ''
1171
+
1172
+ style_header = """<head>
1173
+ <title>Detection results preview</title>
1174
+ <style type="text/css">
1175
+ a { text-decoration: none; }
1176
+ body { font-family: segoe ui, calibri, "trebuchet ms", verdana, arial, sans-serif; }
1177
+ div.contentdiv { margin-left: 20px; }
1178
+ </style>
1179
+ </head>"""
1180
+
1181
+
1182
+ ##%% Fork here depending on whether or not ground truth is available
1183
+
1184
+ # If we have ground truth, we'll compute precision/recall and sample tp/fp/tn/fn.
1185
+ #
1186
+ # Otherwise we'll just visualize detections/non-detections.
1187
+
1188
+ if ground_truth_indexed_db is not None:
1189
+
1190
+ ##%% Detection evaluation: compute precision/recall
1191
+
1192
+ # numpy array of maximum confidence values
1193
+ p_detection = detections_df['max_detection_conf'].values
1194
+ n_detection_values = len(p_detection)
1195
+
1196
+ # numpy array of bools (0.0/1.0), and -1 as null value
1197
+ gt_detections = np.zeros(n_detection_values, dtype=float)
1198
+
1199
+ n_positive = 0
1200
+ n_negative = 0
1201
+
1202
+ for i_detection, fn in enumerate(detector_files):
1203
+
1204
+ image_id = ground_truth_indexed_db.filename_to_id[fn]
1205
+ image = ground_truth_indexed_db.image_id_to_image[image_id]
1206
+ detection_status = image['_detection_status']
1207
+
1208
+ if detection_status == DetectionStatus.DS_NEGATIVE:
1209
+ gt_detections[i_detection] = 0.0
1210
+ n_negative += 1
1211
+ elif detection_status == DetectionStatus.DS_POSITIVE:
1212
+ gt_detections[i_detection] = 1.0
1213
+ n_positive += 1
1214
+ else:
1215
+ gt_detections[i_detection] = -1.0
1216
+
1217
+ print('Of {} ground truth values, found {} positives and {} negatives'.format(
1218
+ len(detections_df),n_positive,n_negative))
1219
+
1220
+ # Don't include ambiguous/unknown ground truth in precision/recall analysis
1221
+ b_valid_ground_truth = gt_detections >= 0.0
1222
+
1223
+ p_detection_pr = p_detection[b_valid_ground_truth]
1224
+ gt_detections_pr = (gt_detections[b_valid_ground_truth] == 1.)
1225
+
1226
+ print('Including {} of {} values in p/r analysis'.format(np.sum(b_valid_ground_truth),
1227
+ len(b_valid_ground_truth)))
1228
+
1229
+ precisions, recalls, thresholds = precision_recall_curve(gt_detections_pr, p_detection_pr)
1230
+
1231
+ # For completeness, include the result at a confidence threshold of 1.0
1232
+ thresholds = np.append(thresholds, [1.0])
1233
+
1234
+ precisions_recalls = pd.DataFrame(data={
1235
+ 'confidence_threshold': thresholds,
1236
+ 'precision': precisions,
1237
+ 'recall': recalls
1238
+ })
1239
+
1240
+ # Compute and print summary statistics
1241
+ average_precision = average_precision_score(gt_detections_pr, p_detection_pr)
1242
+ print('Average precision: {:.1%}'.format(average_precision))
1243
+
1244
+ # Thresholds go up throughout precisions/recalls/thresholds; find the last
1245
+ # value where recall is at or above target. That's our precision @ target recall.
1246
+
1247
+ i_above_target_recall = (np.where(recalls >= options.target_recall))
1248
+
1249
+ # np.where returns a tuple of arrays, but in this syntax where we're
1250
+ # comparing an array with a scalar, there will only be one element.
1251
+ assert len (i_above_target_recall) == 1
1252
+
1253
+ # Convert back to a list
1254
+ i_above_target_recall = i_above_target_recall[0].tolist()
1255
+
1256
+ if len(i_above_target_recall) == 0:
1257
+ precision_at_target_recall = 0.0
1258
+ else:
1259
+ precision_at_target_recall = precisions[i_above_target_recall[-1]]
1260
+ print('Precision at {:.1%} recall: {:.1%}'.format(options.target_recall,
1261
+ precision_at_target_recall))
1262
+
1263
+ cm_predictions = np.array(p_detection_pr) > options.confidence_threshold
1264
+ cm = confusion_matrix(gt_detections_pr, cm_predictions, labels=[False,True])
1265
+
1266
+ # Flatten the confusion matrix
1267
+ tn, fp, fn, tp = cm.ravel()
1268
+
1269
+ precision_at_confidence_threshold = tp / (tp + fp)
1270
+ recall_at_confidence_threshold = tp / (tp + fn)
1271
+ f1 = 2.0 * (precision_at_confidence_threshold * recall_at_confidence_threshold) / \
1272
+ (precision_at_confidence_threshold + recall_at_confidence_threshold)
1273
+
1274
+ print('At a confidence threshold of {:.1%}, precision={:.1%}, recall={:.1%}, f1={:.1%}'.format(
1275
+ options.confidence_threshold, precision_at_confidence_threshold,
1276
+ recall_at_confidence_threshold, f1))
1277
+
1278
+ ##%% Collect classification results, if they exist
1279
+
1280
+ classifier_accuracies = []
1281
+
1282
+ # Mapping of classnames to idx for the confusion matrix.
1283
+ #
1284
+ # The lambda is actually kind of a hack, because we assume that
1285
+ # the following code does not reassign classname_to_idx
1286
+ classname_to_idx = collections.defaultdict(lambda: len(classname_to_idx))
1287
+
1288
+ # Confusion matrix as defaultdict of defaultdict
1289
+ #
1290
+ # Rows / first index is ground truth, columns / second index is predicted category
1291
+ classifier_cm = collections.defaultdict(lambda: collections.defaultdict(lambda: 0))
1292
+
1293
+ # i_detection = 0; fn = detector_files[i_detection]; print(fn)
1294
+ assert len(detector_files) == len(detections_df)
1295
+ for i_detection, fn in enumerate(detector_files):
1296
+
1297
+ image_id = ground_truth_indexed_db.filename_to_id[fn]
1298
+ image = ground_truth_indexed_db.image_id_to_image[image_id]
1299
+ detections = detections_df['detections'].iloc[i_detection]
1300
+ pred_class_ids = [det['classifications'][0][0] \
1301
+ for det in detections if 'classifications' in det.keys()]
1302
+ pred_classnames = [classification_categories[pd] for pd in pred_class_ids]
1303
+
1304
+ # If this image has classification predictions, and an unambiguous class
1305
+ # annotated, and is a positive image...
1306
+ if len(pred_classnames) > 0 \
1307
+ and '_unambiguous_category' in image.keys() \
1308
+ and image['_detection_status'] == DetectionStatus.DS_POSITIVE:
1309
+
1310
+ # The unambiguous category, we make this a set for easier handling afterward
1311
+ gt_categories = set([image['_unambiguous_category']])
1312
+ pred_categories = set(pred_classnames)
1313
+
1314
+ # Compute the accuracy as intersection over union,
1315
+ # i.e. (# of categories in both prediction and GT)
1316
+ # divided by (# of categories in either prediction or GT)
1317
+ #
1318
+ # In case of only one GT category, the result will be 1.0, if
1319
+ # prediction is one category and this category matches GT
1320
+ #
1321
+ # It is 1.0/(# of predicted top-1 categories), if the GT is
1322
+ # one of the predicted top-1 categories.
1323
+ #
1324
+ # It is 0.0, if none of the predicted categories is correct
1325
+
1326
+ classifier_accuracies.append(
1327
+ len(gt_categories & pred_categories)
1328
+ / len(gt_categories | pred_categories)
1329
+ )
1330
+ image['_classification_accuracy'] = classifier_accuracies[-1]
1331
+
1332
+ # Distribute this accuracy across all predicted categories in the
1333
+ # confusion matrix
1334
+ assert len(gt_categories) == 1
1335
+ gt_class_idx = classname_to_idx[list(gt_categories)[0]]
1336
+ for pred_category in pred_categories:
1337
+ pred_class_idx = classname_to_idx[pred_category]
1338
+ classifier_cm[gt_class_idx][pred_class_idx] += 1
1339
+
1340
+ # ...for each file in the detection results
1341
+
1342
+ # If we have classification results
1343
+ if len(classifier_accuracies) > 0:
1344
+
1345
+ # Build confusion matrix as array from classifier_cm
1346
+ all_class_ids = sorted(classname_to_idx.values())
1347
+ classifier_cm_array = np.array(
1348
+ [[classifier_cm[r_idx][c_idx] for c_idx in all_class_ids] for \
1349
+ r_idx in all_class_ids], dtype=float)
1350
+ classifier_cm_array /= (classifier_cm_array.sum(axis=1, keepdims=True) + 1e-7)
1351
+
1352
+ # Print some statistics
1353
+ print('Finished computation of {} classification results'.format(
1354
+ len(classifier_accuracies)))
1355
+ print('Mean accuracy: {}'.format(np.mean(classifier_accuracies)))
1356
+
1357
+ # Prepare confusion matrix output
1358
+
1359
+ # Get confusion matrix as string
1360
+ sio = io.StringIO()
1361
+ np.savetxt(sio, classifier_cm_array * 100, fmt='%5.1f')
1362
+ cm_str = sio.getvalue()
1363
+ # Get fixed-size classname for each idx
1364
+ idx_to_classname = {v:k for k,v in classname_to_idx.items()}
1365
+ classname_list = [idx_to_classname[idx] for idx in sorted(classname_to_idx.values())]
1366
+ classname_headers = ['{:<5}'.format(cname[:5]) for cname in classname_list]
1367
+
1368
+ # Prepend class name on each line and add to the top
1369
+ cm_str_lines = [' ' * 16 + ' '.join(classname_headers)]
1370
+ cm_str_lines += ['{:>15}'.format(cn[:15]) + ' ' + cm_line for cn, cm_line in \
1371
+ zip(classname_list, cm_str.splitlines(), strict=True)]
1372
+
1373
+ # Print formatted confusion matrix
1374
+ if False:
1375
+ # Actually don't, this gets really messy in all but the widest consoles
1376
+ print('Confusion matrix: ')
1377
+ print(*cm_str_lines, sep='\n')
1378
+
1379
+ # Plot confusion matrix
1380
+
1381
+ # To manually add more space at bottom: plt.rcParams['figure.subplot.bottom'] = 0.1
1382
+ #
1383
+ # Add 0.5 to figsize for every class. For two classes, this will result in
1384
+ # fig = plt.figure(figsize=[4,4])
1385
+ fig = plot_utils.plot_confusion_matrix(
1386
+ classifier_cm_array,
1387
+ classname_list,
1388
+ normalize=False,
1389
+ title='Confusion matrix',
1390
+ cmap=plt.cm.Blues,
1391
+ vmax=1.0,
1392
+ use_colorbar=True,
1393
+ y_label=True)
1394
+ cm_figure_relative_filename = 'confusion_matrix.png'
1395
+ cm_figure_filename = os.path.join(output_dir, cm_figure_relative_filename)
1396
+ plt.savefig(cm_figure_filename)
1397
+ plt.close(fig)
1398
+
1399
+ # ...if we have classification results
1400
+
1401
+
1402
+ ##%% Render output
1403
+
1404
+ # Write p/r table to .csv file in output directory
1405
+ pr_table_filename = os.path.join(output_dir, 'prec_recall.csv')
1406
+ precisions_recalls.to_csv(pr_table_filename, index=False)
1407
+
1408
+ # Write precision/recall plot to .png file in output directory
1409
+ t = 'Precision-Recall curve: AP={:0.1%}, P@{:0.1%}={:0.1%}'.format(
1410
+ average_precision, options.target_recall, precision_at_target_recall)
1411
+ fig = plot_utils.plot_precision_recall_curve(precisions, recalls, t)
1412
+
1413
+ pr_figure_relative_filename = 'prec_recall.png'
1414
+ pr_figure_filename = os.path.join(output_dir, pr_figure_relative_filename)
1415
+ fig.savefig(pr_figure_filename)
1416
+ plt.close(fig)
1417
+
1418
+
1419
+ ##%% Sampling
1420
+
1421
+ # Sample true/false positives/negatives with correct/incorrect top-1
1422
+ # classification and render to html
1423
+
1424
+ # Accumulate html image structs (in the format expected by write_html_image_lists)
1425
+ # for each category, e.g. 'tp', 'fp', ..., 'class_bird', ...
1426
+ images_html = collections.defaultdict(list)
1427
+
1428
+ # Add default entries by accessing them for the first time
1429
+ [images_html[res] for res in ['tp', 'tpc', 'tpi', 'fp', 'tn', 'fn']]
1430
+ for res in images_html.keys():
1431
+ os.makedirs(os.path.join(output_dir, res), exist_ok=True)
1432
+
1433
+ image_count = len(images_to_visualize)
1434
+
1435
+ # Each element will be a list of 2-tuples, with elements [collection name,html info struct]
1436
+ rendering_results = []
1437
+
1438
+ # Each element will be a three-tuple with elements file,max_conf,detections
1439
+ files_to_render = []
1440
+
1441
+ # Assemble the information we need for rendering, so we can parallelize without
1442
+ # dealing with Pandas
1443
+ # i_row = 0; row = images_to_visualize.iloc[0]
1444
+ for _, row in images_to_visualize.iterrows():
1445
+
1446
+ # Filenames should already have been normalized to either '/' or '\'
1447
+ files_to_render.append(row.to_dict())
1448
+
1449
+ start_time = time.time()
1450
+ if options.parallelize_rendering:
1451
+ pool = None
1452
+ try:
1453
+ if options.parallelize_rendering_n_cores is None:
1454
+ if options.parallelize_rendering_with_threads:
1455
+ pool = ThreadPool()
1456
+ else:
1457
+ pool = Pool()
1458
+ else:
1459
+ if options.parallelize_rendering_with_threads:
1460
+ pool = ThreadPool(options.parallelize_rendering_n_cores)
1461
+ worker_string = 'threads'
1462
+ else:
1463
+ pool = Pool(options.parallelize_rendering_n_cores)
1464
+ worker_string = 'processes'
1465
+ print('Rendering images with {} {}'.format(options.parallelize_rendering_n_cores,
1466
+ worker_string))
1467
+
1468
+ rendering_results = list(tqdm(pool.imap(
1469
+ partial(_render_image_with_gt,
1470
+ ground_truth_indexed_db=ground_truth_indexed_db,
1471
+ detection_categories=detection_categories,
1472
+ classification_categories=classification_categories,
1473
+ options=options),
1474
+ files_to_render), total=len(files_to_render)))
1475
+ finally:
1476
+ if pool is not None:
1477
+ pool.close()
1478
+ pool.join()
1479
+ print('Pool closed and joined for GT rendering')
1480
+ else:
1481
+ for file_info in tqdm(files_to_render):
1482
+ rendering_results.append(_render_image_with_gt(
1483
+ file_info,ground_truth_indexed_db,
1484
+ detection_categories,classification_categories,
1485
+ options=options))
1486
+ elapsed = time.time() - start_time
1487
+
1488
+ # Map all the rendering results in the list rendering_results into the
1489
+ # dictionary images_html, which maps category names to lists of results
1490
+ image_rendered_count = 0
1491
+ for rendering_result in rendering_results:
1492
+ if rendering_result is None:
1493
+ continue
1494
+ image_rendered_count += 1
1495
+ for assignment in rendering_result:
1496
+ images_html[assignment[0]].append(assignment[1])
1497
+
1498
+ # Prepare the individual html image files
1499
+ image_counts = _prepare_html_subpages(images_html, output_dir, options)
1500
+
1501
+ print('{} images rendered (of {})'.format(image_rendered_count,image_count))
1502
+
1503
+ # Write index.html
1504
+ all_tp_count = image_counts['tp'] + image_counts['tpc'] + image_counts['tpi']
1505
+ total_count = all_tp_count + image_counts['tn'] + image_counts['fp'] + image_counts['fn']
1506
+
1507
+ classification_detection_results = """&nbsp;&nbsp;&nbsp;&nbsp;<a href="tpc.html">with all correct top-1 predictions (TPC)</a> ({})<br/>
1508
+ &nbsp;&nbsp;&nbsp;&nbsp;<a href="tpi.html">with one or more incorrect top-1 prediction (TPI)</a> ({})<br/>
1509
+ &nbsp;&nbsp;&nbsp;&nbsp;<a href="tp.html">without classification evaluation</a><sup>*</sup> ({})<br/>""".format(
1510
+ image_counts['tpc'],
1511
+ image_counts['tpi'],
1512
+ image_counts['tp']
1513
+ )
1514
+
1515
+ confidence_threshold_string = ''
1516
+ if isinstance(options.confidence_threshold,float):
1517
+ confidence_threshold_string = '{:.2%}'.format(options.confidence_threshold)
1518
+ else:
1519
+ confidence_threshold_string = str(options.confidence_threshold)
1520
+
1521
+ index_page = """<html>
1522
+ {}
1523
+ <body>
1524
+ <h2>Evaluation</h2>
1525
+
1526
+ <h3>Job metadata</h3>
1527
+
1528
+ <div class="contentdiv">
1529
+ <p>Job name: {}<br/>
1530
+ <p>Model version: {}</p>
1531
+ </div>
1532
+
1533
+ <h3>Sample images</h3>
1534
+ <div class="contentdiv">
1535
+ <p>A sample of {} images, annotated with detections above confidence {}.</p>
1536
+ <a href="tp.html">True positives (TP)</a> ({}) ({:0.1%})<br/>
1537
+ CLASSIFICATION_PLACEHOLDER_1
1538
+ <a href="tn.html">True negatives (TN)</a> ({}) ({:0.1%})<br/>
1539
+ <a href="fp.html">False positives (FP)</a> ({}) ({:0.1%})<br/>
1540
+ <a href="fn.html">False negatives (FN)</a> ({}) ({:0.1%})<br/>
1541
+ CLASSIFICATION_PLACEHOLDER_2
1542
+ </div>
1543
+ """.format(
1544
+ style_header,job_name_string,model_version_string,
1545
+ image_count, confidence_threshold_string,
1546
+ all_tp_count, all_tp_count/total_count,
1547
+ image_counts['tn'], image_counts['tn']/total_count,
1548
+ image_counts['fp'], image_counts['fp']/total_count,
1549
+ image_counts['fn'], image_counts['fn']/total_count
1550
+ )
1551
+
1552
+ index_page += """
1553
+ <h3>Detection results</h3>
1554
+ <div class="contentdiv">
1555
+ <p>At a confidence threshold of {}, precision={:0.1%}, recall={:0.1%}</p>
1556
+ <p><strong>Precision/recall summary for all {} images</strong></p><img src="{}"><br/>
1557
+ </div>
1558
+ """.format(
1559
+ confidence_threshold_string, precision_at_confidence_threshold, recall_at_confidence_threshold,
1560
+ len(detections_df), pr_figure_relative_filename
1561
+ )
1562
+
1563
+ if len(classifier_accuracies) > 0:
1564
+ index_page = index_page.replace('CLASSIFICATION_PLACEHOLDER_1',classification_detection_results)
1565
+ index_page = index_page.replace('CLASSIFICATION_PLACEHOLDER_2',"""<p><sup>*</sup>We do not evaluate the classification result of images
1566
+ if the classification information is missing, if the image contains
1567
+ categories like &lsquo;empty&rsquo; or &lsquo;human&rsquo;, or if the image has multiple
1568
+ classification labels.</p>""")
1569
+ else:
1570
+ index_page = index_page.replace('CLASSIFICATION_PLACEHOLDER_1','')
1571
+ index_page = index_page.replace('CLASSIFICATION_PLACEHOLDER_2','')
1572
+
1573
+ if len(classifier_accuracies) > 0:
1574
+ index_page += """
1575
+ <h3>Classification results</h3>
1576
+ <div class="contentdiv">
1577
+ <p>Classification accuracy: {:.2%}<br>
1578
+ The accuracy is computed only for images with exactly one classification label.
1579
+ The accuracy of an image is computed as 1/(number of unique detected top-1 classes),
1580
+ i.e. if the model detects multiple boxes with different top-1 classes, then the accuracy
1581
+ decreases and the image is put into 'TPI'.</p>
1582
+ <p>Confusion matrix:</p>
1583
+ <p><img src="{}"></p>
1584
+ <div style='font-family:monospace;display:block;'>{}</div>
1585
+ </div>
1586
+ """.format(
1587
+ np.mean(classifier_accuracies),
1588
+ cm_figure_relative_filename,
1589
+ "<br>".join(cm_str_lines).replace(' ', '&nbsp;')
1590
+ )
1591
+
1592
+ # Show links to each GT class
1593
+ #
1594
+ # We could do this without classification results; currently we don't.
1595
+ if len(classname_to_idx) > 0:
1596
+
1597
+ index_page += '<h3>Images of specific classes</h3><br/><div class="contentdiv">'
1598
+ # Add links to all available classes
1599
+ for cname in sorted(classname_to_idx.keys()):
1600
+ index_page += '<a href="class_{0}.html">{0}</a> ({1})<br>'.format(
1601
+ cname,
1602
+ len(images_html['class_{}'.format(cname)]))
1603
+ index_page += '</div>'
1604
+
1605
+ # Write custom footer if it was provided
1606
+ if (options.footer_text is not None) and (len(options.footer_text) > 0):
1607
+ index_page += '{}\n'.format(options.footer_text)
1608
+
1609
+ # Close open html tags
1610
+ index_page += '\n</body></html>\n'
1611
+
1612
+ output_html_file = os.path.join(output_dir, 'index.html')
1613
+ with open(output_html_file, 'w',
1614
+ encoding=options.output_html_encoding) as f:
1615
+ f.write(index_page)
1616
+
1617
+ print('Finished writing html to {}'.format(output_html_file))
1618
+
1619
+ # ...if we have ground truth
1620
+
1621
+
1622
+ ##%% Otherwise, if we don't have ground truth...
1623
+
1624
+ else:
1625
+
1626
+ ##%% Sample detections/non-detections
1627
+
1628
+ # Accumulate html image structs (in the format expected by write_html_image_list)
1629
+ # for each category
1630
+ images_html = collections.defaultdict(list)
1631
+
1632
+ # Add default entries by accessing them for the first time
1633
+
1634
+ # Maps sorted tuples of detection category IDs (string ints) - e.g. ("1"), ("1", "4", "7") - to
1635
+ # result set names, e.g. "detections_human", "detections_cat_truck".
1636
+ detection_categories_to_results_name = {}
1637
+
1638
+ # Keep track of which categories are single-class (e.g. "animal") and which are
1639
+ # combinations (e.g. "animal_vehicle")
1640
+ detection_categories_to_category_count = {}
1641
+
1642
+ # For the creation of a "non-detections" category
1643
+ images_html['non_detections']
1644
+ detection_categories_to_category_count['non_detections'] = 0
1645
+
1646
+
1647
+ if not options.separate_detections_by_category:
1648
+ # For the creation of a "detections" category
1649
+ images_html['detections']
1650
+ detection_categories_to_category_count['detections'] = 0
1651
+ else:
1652
+ # Add a set of results for each category and combination of categories, e.g.
1653
+ # "detections_animal_vehicle". When we're using this script for non-MegaDetector
1654
+ # results, this can generate lots of categories, e.g. detections_bear_bird_cat_dog_pig.
1655
+ # We'll keep that huge set of combinations in this map, but we'll only write
1656
+ # out links for the ones that are non-empty.
1657
+ used_combinations = set()
1658
+
1659
+ # row = images_to_visualize.iloc[0]
1660
+ for i_row, row in images_to_visualize.iterrows():
1661
+ detections_this_row = row['detections']
1662
+ above_threshold_category_ids_this_row = set()
1663
+ for detection in detections_this_row:
1664
+ threshold = _get_threshold_for_category_id(detection['category'], options, detection_categories)
1665
+ if detection['conf'] >= threshold:
1666
+ above_threshold_category_ids_this_row.add(detection['category'])
1667
+ if len(above_threshold_category_ids_this_row) == 0:
1668
+ continue
1669
+ sorted_categories_this_row = tuple(sorted(above_threshold_category_ids_this_row))
1670
+ used_combinations.add(sorted_categories_this_row)
1671
+
1672
+ for sorted_subset in used_combinations:
1673
+ assert len(sorted_subset) > 0
1674
+ results_name = 'detections'
1675
+ for category_id in sorted_subset:
1676
+ results_name = results_name + '_' + detection_categories[category_id]
1677
+ images_html[results_name]
1678
+ detection_categories_to_results_name[sorted_subset] = results_name
1679
+ detection_categories_to_category_count[results_name] = len(sorted_subset)
1680
+
1681
+ if options.include_almost_detections:
1682
+ images_html['almost_detections']
1683
+ detection_categories_to_category_count['almost_detections'] = 0
1684
+
1685
+ # Create output directories
1686
+ for res in images_html.keys():
1687
+ os.makedirs(os.path.join(output_dir, res), exist_ok=True)
1688
+
1689
+ image_count = len(images_to_visualize)
1690
+
1691
+ # Each element will be a list of 2-tuples, with elements [collection name,html info struct]
1692
+ rendering_results = []
1693
+
1694
+ # list of 3-tuples with elements (file, max_conf, detections)
1695
+ files_to_render = []
1696
+
1697
+ # Assemble the information we need for rendering, so we can parallelize without
1698
+ # dealing with Pandas
1699
+ # i_row = 0; row = images_to_visualize.iloc[0]
1700
+ for _, row in images_to_visualize.iterrows():
1701
+
1702
+ assert isinstance(row['detections'],list)
1703
+
1704
+ # Filenames should already have been normalized to either '/' or '\'
1705
+ files_to_render.append(row.to_dict())
1706
+
1707
+ start_time = time.time()
1708
+ if options.parallelize_rendering:
1709
+ pool = None
1710
+ try:
1711
+ if options.parallelize_rendering_n_cores is None:
1712
+ if options.parallelize_rendering_with_threads:
1713
+ pool = ThreadPool()
1714
+ else:
1715
+ pool = Pool()
1716
+ else:
1717
+ if options.parallelize_rendering_with_threads:
1718
+ pool = ThreadPool(options.parallelize_rendering_n_cores)
1719
+ worker_string = 'threads'
1720
+ else:
1721
+ pool = Pool(options.parallelize_rendering_n_cores)
1722
+ worker_string = 'processes'
1723
+ print('Rendering images with {} {}'.format(options.parallelize_rendering_n_cores,
1724
+ worker_string))
1725
+
1726
+ # _render_image_no_gt(file_info,detection_categories_to_results_name,
1727
+ # detection_categories,classification_categories)
1728
+
1729
+ rendering_results = list(tqdm(pool.imap(
1730
+ partial(_render_image_no_gt,
1731
+ detection_categories_to_results_name=detection_categories_to_results_name,
1732
+ detection_categories=detection_categories,
1733
+ classification_categories=classification_categories,
1734
+ options=options),
1735
+ files_to_render), total=len(files_to_render)))
1736
+ finally:
1737
+ if pool is not None:
1738
+ pool.close()
1739
+ pool.join()
1740
+ print('Pool closed and joined for non-GT rendering')
1741
+ else:
1742
+ for file_info in tqdm(files_to_render):
1743
+ rendering_result = _render_image_no_gt(file_info,
1744
+ detection_categories_to_results_name,
1745
+ detection_categories,
1746
+ classification_categories,
1747
+ options=options)
1748
+ rendering_results.append(rendering_result)
1749
+
1750
+ elapsed = time.time() - start_time
1751
+
1752
+ # Do we have classification results in addition to detection results?
1753
+ has_classification_info = False
1754
+
1755
+ # Map all the rendering results in the list rendering_results into the
1756
+ # dictionary images_html
1757
+ image_rendered_count = 0
1758
+ for rendering_result in rendering_results:
1759
+ if rendering_result is None:
1760
+ continue
1761
+ image_rendered_count += 1
1762
+ for assignment in rendering_result:
1763
+ if 'class' in assignment[0]:
1764
+ has_classification_info = True
1765
+ images_html[assignment[0]].append(assignment[1])
1766
+
1767
+ # Prepare the individual html image files
1768
+ image_counts = _prepare_html_subpages(images_html, output_dir, options)
1769
+
1770
+ if image_rendered_count == 0:
1771
+ seconds_per_image = 0.0
1772
+ else:
1773
+ seconds_per_image = elapsed/image_rendered_count
1774
+
1775
+ print('Rendered {} images (of {}) in {} ({} per image)'.format(image_rendered_count,
1776
+ image_count,humanfriendly.format_timespan(elapsed),
1777
+ humanfriendly.format_timespan(seconds_per_image)))
1778
+
1779
+ # Write index.html
1780
+
1781
+ # We can't just sum these, because image_counts includes images in both their
1782
+ # detection and classification classes
1783
+ total_images = 0
1784
+ for k in image_counts.keys():
1785
+ v = image_counts[k]
1786
+ if has_classification_info and k.startswith('class_'):
1787
+ continue
1788
+ total_images += v
1789
+
1790
+ if total_images != image_count:
1791
+ print('Warning, missing images: image_count is {}, total_images is {}'.format(total_images,image_count))
1792
+
1793
+ almost_detection_string = ''
1794
+ if options.include_almost_detections:
1795
+ almost_detection_string = ' (&ldquo;almost detection&rdquo; threshold at {:.1%})'.format(
1796
+ options.almost_detection_confidence_threshold)
1797
+
1798
+ confidence_threshold_string = ''
1799
+ if isinstance(options.confidence_threshold,float):
1800
+ confidence_threshold_string = '{:.2%}'.format(options.confidence_threshold)
1801
+ else:
1802
+ confidence_threshold_string = str(options.confidence_threshold)
1803
+
1804
+ index_page = """<html>\n{}\n<body>\n
1805
+ <h2>Visualization of results for {}</h2>\n
1806
+ <p>A sample of {} images (of {} total)FAILURE_PLACEHOLDER, annotated with detections above confidence {}{}.</p>\n
1807
+
1808
+ <div class="contentdiv">
1809
+ <p>Model version: {}</p>
1810
+ </div>
1811
+
1812
+ <h3>Detection results</h3>\n
1813
+ <div class="contentdiv">\n""".format(
1814
+ style_header, job_name_string, image_count, len(detections_df), confidence_threshold_string,
1815
+ almost_detection_string, model_version_string)
1816
+
1817
+ failure_string = ''
1818
+ if n_failures is not None:
1819
+ failure_string = ' ({} failures)'.format(n_failures)
1820
+ index_page = index_page.replace('FAILURE_PLACEHOLDER',failure_string)
1821
+
1822
def result_set_name_to_friendly_name(result_set_name):
    """
    Convert an internal result set name (e.g. "detections_animal_vehicle")
    into the label shown for that result set (e.g. "Detections: animal-vehicle").

    Args:
        result_set_name (str): internal result set name, with tokens separated
            by underscores

    Returns:
        str: the name with underscores replaced by hyphens, a leading
        "detections-" rewritten as "detections: ", and str.capitalize() applied
        (first character upper-cased, all remaining characters lower-cased)
    """

    friendly_name = result_set_name.replace('_','-')

    # Only the leading token is a prefix; limit the replacement to one occurrence
    # so a later "detections-" token (e.g. a category that is itself named
    # "detections") keeps its hyphen separator.
    if friendly_name.startswith('detections-'):
        friendly_name = friendly_name.replace('detections-', 'detections: ', 1)

    return friendly_name.capitalize()
1829
+
1830
+ sorted_result_set_names = sorted(list(images_html.keys()))
1831
+
1832
+ result_set_name_to_count = {}
1833
+ for result_set_name in sorted_result_set_names:
1834
+ image_count = image_counts[result_set_name]
1835
+ result_set_name_to_count[result_set_name] = image_count
1836
+ sorted_result_set_names = sorted(sorted_result_set_names,
1837
+ key=lambda x: result_set_name_to_count[x],
1838
+ reverse=True)
1839
+
1840
+ for result_set_name in sorted_result_set_names:
1841
+
1842
+ # Don't print classification classes here; we'll do that later with a slightly
1843
+ # different structure
1844
+ if has_classification_info and result_set_name.lower().startswith('class_'):
1845
+ continue
1846
+
1847
+ filename = result_set_name + '.html'
1848
+ label = result_set_name_to_friendly_name(result_set_name)
1849
+ image_count = image_counts[result_set_name]
1850
+
1851
+ # Don't include line items for empty multi-category pages
1852
+ if image_count == 0 and \
1853
+ detection_categories_to_category_count[result_set_name] > 1:
1854
+ continue
1855
+
1856
+ if total_images == 0:
1857
+ image_fraction = -1
1858
+ else:
1859
+ image_fraction = image_count / total_images
1860
+
1861
+ # Write the line item for this category, including a link only if the
1862
+ # category is non-empty
1863
+ if image_count == 0:
1864
+ index_page += '{} ({}, {:.1%})<br/>\n'.format(
1865
+ label,image_count,image_fraction)
1866
+ else:
1867
+ index_page += '<a href="{}">{}</a> ({}, {:.1%})<br/>\n'.format(
1868
+ filename,label,image_count,image_fraction)
1869
+
1870
+ # ...for each result set
1871
+
1872
+ index_page += '</div>\n'
1873
+
1874
+ # If classification information is present and we're supposed to create
1875
+ # a summary of classifications, we'll put it here
1876
+ category_count_footer = None
1877
+
1878
+ if has_classification_info:
1879
+
1880
+ index_page += '<h3>Species classification results</h3>'
1881
+ index_page += '<p>The same image might appear under multiple classes ' + \
1882
+ 'if multiple species were detected.</p>\n'
1883
+ index_page += '<p>Classifications with confidence less than {:.1%} confidence are considered "unreliable".</p>\n'.format(
1884
+ options.classification_confidence_threshold)
1885
+ index_page += '<div class="contentdiv">\n'
1886
+
1887
+ # Add links to all available classes
1888
+ class_names = sorted(classification_categories.values())
1889
+ if 'class_unreliable' in images_html.keys():
1890
+ class_names.append('unreliable')
1891
+
1892
+ if options.sort_classification_results_by_count:
1893
+ class_name_to_count = {}
1894
+ for cname in class_names:
1895
+ ccount = len(images_html['class_{}'.format(cname)])
1896
+ class_name_to_count[cname] = ccount
1897
+ class_names = sorted(class_names,key=lambda x: class_name_to_count[x],reverse=True)
1898
+
1899
+ if options.category_name_to_sort_weight is not None:
1900
+ category_name_to_sort_weight = {}
1901
+ else:
1902
+ category_name_to_sort_weight = options.category_name_to_sort_weight
1903
+ for category_name in category_name_to_sort_weight:
1904
+ assert isinstance(category_name_to_sort_weight[category_name],int), \
1905
+ 'Illegal sort weight {} for category {}'.format(
1906
+ category_name_to_sort_weight[category_name],category_name)
1907
+
1908
+ # Figure out whether we need to do any grouping of categories
1909
+ # while we print results
1910
+
1911
+ sort_weight_to_class_names = defaultdict(list)
1912
+
1913
+ # Loop over class names in the already-sorted order, which will be
1914
+ # preserved within each weight group
1915
+ for class_name in class_names:
1916
+ if class_name in options.category_name_to_sort_weight:
1917
+ weight = options.category_name_to_sort_weight[class_name]
1918
+ sort_weight_to_class_names[weight].append(class_name)
1919
+ else:
1920
+ # The default weight is zero
1921
+ sort_weight_to_class_names[0].append(class_name)
1922
+
1923
+ sort_weight_to_class_names = sort_dictionary_by_key(sort_weight_to_class_names)
1924
+ category_names_printed = set()
1925
+
1926
+ for i_weight,sort_weight in enumerate(sort_weight_to_class_names):
1927
+
1928
+ class_names_this_weight = sort_weight_to_class_names[sort_weight]
1929
+
1930
+ for cname in class_names_this_weight:
1931
+
1932
+ # Don't print multiple links when multiple category IDs have the same name
1933
+ if cname in category_names_printed:
1934
+ continue
1935
+
1936
+ category_names_printed.add(cname)
1937
+ ccount = len(images_html['class_{}'.format(cname)])
1938
+ if ccount > 0:
1939
+ index_page += '<a href="class_{}.html">{}</a> ({})<br/>\n'.format(
1940
+ cname, cname.lower(), ccount)
1941
+
1942
+ # ...for every category in this sort weight group
1943
+
1944
+ # Print a line break between sort weight groups
1945
+ if i_weight != (len(sort_weight_to_class_names)-1):
1946
+ index_page += '<br/>\n'
1947
+
1948
+ # ...for every sort weight group
1949
+
1950
+ index_page += '</div>\n'
1951
+
1952
+ if options.include_classification_category_report:
1953
+
1954
+ # TODO: it's only for silly historical reasons that we re-read
1955
+ # the input file in this case; because this module has used Pandas
1956
+ # forever, we're not currently carrying the json representation around,
1957
+ # only the Pandas representation.
1958
+
1959
+ print('Generating classification category report')
1960
+
1961
+ d = load_md_or_speciesnet_file(options.md_results_file)
1962
+
1963
+ category_name_to_count = {}
1964
+
1965
+ # im = d['images'][0]
1966
+ for im in d['images']:
1967
+ if 'detections' in im and im['detections'] is not None:
1968
+ for det in im['detections']:
1969
+ if ('classifications' in det) and (len(det['classifications']) > 0):
1970
+ class_id = det['classifications'][0][0]
1971
+ class_conf = det['classifications'][0][1]
1972
+ if class_conf < options.classification_confidence_threshold:
1973
+ continue
1974
+ category_name = d['classification_categories'][class_id]
1975
+ if category_name not in category_name_to_count:
1976
+ category_name_to_count[category_name] = 1
1977
+ else:
1978
+ category_name_to_count[category_name] = \
1979
+ category_name_to_count[category_name] + 1
1980
+ # ...for each detection
1981
+ # ...if this image has detections
1982
+ # ...for each image
1983
+
1984
+ category_name_to_count = sort_dictionary_by_value(
1985
+ category_name_to_count,reverse=True)
1986
+
1987
+ category_count_footer = ''
1988
+ category_count_footer += '<br/>\n'
1989
+ category_count_footer += \
1990
+ '<h3>Category counts (for the whole dataset, not just the sample used for this page)</h3>\n'
1991
+ category_count_footer += '<div class="contentdiv">\n'
1992
+
1993
+ for category_name in category_name_to_count.keys():
1994
+ count = category_name_to_count[category_name]
1995
+ category_count_html = '{}: {}<br>\n'.format(category_name,count)
1996
+ category_count_footer += category_count_html
1997
+
1998
+ category_count_footer += '</div>\n'
1999
+
2000
+ # ...if we're generating a classification category report
2001
+
2002
+ # ...if classification info is present
2003
+
2004
+ if category_count_footer is not None:
2005
+ index_page += category_count_footer + '\n'
2006
+
2007
+ # Write custom footer if it was provided
2008
+ if (options.footer_text is not None) and (len(options.footer_text) > 0):
2009
+ index_page += options.footer_text + '\n'
2010
+
2011
+ # Close open html tags
2012
+ index_page += '\n</body></html>\n'
2013
+
2014
+ output_html_file = os.path.join(output_dir, 'index.html')
2015
+ with open(output_html_file, 'w',
2016
+ encoding=options.output_html_encoding) as f:
2017
+ f.write(index_page)
2018
+
2019
+ print('Finished writing html to {}'.format(output_html_file))
2020
+
2021
+ # ...if we do/don't have ground truth
2022
+
2023
+ ppresults.output_html_file = output_html_file
2024
+ return ppresults
2025
+
2026
+ # ...process_batch_results
2027
+
2028
+
2029
+ #%% Interactive driver(s)
2030
+
2031
if False:

    # This branch never executes; it exists so the cell below can be run
    # by hand from an IDE that understands "#%%" cell markers.

    #%%

    options = PostProcessingOptions()

    # Input/output locations, all relative to one scratch folder
    base_dir = r'g:\temp'
    options.md_results_file = os.path.join(base_dir, 'results.json')
    options.image_base_dir = base_dir
    options.output_dir = os.path.join(base_dir, 'preview')

    # Per-category confidence thresholds, plus a second, lower threshold
    # used for the "almost detections" page
    options.confidence_threshold = {'person':0.5,'animal':0.5,'vehicle':0.01}
    options.almost_detection_confidence_threshold = 0.001
    options.include_almost_detections = True

    ppresults = process_batch_results(options)
    # from megadetector.utils.path_utils import open_file; open_file(ppresults.output_html_file)
2046
+
2047
+
2048
+ #%% Command-line driver
2049
+
2050
def main(): # noqa
    """
    Command-line driver: parses arguments, passes them to args_to_object()
    together with a PostProcessingOptions instance, runs
    process_batch_results(), and optionally opens the resulting HTML file.

    When invoked with no command-line arguments, prints the help text and exits.
    An --n_cores value below 1 is reported as an argparse usage error.
    """

    options = PostProcessingOptions()

    parser = argparse.ArgumentParser()
    parser.add_argument(
        'md_results_file',
        help='path to .json file containing MegaDetector results')
    parser.add_argument(
        'output_dir',
        help='base directory for output')
    parser.add_argument(
        '--image_base_dir', default=options.image_base_dir,
        help='base directory for images (optional, can compute statistics '
             'without images)')
    parser.add_argument(
        '--ground_truth_json_file', default=options.ground_truth_json_file,
        help='ground truth labels (optional, can render detections without '
             'ground truth), in the COCO Camera Traps format')
    parser.add_argument(
        '--confidence_threshold', type=float,
        default=options.confidence_threshold,
        help='Confidence threshold for statistics and visualization')
    parser.add_argument(
        '--almost_detection_confidence_threshold', type=float,
        default=options.almost_detection_confidence_threshold,
        help='Almost-detection confidence threshold for statistics and visualization')
    parser.add_argument(
        '--target_recall', type=float, default=options.target_recall,
        help='Target recall (for statistics only)')
    parser.add_argument(
        '--num_images_to_sample', type=int,
        default=options.num_images_to_sample,
        help='number of images to visualize, -1 for all images (default: 500)')
    parser.add_argument(
        '--viz_target_width', type=int, default=options.viz_target_width,
        help='Output image width')
    parser.add_argument(
        '--include_almost_detections', action='store_true',
        help='Include a separate category for images just above a second confidence threshold')
    parser.add_argument(
        '--html_sort_order', type=str, default='filename',
        help='Sort order for output pages, should be one of [filename,confidence,random] (defaults to filename)')
    parser.add_argument(
        '--sort_by_confidence', action='store_true',
        help='Sort output in decreasing order by confidence (defaults to sorting by filename)')
    parser.add_argument(
        '--n_cores', type=int, default=1,
        help='Number of threads to use for rendering (default: 1)')
    parser.add_argument(
        '--parallelize_rendering_with_processes',
        action='store_true',
        help='Should we use processes (instead of threads) for parallelization?')
    parser.add_argument(
        '--no_separate_detections_by_category',
        action='store_true',
        help='Collapse all categories into just "detections" and "non-detections"')
    parser.add_argument(
        '--open_output_file',
        action='store_true',
        help='Open the HTML output file when finished')
    parser.add_argument(
        '--max_figures_per_html_file',
        type=int, default=None,
        help='Maximum number of images to put on a single HTML page')

    # With no arguments at all, show usage rather than an argparse error
    if not sys.argv[1:]:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    if args.n_cores != 1:

        # Report zero/negative values as a normal usage error; an assert would be
        # skipped entirely when Python runs with -O.
        if args.n_cores < 1:
            parser.error('Illegal number of cores: {}'.format(args.n_cores))

        # These attributes are added to the namespace before it is handed to
        # args_to_object() below (presumably so they end up on options; confirm
        # against args_to_object).
        if args.parallelize_rendering_with_processes:
            args.parallelize_rendering_with_threads = False
        args.parallelize_rendering = True
        args.parallelize_rendering_n_cores = args.n_cores

    args_to_object(args, options)

    # The command-line flag is negative, the option is positive
    if args.no_separate_detections_by_category:
        options.separate_detections_by_category = False

    ppresults = process_batch_results(options)

    if options.open_output_file:
        path_utils.open_file(ppresults.output_html_file)
2138
+
2139
# Run the command-line driver only when this file is executed as a script,
# not when it is imported as a module
if __name__ == '__main__':
    main()