megadetector 10.0.7__py3-none-any.whl → 10.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megadetector/data_management/cct_json_utils.py +16 -6
- megadetector/data_management/databases/subset_json_db.py +57 -2
- megadetector/detection/pytorch_detector.py +32 -15
- megadetector/detection/run_detector.py +1 -2
- megadetector/detection/run_detector_batch.py +30 -15
- megadetector/detection/run_inference_with_yolov5_val.py +3 -1
- megadetector/detection/run_tiled_inference.py +61 -17
- megadetector/detection/video_utils.py +23 -7
- megadetector/postprocessing/classification_postprocessing.py +5 -1
- megadetector/postprocessing/compare_batch_results.py +48 -28
- megadetector/postprocessing/convert_output_format.py +81 -87
- megadetector/postprocessing/postprocess_batch_results.py +1 -1
- megadetector/postprocessing/subset_json_detector_output.py +83 -0
- megadetector/utils/directory_listing.py +19 -13
- megadetector/utils/path_utils.py +58 -8
- megadetector/utils/url_utils.py +91 -1
- megadetector/utils/wi_taxonomy_utils.py +26 -26
- megadetector/visualization/visualize_video_output.py +16 -6
- {megadetector-10.0.7.dist-info → megadetector-10.0.9.dist-info}/METADATA +1 -1
- {megadetector-10.0.7.dist-info → megadetector-10.0.9.dist-info}/RECORD +23 -23
- {megadetector-10.0.7.dist-info → megadetector-10.0.9.dist-info}/WHEEL +0 -0
- {megadetector-10.0.7.dist-info → megadetector-10.0.9.dist-info}/licenses/LICENSE +0 -0
- {megadetector-10.0.7.dist-info → megadetector-10.0.9.dist-info}/top_level.txt +0 -0
megadetector/postprocessing/compare_batch_results.py
@@ -136,7 +136,7 @@ class BatchComparisonOptions:
         #: Colormap to use for detections in file B (maps detection categories to colors)
         self.colormap_b = ['RoyalBlue']
 
-        #:
+        #: Whether to render images with threads (True) or processes (False)
         self.parallelize_rendering_with_threads = True
 
         #: List of filenames to include in the comparison, or None to use all files
@@ -152,7 +152,7 @@ class BatchComparisonOptions:
         self.target_width = 800
 
         #: Number of workers to use for rendering, or <=1 to disable parallelization
-        self.n_rendering_workers =
+        self.n_rendering_workers = 10
 
         #: Random seed for image sampling (not used if max_images_per_category is None)
         self.random_seed = 0
@@ -183,7 +183,7 @@ class BatchComparisonOptions:
         #: Should we show category names (instead of numbers) on detected boxes?
         self.show_category_names_on_detected_boxes = True
 
-        #: List of PairwiseBatchComparisonOptions that defines the comparisons we'll render
+        #: List of PairwiseBatchComparisonOptions that defines the comparisons we'll render
         self.pairwise_options = []
 
         #: Only process images whose file names contain this token
@@ -197,7 +197,7 @@ class BatchComparisonOptions:
         self.verbose = False
 
         #: Separate out the "clean TP" and "clean TN" categories, only relevant when GT is
-        #: available
+        #: available
         self.include_clean_categories = True
 
         #: When rendering to the output table, optionally write alternative strings
@@ -211,6 +211,10 @@ class BatchComparisonOptions:
         #: Should we include a TOC? TOC is always omitted if <=2 comparisons are performed.
         self.include_toc = True
 
+        #: Should we return the mapping from categories (e.g. "common detections") to image
+        #: pairs? Makes the return dict much larger, but allows post-hoc exploration.
+        self.return_images_by_category = False
+
     # ...class BatchComparisonOptions
 
 
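The new return_images_by_category option works together with the compare_batch_results() change later in this diff, which clears categories_to_image_pairs on each pairwise result unless the option is set. A minimal sketch of how it might be used; the folder and file names are hypothetical, and the exact shape of the return value is not shown in this diff:

    from megadetector.postprocessing.compare_batch_results import (
        BatchComparisonOptions, PairwiseBatchComparisonOptions, compare_batch_results)

    options = BatchComparisonOptions()
    options.output_folder = '/tmp/md-comparison'        # hypothetical output folder
    options.image_folder = '/data/camera-trap-images'   # hypothetical image folder

    # Keep the (large) category -> image-pair mapping for post-hoc exploration
    options.return_images_by_category = True

    pairwise = PairwiseBatchComparisonOptions()
    pairwise.results_filename_a = 'results_a.json'      # hypothetical results files
    pairwise.results_filename_b = 'results_b.json'
    options.pairwise_options = [pairwise]

    results = compare_batch_results(options)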
@@ -224,7 +228,7 @@ class PairwiseBatchComparisonResults:
         #: String of HTML content suitable for rendering to an HTML file
         self.html_content = None
 
-        #: Possibly-modified version of the PairwiseBatchComparisonOptions supplied as input
+        #: Possibly-modified version of the PairwiseBatchComparisonOptions supplied as input
         self.pairwise_options = None
 
         #: A dictionary with keys representing category names; in the no-ground-truth case, for example,
@@ -295,7 +299,8 @@ def _render_image_pair(fn,image_pairs,category_folder,options,pairwise_options):
     """
 
     input_image_path = os.path.join(options.image_folder,fn)
-    assert os.path.isfile(input_image_path), 'Image {} does not exist'.format(input_image_path)
+    assert os.path.isfile(input_image_path), \
+        'Image {} does not exist'.format(input_image_path)
 
     im = visualization_utils.open_image(input_image_path)
     image_pair = image_pairs[fn]
@@ -628,11 +633,21 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
     os.makedirs(options.output_folder,exist_ok=True)
 
 
+    # Just in case the user provided a single category instead of a list
+    # for category_names_to_include
+    if options.category_names_to_include is not None:
+        if isinstance(options.category_names_to_include,str):
+            options.category_names_to_include = [options.category_names_to_include]
+
     ##%% Load both result sets
 
+    if options.verbose:
+        print('Loading {}'.format(pairwise_options.results_filename_a))
     with open(pairwise_options.results_filename_a,'r') as f:
         results_a = json.load(f)
 
+    if options.verbose:
+        print('Loading {}'.format(pairwise_options.results_filename_b))
     with open(pairwise_options.results_filename_b,'r') as f:
         results_b = json.load(f)
 
@@ -654,6 +669,17 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
     detection_category_name_to_id = invert_dictionary(detection_categories_a)
     options.detection_category_id_to_name = detection_category_id_to_name
 
+    category_name_to_id_a = invert_dictionary(detection_categories_a)
+    category_name_to_id_b = invert_dictionary(detection_categories_b)
+    category_ids_to_include_a = []
+    category_ids_to_include_b = []
+
+    for category_name in options.category_names_to_include:
+        if category_name in category_name_to_id_a:
+            category_ids_to_include_a.append(category_name_to_id_a[category_name])
+        if category_name in category_name_to_id_b:
+            category_ids_to_include_b.append(category_name_to_id_b[category_name])
+
     if pairwise_options.results_description_a is None:
         if 'detector' not in results_a['info']:
             print('No model metadata supplied for results-A, assuming MDv4')
@@ -679,7 +705,7 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
     filename_to_image_b = {im['file']:im for im in images_b}
 
 
-    ##%% Make sure
+    ##%% Make sure the two result sets represent the same set of images
 
     filenames_a = [im['file'] for im in images_a]
     filenames_b_set = set([im['file'] for im in images_b])
@@ -914,7 +940,8 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
         pairwise_options.detection_thresholds_b['default']
 
     # fn = filenames_to_compare[0]
-    for i_file,fn in tqdm(enumerate(filenames_to_compare),total=len(filenames_to_compare)):
+    for i_file,fn in tqdm(enumerate(filenames_to_compare),
+                          total=len(filenames_to_compare)):
 
         if fn not in filename_to_image_b:
 
@@ -1000,27 +1027,11 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
                 categories_above_threshold_b.add(category_id)
 
         if invalid_category_error:
-
             continue
 
         # Should we be restricting the comparison to only certain categories?
         if options.category_names_to_include is not None:
 
-            # Just in case the user provided a single category instead of a list
-            if isinstance(options.category_names_to_include,str):
-                options.category_names_to_include = [options.category_names_to_include]
-
-            category_name_to_id_a = invert_dictionary(detection_categories_a)
-            category_name_to_id_b = invert_dictionary(detection_categories_b)
-            category_ids_to_include_a = []
-            category_ids_to_include_b = []
-
-            for category_name in options.category_names_to_include:
-                if category_name in category_name_to_id_a:
-                    category_ids_to_include_a.append(category_name_to_id_a[category_name])
-                if category_name in category_name_to_id_b:
-                    category_ids_to_include_b.append(category_name_to_id_b[category_name])
-
             # Restrict the categories we treat as above-threshold to the set we're supposed
             # to be using
             categories_above_threshold_a = [category_id for category_id in categories_above_threshold_a if \
@@ -1287,7 +1298,7 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
             max_conf_b = _maxempty([det['conf'] for det in im_b['detections']])
             sort_conf = max(max_conf_a,max_conf_b)
 
-
+            # ...what kind of ground truth (if any) do we have?
 
             assert comparison_category is not None
             categories_to_image_pairs[comparison_category][fn] = im_pair
@@ -1313,7 +1324,11 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
     local_output_folder = os.path.join(options.output_folder,'cmp_' + \
                                        str(output_index).zfill(3))
 
-    def
+    def _render_detection_comparisons(category,image_pairs,image_filenames):
+        """
+        Render all the detection results pairs for the sampled images in a
+        particular category (e.g. all the "common detections").
+        """
 
         print('Rendering detections for category {}'.format(category))
 
@@ -1336,7 +1351,7 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
 
         return output_image_paths
 
-    # ...def
+    # ...def _render_detection_comparisons()
 
     if len(options.colormap_a) > 1:
         color_string_a = str(options.colormap_a)
@@ -1371,7 +1386,7 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
 
     input_image_absolute_paths = [os.path.join(options.image_folder,fn) for fn in image_filenames]
 
-    category_image_output_paths =
+    category_image_output_paths = _render_detection_comparisons(category,
                                                                 image_pairs,image_filenames)
 
     category_html_filename = os.path.join(local_output_folder,
@@ -1469,6 +1484,8 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
             print("Pool closed and joined for comparison rendering")
         except Exception:
             pass
+
+
     ##%% Write the top-level HTML file content
 
     html_output_string = ''
@@ -1591,8 +1608,11 @@ def compare_batch_results(options):
     for i_comparison,pairwise_options in enumerate(pairwise_options_list):
 
         print('Running comparison {} of {}'.format(i_comparison,n_comparisons))
+        pairwise_options.verbose = options.verbose
         pairwise_results = \
             _pairwise_compare_batch_results(options,i_comparison,pairwise_options)
+        if not options.return_images_by_category:
+            pairwise_results.categories_to_image_pairs = None
         html_content += pairwise_results.html_content
         all_pairwise_results.append(pairwise_results)
 
megadetector/postprocessing/convert_output_format.py
@@ -2,12 +2,8 @@
 
 convert_output_format.py
 
-Converts between file
-
-conversion - including between hypothetical alternative .json versions - that we support
-in the future.
-
-The .csv format is largely obsolete, don't use it unless you're super-duper sure you need it.
+Converts between file .json and .csv representations of MD output. The .csv format is
+largely obsolete, don't use it unless you're super-duper sure you need it.
 
 """
 
@@ -15,13 +11,16 @@ The .csv format is largely obsolete, don't use it unless you're super-duper sure
 
 import argparse
 import json
-import csv
 import sys
 import os
 
 from tqdm import tqdm
+from collections import defaultdict
+
+import pandas as pd
 
 from megadetector.postprocessing.load_api_results import load_api_results_csv
+from megadetector.utils.wi_taxonomy_utils import load_md_or_speciesnet_file
 from megadetector.data_management.annotations import annotation_constants
 from megadetector.utils import ct_utils
 
@@ -35,16 +34,13 @@ def convert_json_to_csv(input_path,
                         min_confidence=None,
                         omit_bounding_boxes=False,
                         output_encoding=None,
-                        overwrite=True):
+                        overwrite=True,
+                        verbose=False):
     """
     Converts a MD results .json file to a totally non-standard .csv format.
 
     If [output_path] is None, will convert x.json to x.csv.
 
-    TODO: this function should obviously be using Pandas or some other sensible structured
-    representation of tabular data. Even a list of dicts. This implementation is quite
-    brittle and depends on adding fields to every row in exactly the right order.
-
     Args:
         input_path (str): the input .json file to convert
         output_path (str, optional): the output .csv file to generate; if this is None, uses
@@ -57,7 +53,7 @@ def convert_json_to_csv(input_path,
         output_encoding (str, optional): encoding to use for the .csv file
         overwrite (bool, optional): whether to overwrite an existing .csv file; if this is False and
             the output file exists, no-ops and returns
-
+        verbose (bool, optional): enable additional debug output
     """
 
     if output_path is None:
@@ -68,36 +64,28 @@ def convert_json_to_csv(input_path,
         return
 
     print('Loading json results from {}...'.format(input_path))
-    json_output =
-
-    rows = []
+    json_output = load_md_or_speciesnet_file(input_path,
+                                             verbose=verbose)
 
-
+    def clean_category_name(s):
+        return s.replace(',','_').replace(' ','_').lower()
 
-    #
-
-
-
-
-
-    for cat_id in range(1,n_non_empty_detection_categories+1):
-        cat_name = annotation_constants.detector_bbox_category_id_to_name[cat_id]
-        detection_category_column_names.append('max_conf_' + cat_name)
+    # Create column names for max detection confidences
+    detection_category_id_to_max_conf_column_name = {}
+    for category_id in json_output['detection_categories'].keys():
+        category_name = clean_category_name(json_output['detection_categories'][category_id])
+        detection_category_id_to_max_conf_column_name[category_id] = \
+            'max_conf_' + category_name
 
-
+    classification_category_id_to_max_conf_column_name = {}
 
+    # Create column names for max classification confidences (if necessary)
     if 'classification_categories' in json_output.keys():
-
-
-
-
-
-        category_name = classification_category_id_to_name[category_id].\
-            replace(' ','_').replace(',','')
-        classification_category_column_names.append('max_classification_conf_' + category_name)
-        classification_category_id_to_column_number[category_id] = i_category
-
-        n_classification_categories = len(classification_category_ids)
+
+        for category_id in json_output['classification_categories'].keys():
+            category_name = clean_category_name(json_output['classification_categories'][category_id])
+            classification_category_id_to_max_conf_column_name[category_id] = \
+                'max_classification_conf_' + category_name
 
     # There are several .json fields for which we add .csv columns; other random bespoke fields
     # will be ignored.
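As a quick illustration of the column-name cleanup introduced above: clean_category_name replaces commas and spaces with underscores and lowercases the result. The helper is local to convert_json_to_csv in the release; it is re-declared here only for demonstration:

    def clean_category_name(s):
        return s.replace(',','_').replace(' ','_').lower()

    assert clean_category_name('Giant Anteater') == 'giant_anteater'

    # A comma and its trailing space each become an underscore
    assert clean_category_name('Vehicle, Large') == 'vehicle__large'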
@@ -117,26 +105,43 @@ def convert_json_to_csv(input_path,
     if len(optional_fields_present) > 0:
         print('Found {} optional fields'.format(len(optional_fields_present)))
 
-    expected_row_length = len(fixed_columns) + len(detection_category_column_names) + \
-        n_classification_categories + len(optional_fields_present)
-
     print('Formatting results...')
 
+    output_records = []
+
     # i_image = 0; im = json_output['images'][i_image]
     for im in tqdm(json_output['images']):
 
-
+        output_record = {}
+        output_records.append(output_record)
+
+        output_record['image_path'] = im['file']
+        output_record['max_confidence'] = ''
+        output_record['detections'] = ''
+
+        for field_name in optional_fields_present:
+            output_record[field_name] = ''
+            if field_name in im:
+                output_record[field_name] = im[field_name]
+
+        for detection_category_id in detection_category_id_to_max_conf_column_name:
+            column_name = detection_category_id_to_max_conf_column_name[detection_category_id]
+            output_record[column_name] = 0
+
+        for classification_category_id in classification_category_id_to_max_conf_column_name:
+            column_name = classification_category_id_to_max_conf_column_name[classification_category_id]
+            output_record[column_name] = 0
 
         if 'failure' in im and im['failure'] is not None:
-
-
+            output_record['max_confidence'] = 'failure'
+            output_record['detections'] = im['failure']
             # print('Skipping failed image {} ({})'.format(im['file'],im['failure']))
             continue
 
         max_conf = ct_utils.get_max_conf(im)
+        detection_category_id_to_max_conf = defaultdict(float)
+        classification_category_id_to_max_conf = defaultdict(float)
         detections = []
-        max_detection_category_probabilities = [None] * n_non_empty_detection_categories
-        max_classification_category_probabilities = [0] * n_classification_categories
 
         # d = im['detections'][0]
         for d in im['detections']:
@@ -155,31 +160,24 @@ def convert_json_to_csv(input_path,
             xmax = input_bbox[0] + input_bbox[2]
             ymax = input_bbox[1] + input_bbox[3]
             output_detection = [ymin, xmin, ymax, xmax]
-
             output_detection.append(d['conf'])
-
-            # Category 0 is empty, for which we don't have a column, so the max
-            # confidence for category N goes in column N-1
-            detection_category_id = int(d['category'])
-            assert detection_category_id > 0 and detection_category_id <= \
-                n_non_empty_detection_categories
-            detection_category_column = detection_category_id - 1
-            detection_category_max = max_detection_category_probabilities[detection_category_column]
-            if detection_category_max is None or d['conf'] > detection_category_max:
-                max_detection_category_probabilities[detection_category_column] = d['conf']
-
-            output_detection.append(detection_category_id)
+            output_detection.append(int(d['category']))
             detections.append(output_detection)
 
+            detection_category_id = d['category']
+            detection_category_max = detection_category_id_to_max_conf[detection_category_id]
+            if d['conf'] > detection_category_max:
+                detection_category_id_to_max_conf[detection_category_id] = d['conf']
+
             if 'classifications' in d:
-
-                'Oops, I have classification results, but no classification metadata'
+
                 for c in d['classifications']:
-
-
-
-
-
+                    classification_category_id = c[0]
+                    classification_conf = c[1]
+                    classification_category_max = \
+                        classification_category_id_to_max_conf[classification_category_id]
+                    if classification_conf > classification_category_max:
+                        classification_category_id_to_max_conf[classification_category_id] = d['conf']
 
             # ...for each classification
 
@@ -191,40 +189,36 @@ def convert_json_to_csv(input_path,
         if not omit_bounding_boxes:
             detection_string = json.dumps(detections)
 
-
-
-        row.extend(max_classification_category_probabilities)
+        output_record['detections'] = detection_string
+        output_record['max_confidence'] = max_conf
 
-        for
-
-
-
-            row.append(str(im[field_name]))
+        for detection_category_id in detection_category_id_to_max_conf_column_name:
+            column_name = detection_category_id_to_max_conf_column_name[detection_category_id]
+            output_record[column_name] = \
+                detection_category_id_to_max_conf[detection_category_id]
 
-
-
+        for classification_category_id in classification_category_id_to_max_conf_column_name:
+            column_name = classification_category_id_to_max_conf_column_name[classification_category_id]
+            output_record[column_name] = \
+                classification_category_id_to_max_conf[classification_category_id]
 
     # ...for each image
 
     print('Writing to csv...')
 
-
-
-
-
-
-    header.extend(classification_category_column_names)
-    for field_name in optional_fields_present:
-        header.append(field_name)
-    writer.writerow(header)
-    writer.writerows(rows)
+    df = pd.DataFrame(output_records)
+
+    if omit_bounding_boxes:
+        df = df.drop('detections',axis=1)
+    df.to_csv(output_path,index=False,header=True)
 
 # ...def convert_json_to_csv(...)
 
 
 def convert_csv_to_json(input_path,output_path=None,overwrite=True):
     """
-    Convert .csv to .json. If output_path is None, will convert x.csv to x.json.
+    Convert .csv to .json. If output_path is None, will convert x.csv to x.json. This
+    supports a largely obsolete .csv format, there's almost no reason you want to do this.
 
     Args:
         input_path (str): .csv filename to convert to .json
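Taken together, convert_json_to_csv now accumulates one dict per image and hands the list to pandas, rather than assembling positional rows. A minimal sketch of calling the rewritten function; 'md_results.json' is a hypothetical path:

    from megadetector.postprocessing.convert_output_format import convert_json_to_csv

    # Writes md_results.csv next to the input when output_path is None
    convert_json_to_csv('md_results.json',
                        output_path=None,
                        omit_bounding_boxes=False,
                        overwrite=True,
                        verbose=True)  # 'verbose' is the parameter added in this diff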
megadetector/postprocessing/postprocess_batch_results.py
@@ -1145,7 +1145,7 @@ def process_batch_results(options):
 
         images_to_visualize = detections_df
 
-        if options.num_images_to_sample is not None and options.num_images_to_sample > 0:
+        if (options.num_images_to_sample is not None) and (options.num_images_to_sample > 0):
             images_to_visualize = images_to_visualize.sample(
                 n=min(options.num_images_to_sample, len(images_to_visualize)),
                 random_state=options.sample_seed)
megadetector/postprocessing/subset_json_detector_output.py
@@ -83,6 +83,9 @@ class SubsetJsonDetectorOutputOptions:
     def __init__(self):
 
         #: Only process files containing the token 'query'
+        #:
+        #: Does not support general regexes, but supports ^ as a special case
+        #: regex-like notation for "starts with"
         self.query = None
 
         #: Replace 'query' with 'replacement' if 'replacement' is not None. If 'query' is None,
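A small sketch of the '^' behavior documented above; the 'replacement' attribute name is inferred from the adjacent docstring, and the path prefix is hypothetical:

    from megadetector.postprocessing.subset_json_detector_output import \
        SubsetJsonDetectorOutputOptions

    options = SubsetJsonDetectorOutputOptions()

    # Keep only images whose file names start with 'camera01/'; without the
    # leading '^', the token would match anywhere in the path
    options.query = '^camera01/'

    # Optionally rewrite the matched token; None leaves file names unchanged
    options.replacement = None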
@@ -153,6 +156,12 @@ class SubsetJsonDetectorOutputOptions:
         #: to be contiguous. Set to 1 to remove empty categories only.
         self.remove_classification_categories_below_count = None
 
+        #: Remove detections above a threshold size (as a fraction of the image size)
+        self.maximum_detection_size = None
+
+        #: Remove detections below a threshold size (as a fraction of the image size)
+        self.minimum_detection_size = None
+
     # ...class SubsetJsonDetectorOutputOptions
 
 
@@ -271,6 +280,71 @@ def remove_classification_categories_below_count(data, options):
 # ...def remove_classification_categories_below_count(...)
 
 
+def subset_json_detector_output_by_size(data, options):
+    """
+    Remove detections above or below threshold sizes (as a fraction
+    of the image size).
+
+    Args:
+        data (dict): data loaded from a MD results file
+        options (SubsetJsonDetectorOutputOptions): parameters for subsetting
+
+    Returns:
+        dict: Possibly-modified version of [data] (also modifies in place)
+    """
+
+    if (options.maximum_detection_size is None) and \
+       (options.minimum_detection_size is None):
+        return data
+
+    if options.maximum_detection_size is None:
+        options.maximum_detection_size = 1000
+
+    if options.minimum_detection_size is None:
+        options.minimum_detection_size = -1000
+
+    print('Subsetting by size ({} <--> {})'.format(
+        options.minimum_detection_size,
+        options.maximum_detection_size))
+
+    images_in = data['images']
+    images_out = []
+
+    # im = images_in[0]
+    for i_image, im in tqdm(enumerate(images_in), total=len(images_in)):
+
+        # Always keep failed images; if the caller wants to remove these, they
+        # will use remove_failed_images
+        if ('detections' not in im) or (im['detections'] is None):
+            images_out.append(im)
+            continue
+
+        detections_to_keep = []
+
+        for det in im['detections']:
+
+            # [x_min, y_min, width_of_box, height_of_box]
+            detection_size = det['bbox'][2] * det['bbox'][3]
+
+            if (detection_size >= options.minimum_detection_size) and \
+               (detection_size <= options.maximum_detection_size):
+                detections_to_keep.append(det)
+
+        im['detections'] = detections_to_keep
+
+        images_out.append(im)
+
+    # ...for each image
+
+    data['images'] = images_out
+    print('done, found {} matches (of {})'.format(
+        len(data['images']),len(images_in)))
+
+    return data
+
+# ...def subset_json_detector_output_by_size(...)
+
+
 def subset_json_detector_output_by_confidence(data, options):
     """
     Removes all detections below options.confidence_threshold.
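A minimal sketch of driving the new size filter from Python. Detection size here is normalized box width times height, i.e. the fraction of image area the box covers; subset_json_detector_output's signature comes from the hunk below, and the file names are hypothetical:

    from megadetector.postprocessing.subset_json_detector_output import (
        SubsetJsonDetectorOutputOptions, subset_json_detector_output)

    options = SubsetJsonDetectorOutputOptions()
    options.minimum_detection_size = 0.0001  # drop boxes under 0.01% of the image
    options.maximum_detection_size = 0.85    # drop boxes over 85% of the image

    # Hypothetical input/output paths
    data = subset_json_detector_output('md_results.json',
                                       'md_results_size_filtered.json',
                                       options)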
@@ -671,6 +745,11 @@ def subset_json_detector_output(input_filename, output_filename, options, data=N
 
         data = subset_json_detector_output_by_list(data, options)
 
+    if (options.maximum_detection_size is not None) or \
+       (options.minimum_detection_size is not None):
+
+        data = subset_json_detector_output_by_size(data, options)
+
     if not options.split_folders:
 
         _write_detection_results(data, output_filename, options)
@@ -834,6 +913,10 @@ def main(): # noqa
                         help='Replace [query] with this')
     parser.add_argument('--confidence_threshold', type=float, default=None,
                         help='Remove detections below this confidence level')
+    parser.add_argument('--maximum_detection_size', type=float, default=None,
+                        help='Remove detections above this size (as a fraction of the image size)')
+    parser.add_argument('--minimum_detection_size', type=float, default=None,
+                        help='Remove detections below this size (as a fraction of the image size)')
     parser.add_argument('--keep_files_in_list', type=str, default=None,
                         help='Keep only files in this list, which can be a .json results file or a folder.' + \
                         ' Assumes that the input .json file contains relative paths when comparing to a folder.')