megadetector 10.0.7__py3-none-any.whl → 10.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -136,7 +136,7 @@ class BatchComparisonOptions:
         #: Colormap to use for detections in file B (maps detection categories to colors)
         self.colormap_b = ['RoyalBlue']
 
-        #: Process-based parallelization isn't supported yet; this must be "True"
+        #: Whether to render images with threads (True) or processes (False)
         self.parallelize_rendering_with_threads = True
 
         #: List of filenames to include in the comparison, or None to use all files
@@ -152,7 +152,7 @@ class BatchComparisonOptions:
         self.target_width = 800
 
         #: Number of workers to use for rendering, or <=1 to disable parallelization
-        self.n_rendering_workers = 20
+        self.n_rendering_workers = 10
 
         #: Random seed for image sampling (not used if max_images_per_category is None)
         self.random_seed = 0
@@ -183,7 +183,7 @@ class BatchComparisonOptions:
         #: Should we show category names (instead of numbers) on detected boxes?
         self.show_category_names_on_detected_boxes = True
 
-        #: List of PairwiseBatchComparisonOptions that defines the comparisons we'll render.
+        #: List of PairwiseBatchComparisonOptions that defines the comparisons we'll render
         self.pairwise_options = []
 
         #: Only process images whose file names contain this token
@@ -197,7 +197,7 @@ class BatchComparisonOptions:
         self.verbose = False
 
         #: Separate out the "clean TP" and "clean TN" categories, only relevant when GT is
-        #: available.
+        #: available
         self.include_clean_categories = True
 
         #: When rendering to the output table, optionally write alternative strings
@@ -211,6 +211,10 @@ class BatchComparisonOptions:
         #: Should we include a TOC? TOC is always omitted if <=2 comparisons are performed.
         self.include_toc = True
 
+        #: Should we return the mapping from categories (e.g. "common detections") to image
+        #: pairs? Makes the return dict much larger, but allows post-hoc exploration.
+        self.return_images_by_category = False
+
 # ...class BatchComparisonOptions
 
 
@@ -224,7 +228,7 @@ class PairwiseBatchComparisonResults:
         #: String of HTML content suitable for rendering to an HTML file
         self.html_content = None
 
-        #: Possibly-modified version of the PairwiseBatchComparisonOptions supplied as input.
+        #: Possibly-modified version of the PairwiseBatchComparisonOptions supplied as input
         self.pairwise_options = None
 
         #: A dictionary with keys representing category names; in the no-ground-truth case, for example,
@@ -295,7 +299,8 @@ def _render_image_pair(fn,image_pairs,category_folder,options,pairwise_options):
     """
 
     input_image_path = os.path.join(options.image_folder,fn)
-    assert os.path.isfile(input_image_path), 'Image {} does not exist'.format(input_image_path)
+    assert os.path.isfile(input_image_path), \
+        'Image {} does not exist'.format(input_image_path)
 
     im = visualization_utils.open_image(input_image_path)
     image_pair = image_pairs[fn]
@@ -628,11 +633,21 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
         os.makedirs(options.output_folder,exist_ok=True)
 
 
+    # Just in case the user provided a single category instead of a list
+    # for category_names_to_include
+    if options.category_names_to_include is not None:
+        if isinstance(options.category_names_to_include,str):
+            options.category_names_to_include = [options.category_names_to_include]
+
     ##%% Load both result sets
 
+    if options.verbose:
+        print('Loading {}'.format(pairwise_options.results_filename_a))
     with open(pairwise_options.results_filename_a,'r') as f:
         results_a = json.load(f)
 
+    if options.verbose:
+        print('Loading {}'.format(pairwise_options.results_filename_b))
     with open(pairwise_options.results_filename_b,'r') as f:
         results_b = json.load(f)
 
@@ -654,6 +669,17 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
     detection_category_name_to_id = invert_dictionary(detection_categories_a)
     options.detection_category_id_to_name = detection_category_id_to_name
 
+    category_name_to_id_a = invert_dictionary(detection_categories_a)
+    category_name_to_id_b = invert_dictionary(detection_categories_b)
+    category_ids_to_include_a = []
+    category_ids_to_include_b = []
+
+    for category_name in options.category_names_to_include:
+        if category_name in category_name_to_id_a:
+            category_ids_to_include_a.append(category_name_to_id_a[category_name])
+        if category_name in category_name_to_id_b:
+            category_ids_to_include_b.append(category_name_to_id_b[category_name])
+
     if pairwise_options.results_description_a is None:
         if 'detector' not in results_a['info']:
             print('No model metadata supplied for results-A, assuming MDv4')
@@ -679,7 +705,7 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
     filename_to_image_b = {im['file']:im for im in images_b}
 
 
-    ##%% Make sure they represent the same set of images
+    ##%% Make sure the two result sets represent the same set of images
 
     filenames_a = [im['file'] for im in images_a]
     filenames_b_set = set([im['file'] for im in images_b])
@@ -914,7 +940,8 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
             pairwise_options.detection_thresholds_b['default']
 
     # fn = filenames_to_compare[0]
-    for i_file,fn in tqdm(enumerate(filenames_to_compare),total=len(filenames_to_compare)):
+    for i_file,fn in tqdm(enumerate(filenames_to_compare),
+                          total=len(filenames_to_compare)):
 
         if fn not in filename_to_image_b:
 
@@ -1000,27 +1027,11 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
                     categories_above_threshold_b.add(category_id)
 
         if invalid_category_error:
-
             continue
 
         # Should we be restricting the comparison to only certain categories?
        if options.category_names_to_include is not None:
 
-            # Just in case the user provided a single category instead of a list
-            if isinstance(options.category_names_to_include,str):
-                options.category_names_to_include = [options.category_names_to_include]
-
-            category_name_to_id_a = invert_dictionary(detection_categories_a)
-            category_name_to_id_b = invert_dictionary(detection_categories_b)
-            category_ids_to_include_a = []
-            category_ids_to_include_b = []
-
-            for category_name in options.category_names_to_include:
-                if category_name in category_name_to_id_a:
-                    category_ids_to_include_a.append(category_name_to_id_a[category_name])
-                if category_name in category_name_to_id_b:
-                    category_ids_to_include_b.append(category_name_to_id_b[category_name])
-
             # Restrict the categories we treat as above-threshold to the set we're supposed
             # to be using
             categories_above_threshold_a = [category_id for category_id in categories_above_threshold_a if \
@@ -1287,7 +1298,7 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
             max_conf_b = _maxempty([det['conf'] for det in im_b['detections']])
             sort_conf = max(max_conf_a,max_conf_b)
 
-        # ...what kind of ground truth (if any) do we have?
+        # ...what kind of ground truth (if any) do we have?
 
         assert comparison_category is not None
         categories_to_image_pairs[comparison_category][fn] = im_pair
@@ -1313,7 +1324,11 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
     local_output_folder = os.path.join(options.output_folder,'cmp_' + \
                                        str(output_index).zfill(3))
 
-    def render_detection_comparisons(category,image_pairs,image_filenames):
+    def _render_detection_comparisons(category,image_pairs,image_filenames):
+        """
+        Render all the detection results pairs for the sampled images in a
+        particular category (e.g. all the "common detections").
+        """
 
         print('Rendering detections for category {}'.format(category))
 
@@ -1336,7 +1351,7 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
 
         return output_image_paths
 
-    # ...def render_detection_comparisons()
+    # ...def _render_detection_comparisons()
 
     if len(options.colormap_a) > 1:
         color_string_a = str(options.colormap_a)
@@ -1371,7 +1386,7 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
 
         input_image_absolute_paths = [os.path.join(options.image_folder,fn) for fn in image_filenames]
 
-        category_image_output_paths = render_detection_comparisons(category,
+        category_image_output_paths = _render_detection_comparisons(category,
                                                                     image_pairs,image_filenames)
 
         category_html_filename = os.path.join(local_output_folder,
@@ -1469,6 +1484,8 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
                 print("Pool closed and joined for comparison rendering")
             except Exception:
                 pass
+
+
     ##%% Write the top-level HTML file content
 
     html_output_string = ''
@@ -1591,8 +1608,11 @@ def compare_batch_results(options):
     for i_comparison,pairwise_options in enumerate(pairwise_options_list):
 
         print('Running comparison {} of {}'.format(i_comparison,n_comparisons))
+        pairwise_options.verbose = options.verbose
         pairwise_results = \
             _pairwise_compare_batch_results(options,i_comparison,pairwise_options)
+        if not options.return_images_by_category:
+            pairwise_results.categories_to_image_pairs = None
         html_content += pairwise_results.html_content
         all_pairwise_results.append(pairwise_results)
 
@@ -2,12 +2,8 @@
 
 convert_output_format.py
 
-Converts between file formats output by our batch processing API. Currently
-supports json <--> csv conversion, but this should be the landing place for any
-conversion - including between hypothetical alternative .json versions - that we support
-in the future.
-
-The .csv format is largely obsolete, don't use it unless you're super-duper sure you need it.
+Converts between file .json and .csv representations of MD output. The .csv format is
+largely obsolete, don't use it unless you're super-duper sure you need it.
 
 """
 
@@ -15,13 +11,16 @@ The .csv format is largely obsolete, don't use it unless you're super-duper sure
 
 import argparse
 import json
-import csv
 import sys
 import os
 
 from tqdm import tqdm
+from collections import defaultdict
+
+import pandas as pd
 
 from megadetector.postprocessing.load_api_results import load_api_results_csv
+from megadetector.utils.wi_taxonomy_utils import load_md_or_speciesnet_file
 from megadetector.data_management.annotations import annotation_constants
 from megadetector.utils import ct_utils
 
@@ -35,16 +34,13 @@ def convert_json_to_csv(input_path,
                         min_confidence=None,
                         omit_bounding_boxes=False,
                         output_encoding=None,
-                        overwrite=True):
+                        overwrite=True,
+                        verbose=False):
     """
     Converts a MD results .json file to a totally non-standard .csv format.
 
     If [output_path] is None, will convert x.json to x.csv.
 
-    TODO: this function should obviously be using Pandas or some other sensible structured
-    representation of tabular data. Even a list of dicts. This implementation is quite
-    brittle and depends on adding fields to every row in exactly the right order.
-
     Args:
         input_path (str): the input .json file to convert
         output_path (str, optional): the output .csv file to generate; if this is None, uses
@@ -57,7 +53,7 @@ def convert_json_to_csv(input_path,
         output_encoding (str, optional): encoding to use for the .csv file
         overwrite (bool, optional): whether to overwrite an existing .csv file; if this is False and
             the output file exists, no-ops and returns
-
+        verbose (bool, optional): enable additional debug output
     """
 
     if output_path is None:
@@ -68,36 +64,28 @@ def convert_json_to_csv(input_path,
         return
 
     print('Loading json results from {}...'.format(input_path))
-    json_output = json.load(open(input_path))
-
-    rows = []
+    json_output = load_md_or_speciesnet_file(input_path,
+                                             verbose=verbose)
 
-    fixed_columns = ['image_path', 'max_confidence', 'detections']
+    def clean_category_name(s):
+        return s.replace(',','_').replace(' ','_').lower()
 
-    # We add an output column for each class other than 'empty',
-    # containing the maximum probability of that class for each image
-    # n_non_empty_detection_categories = len(annotation_constants.annotation_bbox_categories) - 1
-    n_non_empty_detection_categories = annotation_constants.NUM_DETECTOR_CATEGORIES
-    detection_category_column_names = []
-    assert annotation_constants.detector_bbox_category_id_to_name[0] == 'empty'
-    for cat_id in range(1,n_non_empty_detection_categories+1):
-        cat_name = annotation_constants.detector_bbox_category_id_to_name[cat_id]
-        detection_category_column_names.append('max_conf_' + cat_name)
+    # Create column names for max detection confidences
+    detection_category_id_to_max_conf_column_name = {}
+    for category_id in json_output['detection_categories'].keys():
+        category_name = clean_category_name(json_output['detection_categories'][category_id])
+        detection_category_id_to_max_conf_column_name[category_id] = \
+            'max_conf_' + category_name
 
-    n_classification_categories = 0
+    classification_category_id_to_max_conf_column_name = {}
 
+    # Create column names for max classification confidences (if necessary)
     if 'classification_categories' in json_output.keys():
-        classification_category_id_to_name = json_output['classification_categories']
-        classification_category_ids = list(classification_category_id_to_name.keys())
-        classification_category_id_to_column_number = {}
-        classification_category_column_names = []
-        for i_category,category_id in enumerate(classification_category_ids):
-            category_name = classification_category_id_to_name[category_id].\
-                replace(' ','_').replace(',','')
-            classification_category_column_names.append('max_classification_conf_' + category_name)
-            classification_category_id_to_column_number[category_id] = i_category
-
-        n_classification_categories = len(classification_category_ids)
+
+        for category_id in json_output['classification_categories'].keys():
+            category_name = clean_category_name(json_output['classification_categories'][category_id])
+            classification_category_id_to_max_conf_column_name[category_id] = \
+                'max_classification_conf_' + category_name
 
     # There are several .json fields for which we add .csv columns; other random bespoke fields
     # will be ignored.
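For reference, the new clean_category_name() helper above feeds the per-category column names (max_conf_*, max_classification_conf_*). A quick illustration with hypothetical category names:

def clean_category_name(s):
    # Same transformation as the helper added above
    return s.replace(',','_').replace(' ','_').lower()

print(clean_category_name('Mountain Lion'))   # mountain_lion -> column 'max_conf_mountain_lion'
print(clean_category_name('vehicle, other'))  # vehicle__other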
@@ -117,26 +105,43 @@ def convert_json_to_csv(input_path,
     if len(optional_fields_present) > 0:
         print('Found {} optional fields'.format(len(optional_fields_present)))
 
-    expected_row_length = len(fixed_columns) + len(detection_category_column_names) + \
-        n_classification_categories + len(optional_fields_present)
-
     print('Formatting results...')
 
+    output_records = []
+
     # i_image = 0; im = json_output['images'][i_image]
     for im in tqdm(json_output['images']):
 
-        image_id = im['file']
+        output_record = {}
+        output_records.append(output_record)
+
+        output_record['image_path'] = im['file']
+        output_record['max_confidence'] = ''
+        output_record['detections'] = ''
+
+        for field_name in optional_fields_present:
+            output_record[field_name] = ''
+            if field_name in im:
+                output_record[field_name] = im[field_name]
+
+        for detection_category_id in detection_category_id_to_max_conf_column_name:
+            column_name = detection_category_id_to_max_conf_column_name[detection_category_id]
+            output_record[column_name] = 0
+
+        for classification_category_id in classification_category_id_to_max_conf_column_name:
+            column_name = classification_category_id_to_max_conf_column_name[classification_category_id]
+            output_record[column_name] = 0
 
         if 'failure' in im and im['failure'] is not None:
-            row = [image_id, 'failure', im['failure']]
-            rows.append(row)
+            output_record['max_confidence'] = 'failure'
+            output_record['detections'] = im['failure']
             # print('Skipping failed image {} ({})'.format(im['file'],im['failure']))
             continue
 
         max_conf = ct_utils.get_max_conf(im)
+        detection_category_id_to_max_conf = defaultdict(float)
+        classification_category_id_to_max_conf = defaultdict(float)
         detections = []
-        max_detection_category_probabilities = [None] * n_non_empty_detection_categories
-        max_classification_category_probabilities = [0] * n_classification_categories
 
         # d = im['detections'][0]
         for d in im['detections']:
@@ -155,31 +160,24 @@ def convert_json_to_csv(input_path,
             xmax = input_bbox[0] + input_bbox[2]
             ymax = input_bbox[1] + input_bbox[3]
             output_detection = [ymin, xmin, ymax, xmax]
-
             output_detection.append(d['conf'])
-
-            # Category 0 is empty, for which we don't have a column, so the max
-            # confidence for category N goes in column N-1
-            detection_category_id = int(d['category'])
-            assert detection_category_id > 0 and detection_category_id <= \
-                n_non_empty_detection_categories
-            detection_category_column = detection_category_id - 1
-            detection_category_max = max_detection_category_probabilities[detection_category_column]
-            if detection_category_max is None or d['conf'] > detection_category_max:
-                max_detection_category_probabilities[detection_category_column] = d['conf']
-
-            output_detection.append(detection_category_id)
+            output_detection.append(int(d['category']))
             detections.append(output_detection)
 
+            detection_category_id = d['category']
+            detection_category_max = detection_category_id_to_max_conf[detection_category_id]
+            if d['conf'] > detection_category_max:
+                detection_category_id_to_max_conf[detection_category_id] = d['conf']
+
             if 'classifications' in d:
-                assert n_classification_categories > 0,\
-                    'Oops, I have classification results, but no classification metadata'
+
                 for c in d['classifications']:
-                    category_id = c[0]
-                    p = c[1]
-                    category_index = classification_category_id_to_column_number[category_id]
-                    if (max_classification_category_probabilities[category_index] < p):
-                        max_classification_category_probabilities[category_index] = p
+                    classification_category_id = c[0]
+                    classification_conf = c[1]
+                    classification_category_max = \
+                        classification_category_id_to_max_conf[classification_category_id]
+                    if classification_conf > classification_category_max:
+                        classification_category_id_to_max_conf[classification_category_id] = d['conf']
 
                 # ...for each classification
 
@@ -191,40 +189,36 @@ def convert_json_to_csv(input_path,
         if not omit_bounding_boxes:
             detection_string = json.dumps(detections)
 
-        row = [image_id, max_conf, detection_string]
-        row.extend(max_detection_category_probabilities)
-        row.extend(max_classification_category_probabilities)
+        output_record['detections'] = detection_string
+        output_record['max_confidence'] = max_conf
 
-        for field_name in optional_fields_present:
-            if field_name not in im:
-                row.append('')
-            else:
-                row.append(str(im[field_name]))
+        for detection_category_id in detection_category_id_to_max_conf_column_name:
+            column_name = detection_category_id_to_max_conf_column_name[detection_category_id]
+            output_record[column_name] = \
+                detection_category_id_to_max_conf[detection_category_id]
 
-        assert len(row) == expected_row_length
-        rows.append(row)
+        for classification_category_id in classification_category_id_to_max_conf_column_name:
+            column_name = classification_category_id_to_max_conf_column_name[classification_category_id]
+            output_record[column_name] = \
+                classification_category_id_to_max_conf[classification_category_id]
 
     # ...for each image
 
     print('Writing to csv...')
 
-    with open(output_path, 'w', newline='', encoding=output_encoding) as f:
-        writer = csv.writer(f, delimiter=',')
-        header = fixed_columns
-        header.extend(detection_category_column_names)
-        if n_classification_categories > 0:
-            header.extend(classification_category_column_names)
-        for field_name in optional_fields_present:
-            header.append(field_name)
-        writer.writerow(header)
-        writer.writerows(rows)
+    df = pd.DataFrame(output_records)
+
+    if omit_bounding_boxes:
+        df = df.drop('detections',axis=1)
+    df.to_csv(output_path,index=False,header=True)
 
 # ...def convert_json_to_csv(...)
 
 
 def convert_csv_to_json(input_path,output_path=None,overwrite=True):
     """
-    Convert .csv to .json. If output_path is None, will convert x.csv to x.json.
+    Convert .csv to .json. If output_path is None, will convert x.csv to x.json. This
+    supports a largely obsolete .csv format, there's almost no reason you want to do this.
 
     Args:
         input_path (str): .csv filename to convert to .json
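The net effect of the convert_output_format.py changes above is to replace the order-sensitive csv.writer rows with a list of dicts handed to pandas. A standalone sketch of that pattern (the record contents are made up; the real function derives its columns from the file's category maps):

import pandas as pd

# Build one dict per image; keys become CSV columns
records = []
for image in [{'file': 'a.jpg', 'max_conf': 0.9},
              {'file': 'b.jpg', 'max_conf': 0.2}]:
    record = {'image_path': image['file'],
              'max_confidence': image['max_conf'],
              'max_conf_animal': 0}  # one column per detection category
    records.append(record)

# Missing keys just become NaN, so rows no longer have to agree on field
# order the way the old csv.writer implementation did
df = pd.DataFrame(records)
df.to_csv('results.csv', index=False, header=True)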
@@ -1145,7 +1145,7 @@ def process_batch_results(options):
 
     images_to_visualize = detections_df
 
-    if options.num_images_to_sample is not None and options.num_images_to_sample > 0:
+    if (options.num_images_to_sample is not None) and (options.num_images_to_sample > 0):
         images_to_visualize = images_to_visualize.sample(
             n=min(options.num_images_to_sample, len(images_to_visualize)),
             random_state=options.sample_seed)
@@ -83,6 +83,9 @@ class SubsetJsonDetectorOutputOptions:
     def __init__(self):
 
         #: Only process files containing the token 'query'
+        #:
+        #: Does not support general regexes, but supports ^ as a special case
+        #: regex-like notation for "starts with"
         self.query = None
 
         #: Replace 'query' with 'replacement' if 'replacement' is not None. If 'query' is None,
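Per the new comment, 'query' is a plain substring match, with '^' supported as a regex-like "starts with" special case. A tiny sketch (the import path is assumed from the package layout):

from megadetector.postprocessing.subset_json_detector_output import \
    SubsetJsonDetectorOutputOptions

options = SubsetJsonDetectorOutputOptions()
options.query = '^camera_01/'  # keep only files whose relative path starts with camera_01/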
@@ -153,6 +156,12 @@ class SubsetJsonDetectorOutputOptions:
         #: to be contiguous. Set to 1 to remove empty categories only.
         self.remove_classification_categories_below_count = None
 
+        #: Remove detections above a threshold size (as a fraction of the image size)
+        self.maximum_detection_size = None
+
+        #: Remove detections below a threshold size (as a fraction of the image size)
+        self.minimum_detection_size = None
+
 # ...class SubsetJsonDetectorOutputOptions
 
 
@@ -271,6 +280,71 @@ def remove_classification_categories_below_count(data, options):
 # ...def remove_classification_categories_below_count(...)
 
 
+def subset_json_detector_output_by_size(data, options):
+    """
+    Remove detections above or below threshold sizes (as a fraction
+    of the image size).
+
+    Args:
+        data (dict): data loaded from a MD results file
+        options (SubsetJsonDetectorOutputOptions): parameters for subsetting
+
+    Returns:
+        dict: Possibly-modified version of [data] (also modifies in place)
+    """
+
+    if (options.maximum_detection_size is None) and \
+       (options.minimum_detection_size is None):
+        return data
+
+    if options.maximum_detection_size is None:
+        options.maximum_detection_size = 1000
+
+    if options.minimum_detection_size is None:
+        options.minimum_detection_size = -1000
+
+    print('Subsetting by size ({} <--> {})'.format(
+        options.minimum_detection_size,
+        options.maximum_detection_size))
+
+    images_in = data['images']
+    images_out = []
+
+    # im = images_in[0]
+    for i_image, im in tqdm(enumerate(images_in), total=len(images_in)):
+
+        # Always keep failed images; if the caller wants to remove these, they
+        # will use remove_failed_images
+        if ('detections' not in im) or (im['detections'] is None):
+            images_out.append(im)
+            continue
+
+        detections_to_keep = []
+
+        for det in im['detections']:
+
+            # [x_min, y_min, width_of_box, height_of_box]
+            detection_size = det['bbox'][2] * det['bbox'][3]
+
+            if (detection_size >= options.minimum_detection_size) and \
+               (detection_size <= options.maximum_detection_size):
+                detections_to_keep.append(det)
+
+        im['detections'] = detections_to_keep
+
+        images_out.append(im)
+
+    # ...for each image
+
+    data['images'] = images_out
+    print('done, found {} matches (of {})'.format(
+        len(data['images']),len(images_in)))
+
+    return data
+
+# ...def subset_json_detector_output_by_size(...)
+
+
 def subset_json_detector_output_by_confidence(data, options):
     """
     Removes all detections below options.confidence_threshold.
@@ -671,6 +745,11 @@ def subset_json_detector_output(input_filename, output_filename, options, data=N
 
     data = subset_json_detector_output_by_list(data, options)
 
+    if (options.maximum_detection_size is not None) or \
+       (options.minimum_detection_size is not None):
+
+        data = subset_json_detector_output_by_size(data, options)
+
     if not options.split_folders:
 
         _write_detection_results(data, output_filename, options)
@@ -834,6 +913,10 @@ def main(): # noqa
                         help='Replace [query] with this')
     parser.add_argument('--confidence_threshold', type=float, default=None,
                         help='Remove detections below this confidence level')
+    parser.add_argument('--maximum_detection_size', type=float, default=None,
+                        help='Remove detections above this size (as a fraction of the image size)')
+    parser.add_argument('--minimum_detection_size', type=float, default=None,
+                        help='Remove detections below this size (as a fraction of the image size)')
     parser.add_argument('--keep_files_in_list', type=str, default=None,
                         help='Keep only files in this list, which can be a .json results file or a folder.' + \
                              ' Assumes that the input .json file contains relative paths when comparing to a folder.')
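Finally, a sketch of the new size-based filtering in subset_json_detector_output.py; the import path and the CLI invocation are assumptions, but the option names, flag names, and the function signature all appear in the hunks above. Detection size is box area as a fraction of image area (bbox width * height, in MD's normalized [x_min, y_min, width, height] box format):

from megadetector.postprocessing.subset_json_detector_output import (
    SubsetJsonDetectorOutputOptions, subset_json_detector_output)

options = SubsetJsonDetectorOutputOptions()
options.minimum_detection_size = 0.001  # drop boxes under 0.1% of the image area
options.maximum_detection_size = 0.8    # drop boxes covering more than 80%

# Placeholder filenames; note that failed images are always kept by the size filter
subset_json_detector_output('md_results.json', 'md_results_filtered.json', options)

# Approximate CLI equivalent (flag names are from the argparse hunk above):
#   python subset_json_detector_output.py md_results.json md_results_filtered.json \
#       --minimum_detection_size 0.001 --maximum_detection_size 0.8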