megadetector 10.0.8__py3-none-any.whl → 10.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

@@ -136,7 +136,7 @@ class BatchComparisonOptions:
136
136
  #: Colormap to use for detections in file B (maps detection categories to colors)
137
137
  self.colormap_b = ['RoyalBlue']
138
138
 
139
- #: Process-based parallelization isn't supported yet; this must be "True"
139
+ #: Whether to render images with threads (True) or processes (False)
140
140
  self.parallelize_rendering_with_threads = True
141
141
 
142
142
  #: List of filenames to include in the comparison, or None to use all files
@@ -152,7 +152,7 @@ class BatchComparisonOptions:
152
152
  self.target_width = 800
153
153
 
154
154
  #: Number of workers to use for rendering, or <=1 to disable parallelization
155
- self.n_rendering_workers = 20
155
+ self.n_rendering_workers = 10
156
156
 
157
157
  #: Random seed for image sampling (not used if max_images_per_category is None)
158
158
  self.random_seed = 0
@@ -183,7 +183,7 @@ class BatchComparisonOptions:
183
183
  #: Should we show category names (instead of numbers) on detected boxes?
184
184
  self.show_category_names_on_detected_boxes = True
185
185
 
186
- #: List of PairwiseBatchComparisonOptions that defines the comparisons we'll render.
186
+ #: List of PairwiseBatchComparisonOptions that defines the comparisons we'll render
187
187
  self.pairwise_options = []
188
188
 
189
189
  #: Only process images whose file names contain this token
@@ -197,7 +197,7 @@ class BatchComparisonOptions:
197
197
  self.verbose = False
198
198
 
199
199
  #: Separate out the "clean TP" and "clean TN" categories, only relevant when GT is
200
- #: available.
200
+ #: available
201
201
  self.include_clean_categories = True
202
202
 
203
203
  #: When rendering to the output table, optionally write alternative strings
@@ -211,6 +211,10 @@ class BatchComparisonOptions:
211
211
  #: Should we include a TOC? TOC is always omitted if <=2 comparisons are performed.
212
212
  self.include_toc = True
213
213
 
214
+ #: Should we return the mapping from categories (e.g. "common detections") to image
215
+ #: pairs? Makes the return dict much larger, but allows post-hoc exploration.
216
+ self.return_images_by_category = False
217
+
214
218
  # ...class BatchComparisonOptions
215
219
 
216
220
 
@@ -224,7 +228,7 @@ class PairwiseBatchComparisonResults:
224
228
  #: String of HTML content suitable for rendering to an HTML file
225
229
  self.html_content = None
226
230
 
227
- #: Possibly-modified version of the PairwiseBatchComparisonOptions supplied as input.
231
+ #: Possibly-modified version of the PairwiseBatchComparisonOptions supplied as input
228
232
  self.pairwise_options = None
229
233
 
230
234
  #: A dictionary with keys representing category names; in the no-ground-truth case, for example,
@@ -295,7 +299,8 @@ def _render_image_pair(fn,image_pairs,category_folder,options,pairwise_options):
295
299
  """
296
300
 
297
301
  input_image_path = os.path.join(options.image_folder,fn)
298
- assert os.path.isfile(input_image_path), 'Image {} does not exist'.format(input_image_path)
302
+ assert os.path.isfile(input_image_path), \
303
+ 'Image {} does not exist'.format(input_image_path)
299
304
 
300
305
  im = visualization_utils.open_image(input_image_path)
301
306
  image_pair = image_pairs[fn]
@@ -628,11 +633,21 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
628
633
  os.makedirs(options.output_folder,exist_ok=True)
629
634
 
630
635
 
636
+ # Just in case the user provided a single category instead of a list
637
+ # for category_names_to_include
638
+ if options.category_names_to_include is not None:
639
+ if isinstance(options.category_names_to_include,str):
640
+ options.category_names_to_include = [options.category_names_to_include]
641
+
631
642
  ##%% Load both result sets
632
643
 
644
+ if options.verbose:
645
+ print('Loading {}'.format(pairwise_options.results_filename_a))
633
646
  with open(pairwise_options.results_filename_a,'r') as f:
634
647
  results_a = json.load(f)
635
648
 
649
+ if options.verbose:
650
+ print('Loading {}'.format(pairwise_options.results_filename_b))
636
651
  with open(pairwise_options.results_filename_b,'r') as f:
637
652
  results_b = json.load(f)
638
653
 
@@ -654,6 +669,17 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
654
669
  detection_category_name_to_id = invert_dictionary(detection_categories_a)
655
670
  options.detection_category_id_to_name = detection_category_id_to_name
656
671
 
672
+ category_name_to_id_a = invert_dictionary(detection_categories_a)
673
+ category_name_to_id_b = invert_dictionary(detection_categories_b)
674
+ category_ids_to_include_a = []
675
+ category_ids_to_include_b = []
676
+
677
+ for category_name in options.category_names_to_include:
678
+ if category_name in category_name_to_id_a:
679
+ category_ids_to_include_a.append(category_name_to_id_a[category_name])
680
+ if category_name in category_name_to_id_b:
681
+ category_ids_to_include_b.append(category_name_to_id_b[category_name])
682
+
657
683
  if pairwise_options.results_description_a is None:
658
684
  if 'detector' not in results_a['info']:
659
685
  print('No model metadata supplied for results-A, assuming MDv4')
@@ -679,7 +705,7 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
679
705
  filename_to_image_b = {im['file']:im for im in images_b}
680
706
 
681
707
 
682
- ##%% Make sure they represent the same set of images
708
+ ##%% Make sure the two result sets represent the same set of images
683
709
 
684
710
  filenames_a = [im['file'] for im in images_a]
685
711
  filenames_b_set = set([im['file'] for im in images_b])
@@ -914,7 +940,8 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
914
940
  pairwise_options.detection_thresholds_b['default']
915
941
 
916
942
  # fn = filenames_to_compare[0]
917
- for i_file,fn in tqdm(enumerate(filenames_to_compare),total=len(filenames_to_compare)):
943
+ for i_file,fn in tqdm(enumerate(filenames_to_compare),
944
+ total=len(filenames_to_compare)):
918
945
 
919
946
  if fn not in filename_to_image_b:
920
947
 
@@ -1000,27 +1027,11 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
1000
1027
  categories_above_threshold_b.add(category_id)
1001
1028
 
1002
1029
  if invalid_category_error:
1003
-
1004
1030
  continue
1005
1031
 
1006
1032
  # Should we be restricting the comparison to only certain categories?
1007
1033
  if options.category_names_to_include is not None:
1008
1034
 
1009
- # Just in case the user provided a single category instead of a list
1010
- if isinstance(options.category_names_to_include,str):
1011
- options.category_names_to_include = [options.category_names_to_include]
1012
-
1013
- category_name_to_id_a = invert_dictionary(detection_categories_a)
1014
- category_name_to_id_b = invert_dictionary(detection_categories_b)
1015
- category_ids_to_include_a = []
1016
- category_ids_to_include_b = []
1017
-
1018
- for category_name in options.category_names_to_include:
1019
- if category_name in category_name_to_id_a:
1020
- category_ids_to_include_a.append(category_name_to_id_a[category_name])
1021
- if category_name in category_name_to_id_b:
1022
- category_ids_to_include_b.append(category_name_to_id_b[category_name])
1023
-
1024
1035
  # Restrict the categories we treat as above-threshold to the set we're supposed
1025
1036
  # to be using
1026
1037
  categories_above_threshold_a = [category_id for category_id in categories_above_threshold_a if \
@@ -1287,7 +1298,7 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
1287
1298
  max_conf_b = _maxempty([det['conf'] for det in im_b['detections']])
1288
1299
  sort_conf = max(max_conf_a,max_conf_b)
1289
1300
 
1290
- # ...what kind of ground truth (if any) do we have?
1301
+ # ...what kind of ground truth (if any) do we have?
1291
1302
 
1292
1303
  assert comparison_category is not None
1293
1304
  categories_to_image_pairs[comparison_category][fn] = im_pair
@@ -1313,7 +1324,11 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
1313
1324
  local_output_folder = os.path.join(options.output_folder,'cmp_' + \
1314
1325
  str(output_index).zfill(3))
1315
1326
 
1316
- def render_detection_comparisons(category,image_pairs,image_filenames):
1327
+ def _render_detection_comparisons(category,image_pairs,image_filenames):
1328
+ """
1329
+ Render all the detection results pairs for the sampled images in a
1330
+ particular category (e.g. all the "common detections").
1331
+ """
1317
1332
 
1318
1333
  print('Rendering detections for category {}'.format(category))
1319
1334
 
@@ -1336,7 +1351,7 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
1336
1351
 
1337
1352
  return output_image_paths
1338
1353
 
1339
- # ...def render_detection_comparisons()
1354
+ # ...def _render_detection_comparisons()
1340
1355
 
1341
1356
  if len(options.colormap_a) > 1:
1342
1357
  color_string_a = str(options.colormap_a)
@@ -1371,7 +1386,7 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
1371
1386
 
1372
1387
  input_image_absolute_paths = [os.path.join(options.image_folder,fn) for fn in image_filenames]
1373
1388
 
1374
- category_image_output_paths = render_detection_comparisons(category,
1389
+ category_image_output_paths = _render_detection_comparisons(category,
1375
1390
  image_pairs,image_filenames)
1376
1391
 
1377
1392
  category_html_filename = os.path.join(local_output_folder,
@@ -1469,6 +1484,8 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
1469
1484
  print("Pool closed and joined for comparison rendering")
1470
1485
  except Exception:
1471
1486
  pass
1487
+
1488
+
1472
1489
  ##%% Write the top-level HTML file content
1473
1490
 
1474
1491
  html_output_string = ''
@@ -1591,8 +1608,11 @@ def compare_batch_results(options):
1591
1608
  for i_comparison,pairwise_options in enumerate(pairwise_options_list):
1592
1609
 
1593
1610
  print('Running comparison {} of {}'.format(i_comparison,n_comparisons))
1611
+ pairwise_options.verbose = options.verbose
1594
1612
  pairwise_results = \
1595
1613
  _pairwise_compare_batch_results(options,i_comparison,pairwise_options)
1614
+ if not options.return_images_by_category:
1615
+ pairwise_results.categories_to_image_pairs = None
1596
1616
  html_content += pairwise_results.html_content
1597
1617
  all_pairwise_results.append(pairwise_results)
1598
1618
 
@@ -1145,7 +1145,7 @@ def process_batch_results(options):
1145
1145
 
1146
1146
  images_to_visualize = detections_df
1147
1147
 
1148
- if options.num_images_to_sample is not None and options.num_images_to_sample > 0:
1148
+ if (options.num_images_to_sample is not None) and (options.num_images_to_sample > 0):
1149
1149
  images_to_visualize = images_to_visualize.sample(
1150
1150
  n=min(options.num_images_to_sample, len(images_to_visualize)),
1151
1151
  random_state=options.sample_seed)
@@ -156,6 +156,12 @@ class SubsetJsonDetectorOutputOptions:
156
156
  #: to be contiguous. Set to 1 to remove empty categories only.
157
157
  self.remove_classification_categories_below_count = None
158
158
 
159
+ #: Remove detections above a threshold size (as a fraction of the image size)
160
+ self.maximum_detection_size = None
161
+
162
+ #: Remove detections below a threshold size (as a fraction of the image size)
163
+ self.minimum_detection_size = None
164
+
159
165
  # ...class SubsetJsonDetectorOutputOptions
160
166
 
161
167
 
@@ -274,6 +280,71 @@ def remove_classification_categories_below_count(data, options):
274
280
  # ...def remove_classification_categories_below_count(...)
275
281
 
276
282
 
283
def subset_json_detector_output_by_size(data, options):
    """
    Remove detections above or below threshold sizes (as a fraction
    of the image size).

    The size of a detection is the product of the last two elements of its
    'bbox' field (box width and box height).  Images with no 'detections'
    field (or a null one) are left untouched; no images are ever removed.

    Args:
        data (dict): data loaded from a MD results file
        options (SubsetJsonDetectorOutputOptions): parameters for subsetting; only
            maximum_detection_size and minimum_detection_size are read, and
            [options] is not modified

    Returns:
        dict: Possibly-modified version of [data] (also modifies in place)
    """

    if (options.maximum_detection_size is None) and \
       (options.minimum_detection_size is None):
        return data

    # Resolve missing bounds into local variables, rather than writing sentinel
    # values back into [options]; the caller may reuse or serialize that object.
    max_size = options.maximum_detection_size
    if max_size is None:
        max_size = float('inf')

    min_size = options.minimum_detection_size
    if min_size is None:
        min_size = float('-inf')

    print('Subsetting by size ({} <--> {})'.format(min_size, max_size))

    n_detections_in = 0
    n_detections_kept = 0

    # im = data['images'][0]
    for im in tqdm(data['images']):

        # Always keep failed images; if the caller wants to remove these, they
        # will use remove_failed_images
        if ('detections' not in im) or (im['detections'] is None):
            continue

        detections_to_keep = []

        for det in im['detections']:

            # [x_min, y_min, width_of_box, height_of_box]
            detection_size = det['bbox'][2] * det['bbox'][3]

            if min_size <= detection_size <= max_size:
                detections_to_keep.append(det)

        n_detections_in += len(im['detections'])
        n_detections_kept += len(detections_to_keep)

        im['detections'] = detections_to_keep

    # ...for each image

    # Report detections rather than images: this function never removes images,
    # so an image count would always read "N of N".
    print('done, kept {} detections (of {})'.format(
        n_detections_kept, n_detections_in))

    return data

# ...def subset_json_detector_output_by_size(...)
346
+
347
+
277
348
  def subset_json_detector_output_by_confidence(data, options):
278
349
  """
279
350
  Removes all detections below options.confidence_threshold.
@@ -674,6 +745,11 @@ def subset_json_detector_output(input_filename, output_filename, options, data=N
674
745
 
675
746
  data = subset_json_detector_output_by_list(data, options)
676
747
 
748
+ if (options.maximum_detection_size is not None) or \
749
+ (options.minimum_detection_size is not None):
750
+
751
+ data = subset_json_detector_output_by_size(data, options)
752
+
677
753
  if not options.split_folders:
678
754
 
679
755
  _write_detection_results(data, output_filename, options)
@@ -837,6 +913,10 @@ def main(): # noqa
837
913
  help='Replace [query] with this')
838
914
  parser.add_argument('--confidence_threshold', type=float, default=None,
839
915
  help='Remove detections below this confidence level')
916
+ parser.add_argument('--maximum_detection_size', type=float, default=None,
917
+ help='Remove detections above this size (as a fraction of the image size)')
918
+ parser.add_argument('--minimum_detection_size', type=float, default=None,
919
+ help='Remove detections below this size (as a fraction of the image size)')
840
920
  parser.add_argument('--keep_files_in_list', type=str, default=None,
841
921
  help='Keep only files in this list, which can be a .json results file or a folder.' + \
842
922
  ' Assumes that the input .json file contains relative paths when comparing to a folder.')
@@ -129,6 +129,9 @@ def create_html_index(dir,
129
129
  recursive (bool, optional): recurse into subfolders
130
130
  """
131
131
 
132
+ if template_fun is None:
133
+ template_fun = _create_plain_index
134
+
132
135
  print('Traversing {}'.format(dir))
133
136
 
134
137
  # Make sure we remove the trailing /
@@ -1046,6 +1046,73 @@ def parallel_copy_files(input_file_to_output_file,
1046
1046
  # ...def parallel_copy_files(...)
1047
1047
 
1048
1048
 
1049
+ #%% File deletion functions
1050
+
1051
def delete_file(input_file, verbose=False):
    """
    Deletes a single file.

    Failures are reported through the return value (and on the console if
    [verbose] is True) rather than by raising, so this function is safe to use
    as a worker in a pool.

    Args:
        input_file (str): file to delete
        verbose (bool, optional): enable additional debug console output

    Returns:
        bool: True if file was deleted successfully, False otherwise (including
        the cases where [input_file] does not exist or is not a regular file)
    """

    try:
        if verbose:
            print('Deleting file {}'.format(input_file))

        if os.path.isfile(input_file):
            os.remove(input_file)
            return True

        if verbose:
            # Distinguish "missing" from "present, but not a file" (e.g. a folder),
            # so the console output doesn't claim an existing path is absent.
            if os.path.exists(input_file):
                print('{} is not a regular file, skipping'.format(input_file))
            else:
                print('File {} does not exist'.format(input_file))
        return False

    except Exception as e:
        # Deliberately broad: deletion is best-effort, and a single bad path
        # should not take down a batch of deletions.
        if verbose:
            print('Error deleting file {}: {}'.format(input_file, str(e)))
        return False

# ...def delete_file(...)
1081
+
1082
+
1083
def parallel_delete_files(input_files,
                          max_workers=16,
                          use_threads=True,
                          verbose=False):
    """
    Deletes one or more files in parallel.

    Individual failures are not raised; each file is handed to delete_file(),
    which reports problems via its return value and (optionally) the console.

    Args:
        input_files (list): list of files to delete
        max_workers (int, optional): number of concurrent workers, set to <= 1 to disable parallelism
        use_threads (bool, optional): whether to use threads (True) or processes (False); ignored if
            max_workers <= 1
        verbose (bool, optional): enable additional debug console output
    """

    if len(input_files) == 0:
        return

    n_workers = min(max_workers, len(input_files))

    # Honor the documented "<= 1 disables parallelism" contract: run serially
    # rather than asking for a pool of one (or zero/negative, which raises).
    if n_workers <= 1:
        for input_file in tqdm(input_files):
            delete_file(input_file, verbose=verbose)
        return

    if use_threads:
        pool = ThreadPool(n_workers)
    else:
        pool = Pool(n_workers)

    try:
        with tqdm(total=len(input_files)) as pbar:
            for _ in pool.imap_unordered(partial(delete_file, verbose=verbose),
                                         input_files):
                pbar.update()
    finally:
        # Always release the workers, even if iteration is interrupted
        pool.close()
        pool.join()

# ...def parallel_delete_files(...)
1114
+
1115
+
1049
1116
  #%% File size functions
1050
1117
 
1051
1118
  def get_file_sizes(base_dir, convert_slashes=True):
@@ -34,6 +34,27 @@ def is_float(s):
34
34
  return True
35
35
 
36
36
 
37
def is_int(s):
    """
    Checks whether [s] is an object (typically a string) that can be cast to an int

    Args:
        s (object): object to evaluate

    Returns:
        bool: True if s successfully casts to an int, otherwise False
    """

    if s is None:
        return False

    try:
        _ = int(s)
    except (ValueError, TypeError, OverflowError):
        # ValueError: non-numeric strings, NaN
        # TypeError: objects with no integer conversion (lists, dicts, ...)
        # OverflowError: infinite floats
        return False
    return True
56
+
57
+
37
58
  def human_readable_to_bytes(size):
38
59
  """
39
60
  Given a human-readable byte string (e.g. 2G, 10GB, 30MB, 20KB),