megadetector 5.0.20__py3-none-any.whl → 5.0.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic; see the package registry's advisory page for more details.

@@ -42,11 +42,13 @@ class ValidateBatchResultsOptions:
42
42
  #:
43
43
  #: If None, assumes absolute paths.
44
44
  self.relative_path_base = None
45
+
46
+ #: Should we return the loaded data, or just the validation results?
47
+ self.return_data = False
45
48
 
46
49
  # ...class ValidateBatchResultsOptions
47
50
 
48
51
 
49
-
50
52
  #%% Main function
51
53
 
52
54
  def validate_batch_results(json_filename,options=None):
@@ -55,11 +57,17 @@ def validate_batch_results(json_filename,options=None):
55
57
 
56
58
  Args:
57
59
  json_filename (str): the filename to validate
58
- options (ValidateBatchResultsOptions, optionsl): all the parameters used to control this
60
+ options (ValidateBatchResultsOptions, optional): all the parameters used to control this
59
61
  process, see ValidateBatchResultsOptions for details
60
62
 
61
63
  Returns:
62
- bool: reserved; currently always errors or returns True.
64
+ dict: a dict with a field called "validation_results", which is itself a dict. The reason
65
+ it's a dict inside a dict is that if return_data is True, the outer dict also contains all
66
+ the loaded data. The "validation_results" dict contains fields called "errors", "warnings",
67
+ and "filename". "errors" and "warnings" are lists of strings, although "errors" will never
68
+ be longer than N=1, since validation fails at the first error.
69
+
70
+
63
71
  """
64
72
 
65
73
  if options is None:
@@ -68,75 +76,127 @@ def validate_batch_results(json_filename,options=None):
68
76
  with open(json_filename,'r') as f:
69
77
  d = json.load(f)
70
78
 
71
- ## Info validation
79
+ validation_results = {}
80
+ validation_results['filename'] = json_filename
81
+ validation_results['warnings'] = []
82
+ validation_results['errors'] = []
72
83
 
73
- assert 'info' in d
74
- info = d['info']
84
+ if not isinstance(d,dict):
85
+
86
+ validation_results['errors'].append('Input data is not a dict')
87
+ to_return = {}
88
+ to_return['validation_results'] = validation_results
89
+ return to_return
75
90
 
76
- assert isinstance(info,dict)
77
- assert 'format_version' in info
78
- format_version = float(info['format_version'])
79
- assert format_version >= 1.3, 'This validator can only be used with format version 1.3 or later'
91
+ try:
92
+
93
+ ## Info validation
94
+
95
+ if not 'info' in d:
96
+ raise ValueError('Input does not contain info field')
80
97
 
81
- print('Validating a .json results file with format version {}'.format(format_version))
82
-
83
- ## Category validation
84
-
85
- assert 'detection_categories' in d
86
- for k in d['detection_categories'].keys():
87
- # Categories should be string-formatted ints
88
- assert isinstance(k,str)
89
- _ = int(k)
90
- assert isinstance(d['detection_categories'][k],str)
98
+ info = d['info']
99
+
100
+ if not isinstance(info,dict):
101
+ raise ValueError('Input contains invalid info field')
102
+
103
+ if 'format_version' not in info :
104
+ raise ValueError('Input does not specify format version')
105
+
106
+ format_version = float(info['format_version'])
107
+ if format_version < 1.3:
108
+ raise ValueError('This validator can only be used with format version 1.3 or later')
91
109
 
92
- if 'classification_categories' in d:
93
- for k in d['classification_categories'].keys():
94
- # Categories should be string-formatted ints
95
- assert isinstance(k,str)
110
+
111
+ ## Category validation
112
+
113
+ if 'detection_categories' not in d:
114
+ raise ValueError('Input does not contain detection_categories field')
115
+
116
+ for k in d['detection_categories'].keys():
117
+ # Category IDs should be string-formatted ints
118
+ if not isinstance(k,str):
119
+ raise ValueError('Invalid detection category ID: {}'.format(k))
96
120
  _ = int(k)
97
- assert isinstance(d['classification_categories'][k],str)
98
-
99
-
100
- ## Image validation
101
-
102
- assert 'images' in d
103
- assert isinstance(d['images'],list)
104
-
105
- # im = d['images'][0]
106
- for im in d['images']:
121
+ if not isinstance(d['detection_categories'][k],str):
122
+ raise ValueError('Invalid detection category name: {}'.format(
123
+ d['detection_categories'][k]))
124
+
125
+ if 'classification_categories' in d:
126
+ for k in d['classification_categories'].keys():
127
+ # Categories should be string-formatted ints
128
+ if not isinstance(k,str):
129
+ raise ValueError('Invalid classification category ID: {}'.format(k))
130
+ _ = int(k)
131
+ if not isinstance(d['classification_categories'][k],str):
132
+ raise ValueError('Invalid classification category name: {}'.format(
133
+ d['classification_categories'][k]))
107
134
 
108
- assert isinstance(im,dict)
109
- assert 'file' in im
110
135
 
111
- file = im['file']
136
+ ## Image validation
112
137
 
113
- if options.check_image_existence:
114
- if options.relative_path_base is None:
115
- file_abs = file
116
- else:
117
- file_abs = os.path.join(options.relative_path_base,file)
118
- assert os.path.isfile(file_abs), 'Cannot find file {}'.format(file_abs)
138
+ if 'images' not in d:
139
+ raise ValueError('images field not present')
140
+ if not isinstance(d['images'],list):
141
+ raise ValueError('Invalid images field')
142
+
143
+ # im = d['images'][0]
144
+ for i_im,im in enumerate(d['images']):
119
145
 
120
- if 'detections' not in im or im['detections'] is None:
121
- assert 'failure' in im and isinstance(im['failure'],str)
122
- else:
123
- assert isinstance(im['detections'],list)
146
+ if not isinstance(im,dict):
147
+ raise ValueError('Invalid image at index {}'.format(i_im))
148
+ if 'file' not in im:
149
+ raise ValueError('Image without filename at index {}'.format(i_im))
124
150
 
125
- if is_video_file(im['file']) and (format_version >= 1.4):
126
- assert 'frame_rate' in im
127
- if 'detections' in im and im['detections'] is not None:
128
- for det in im['detections']:
129
- assert 'frame_number' in det
151
+ file = im['file']
130
152
 
131
- # ...for each image
153
+ if options.check_image_existence:
154
+ if options.relative_path_base is None:
155
+ file_abs = file
156
+ else:
157
+ file_abs = os.path.join(options.relative_path_base,file)
158
+ if not os.path.isfile(file_abs):
159
+ raise ValueError('Cannot find file {}'.format(file_abs))
160
+
161
+ if ('detections' not in im) or (im['detections'] is None):
162
+ if not ('failure' in im and isinstance(im['failure'],str)):
163
+ raise ValueError('Image {} has no detections and no failure'.format(im['file']))
164
+ else:
165
+ if not isinstance(im['detections'],list):
166
+ raise ValueError('Invalid detections list for image {}'.format(im['file']))
167
+
168
+ if is_video_file(im['file']) and (format_version >= 1.4):
169
+ if 'frame_rate' not in im:
170
+ raise ValueError('Video without frame rate: {}'.format(im['file']))
171
+ if 'detections' in im and im['detections'] is not None:
172
+ for det in im['detections']:
173
+ if 'frame_number' not in det:
174
+ raise ValueError('Frame without frame number in video {}'.format(
175
+ im['file']))
176
+
177
+ # ...for each image
178
+
179
+
180
+ ## Checking on other keys
181
+
182
+ for k in d.keys():
183
+ if (k not in typical_keys) and (k not in required_keys):
184
+ validation_results['warnings'].append(
185
+ 'Warning: non-standard key {} present at file level'.format(k))
132
186
 
187
+ except Exception as e:
188
+
189
+ validation_results['errors'].append(str(e))
190
+
191
+ if options.return_data:
192
+ to_return = d
193
+ else:
194
+ to_return = {}
133
195
 
134
- ## Checking on other keys
196
+ to_return['validation_results'] = validation_results
135
197
 
136
- for k in d.keys():
137
- if k not in typical_keys and k not in required_keys:
138
- print('Warning: non-standard key {} present at file level'.format(k))
139
-
198
+ return to_return
199
+
140
200
  # ...def validate_batch_results(...)
141
201
 
142
202
 
@@ -29,10 +29,6 @@ import subprocess
29
29
  import argparse
30
30
  import inspect
31
31
 
32
- #: IoU threshold used to determine whether boxes in two detection files likely correspond
33
- #: to the same box.
34
- iou_threshold_for_file_comparison = 0.9
35
-
36
32
 
37
33
  #%% Classes
38
34
 
@@ -106,6 +102,10 @@ class MDTestOptions:
106
102
  #: PYTHONPATH to set for CLI tests; if None, inherits from the parent process. Only
107
103
  #: impacts the called functions, not the parent process.
108
104
  self.cli_test_pythonpath = None
105
+
106
+ #: IoU threshold used to determine whether boxes in two detection files likely correspond
107
+ #: to the same box.
108
+ self.iou_threshold_for_file_comparison = 0.85
109
109
 
110
110
  # ...class MDTestOptions()
111
111
 
@@ -410,7 +410,7 @@ def compare_detection_lists(detections_a,detections_b,options,bidirectional_comp
410
410
  iou = get_iou(det_a['bbox'],b_det['bbox'])
411
411
 
412
412
  # Is this likely the same detection as det_a?
413
- if iou >= iou_threshold_for_file_comparison and iou > highest_iou:
413
+ if iou >= options.iou_threshold_for_file_comparison and iou > highest_iou:
414
414
  matching_det_b = b_det
415
415
  highest_iou = iou
416
416
 
@@ -529,12 +529,14 @@ def compare_results(inference_output_file,expected_results_file,options):
529
529
  if not options.warning_mode:
530
530
 
531
531
  assert max_conf_error <= options.max_conf_error, \
532
- 'Confidence error {} is greater than allowable ({}), on file:\n{}'.format(
533
- max_conf_error,options.max_conf_error,max_conf_error_file)
532
+ 'Confidence error {} is greater than allowable ({}), on file:\n{} ({},{})'.format(
533
+ max_conf_error,options.max_conf_error,max_conf_error_file,
534
+ inference_output_file,expected_results_file)
534
535
 
535
536
  assert max_coord_error <= options.max_coord_error, \
536
- 'Coord error {} is greater than allowable ({}), on file:\n{}'.format(
537
- max_coord_error,options.max_coord_error,max_coord_error_file)
537
+ 'Coord error {} is greater than allowable ({}), on file:\n{} ({},{})'.format(
538
+ max_coord_error,options.max_coord_error,max_coord_error_file,
539
+ inference_output_file,expected_results_file)
538
540
 
539
541
  print('Max conf error: {} (file {})'.format(
540
542
  max_conf_error,max_conf_error_file))
@@ -847,7 +849,7 @@ def run_python_tests(options):
847
849
  video_options.frame_rendering_folder = os.path.join(options.scratch_dir,'video_scratch/rendered_frame_folder')
848
850
  video_options.render_output_video = True
849
851
  # video_options.keep_rendered_frames = False
850
- # video_options.keep_rendered_frames = False
852
+ # video_options.keep_extracted_frames = False
851
853
  video_options.force_extracted_frame_folder_deletion = True
852
854
  video_options.force_rendered_frame_folder_deletion = True
853
855
  # video_options.reuse_results_if_available = False
@@ -887,7 +889,7 @@ def run_python_tests(options):
887
889
  video_options.frame_rendering_folder = os.path.join(options.scratch_dir,'video_scratch/rendered_frame_folder')
888
890
  video_options.render_output_video = False
889
891
  video_options.keep_rendered_frames = False
890
- video_options.keep_rendered_frames = False
892
+ video_options.keep_extracted_frames = False
891
893
  video_options.force_extracted_frame_folder_deletion = True
892
894
  video_options.force_rendered_frame_folder_deletion = True
893
895
  video_options.reuse_results_if_available = False
@@ -1353,7 +1355,7 @@ if False:
1353
1355
  # options.cli_working_dir = r'c:\git\MegaDetector'
1354
1356
  # options.yolo_working_dir = r'c:\git\yolov5-md'
1355
1357
  options.cli_working_dir = os.path.expanduser('~')
1356
- options.yolo_working_dir = '/mnt/c/git/yolov5-md'
1358
+ # options.yolo_working_dir = '/mnt/c/git/yolov5-md'
1357
1359
  options = download_test_data(options)
1358
1360
 
1359
1361
  #%%
@@ -17,6 +17,7 @@ import platform
17
17
  import string
18
18
  import json
19
19
  import shutil
20
+ import hashlib
20
21
  import unicodedata
21
22
  import zipfile
22
23
  import tarfile
@@ -236,6 +237,30 @@ def path_is_abs(p):
236
237
  return (len(p) > 1) and (p[0] == '/' or p[1] == ':' or p[0] == '\\')
237
238
 
238
239
 
240
+ def safe_create_link(link_exists,link_new):
241
+ """
242
+ Creates a symlink at [link_new] pointing to [link_exists].
243
+
244
+ If [link_new] already exists, make sure it's a link (not a file),
245
+ and if it has a different target than [link_exists], removes and re-creates
246
+ it.
247
+
248
+ Errors if [link_new] already exists but it's not a link.
249
+
250
+ Args:
251
+ link_exists (str): the source of the (possibly-new) symlink
252
+ link_new (str): the target of the (possibly-new) symlink
253
+ """
254
+
255
+ if os.path.exists(link_new) or os.path.islink(link_new):
256
+ assert os.path.islink(link_new)
257
+ if not os.readlink(link_new) == link_exists:
258
+ os.remove(link_new)
259
+ os.symlink(link_exists,link_new)
260
+ else:
261
+ os.symlink(link_exists,link_new)
262
+
263
+
239
264
  def top_level_folder(p):
240
265
  r"""
241
266
  Gets the top-level folder from the path *p*.
@@ -296,31 +321,6 @@ if False:
296
321
  p = r'c:/foo'; s = top_level_folder(p); print(s); assert s == 'c:/foo'
297
322
  p = r'c:\foo/bar'; s = top_level_folder(p); print(s); assert s == 'c:\\foo'
298
323
 
299
- #%%
300
-
301
- def safe_create_link(link_exists,link_new):
302
- """
303
- Creates a symlink at [link_new] pointing to [link_exists].
304
-
305
- If [link_new] already exists, make sure it's a link (not a file),
306
- and if it has a different target than [link_exists], removes and re-creates
307
- it.
308
-
309
- Errors if [link_new] already exists but it's not a link.
310
-
311
- Args:
312
- link_exists (str): the source of the (possibly-new) symlink
313
- link_new (str): the target of the (possibly-new) symlink
314
- """
315
-
316
- if os.path.exists(link_new) or os.path.islink(link_new):
317
- assert os.path.islink(link_new)
318
- if not os.readlink(link_new) == link_exists:
319
- os.remove(link_new)
320
- os.symlink(link_exists,link_new)
321
- else:
322
- os.symlink(link_exists,link_new)
323
-
324
324
 
325
325
  #%% Image-related path functions
326
326
 
@@ -598,7 +598,9 @@ def open_file(filename, attempt_to_open_in_wsl_host=False, browser_name=None):
598
598
 
599
599
  opener = 'xdg-open'
600
600
  subprocess.call([opener, filename])
601
-
601
+
602
+ # ...def open_file(...)
603
+
602
604
 
603
605
  #%% File list functions
604
606
 
@@ -649,8 +651,12 @@ def _copy_file(input_output_tuple,overwrite=True,verbose=False):
649
651
  target_fn = input_output_tuple[1]
650
652
  if (not overwrite) and (os.path.isfile(target_fn)):
651
653
  if verbose:
652
- print('Skipping existing file {}'.format(target_fn))
653
- return
654
+ print('Skipping existing target file {}'.format(target_fn))
655
+ return
656
+
657
+ if verbose:
658
+ print('Copying to target file {}'.format(target_fn))
659
+
654
660
  os.makedirs(os.path.dirname(target_fn),exist_ok=True)
655
661
  shutil.copyfile(source_fn,target_fn)
656
662
 
@@ -667,7 +673,7 @@ def parallel_copy_files(input_file_to_output_file, max_workers=16,
667
673
  use_threads (bool, optional): whether to use threads (True) or processes (False) for
668
674
  parallel copying; ignored if max_workers <= 1
669
675
  overwrite (bool, optional): whether to overwrite existing destination files
670
- verbose (bool, optional): enable additionald debug output
676
+ verbose (bool, optional): enable additional debug output
671
677
  """
672
678
 
673
679
  n_workers = min(max_workers,len(input_file_to_output_file))
@@ -750,7 +756,7 @@ def parallel_get_file_sizes(filenames,
750
756
  max_workers (int, optional): number of concurrent workers; set to <=1 to disable parallelism
751
757
  use_threads (bool, optional): whether to use threads (True) or processes (False) for
752
758
  parallel copying; ignored if max_workers <= 1
753
- verbose (bool, optional): enable additionald debug output
759
+ verbose (bool, optional): enable additional debug output
754
760
  recursive (bool, optional): enumerate recursively, only relevant if [filenames] is a folder.
755
761
  convert_slashes (bool, optional): convert backslashes to forward slashes
756
762
  return_relative_paths (bool, optional): return relative paths; only relevant if [filenames]
@@ -804,6 +810,8 @@ def parallel_get_file_sizes(filenames,
804
810
 
805
811
  return to_return
806
812
 
813
+ # ...def parallel_get_file_sizes(...)
814
+
807
815
 
808
816
  #%% Zip functions
809
817
 
@@ -1075,3 +1083,104 @@ def unzip_file(input_file, output_folder=None):
1075
1083
 
1076
1084
  with zipfile.ZipFile(input_file, 'r') as zf:
1077
1085
  zf.extractall(output_folder)
1086
+
1087
+
1088
+ #%% File hashing functions
1089
+
1090
+ def compute_file_hash(file_path, algorithm='sha256', allow_failures=True):
1091
+ """
1092
+ Compute the hash of a file.
1093
+
1094
+ Adapted from:
1095
+
1096
+ https://www.geeksforgeeks.org/python-program-to-find-hash-of-file/
1097
+
1098
+ Args:
1099
+ file_path (str): the file to hash
1100
+ algorithm (str, optional): the hashing algorithm to use (e.g. md5, sha256)
1101
+
1102
+ Returns:
1103
+ str: the hash value for this file
1104
+ """
1105
+
1106
+ try:
1107
+
1108
+ hash_func = hashlib.new(algorithm)
1109
+
1110
+ with open(file_path, 'rb') as file:
1111
+ while chunk := file.read(8192): # Read the file in chunks of 8192 bytes
1112
+ hash_func.update(chunk)
1113
+
1114
+ return str(hash_func.hexdigest())
1115
+
1116
+ except Exception:
1117
+
1118
+ if allow_failures:
1119
+ return None
1120
+ else:
1121
+ raise
1122
+
1123
+ # ...def compute_file_hash(...)
1124
+
1125
+
1126
+ def parallel_compute_file_hashes(filenames,
1127
+ max_workers=16,
1128
+ use_threads=True,
1129
+ recursive=True,
1130
+ algorithm='sha256',
1131
+ verbose=False):
1132
+ """
1133
+ Compute file hashes for a list or folder of images.
1134
+
1135
+ Args:
1136
+ filenames (list or str): a list of filenames or a folder
1137
+ max_workers (int, optional): the number of parallel workers to use; set to <=1 to disable
1138
+ parallelization
1139
+ use_threads (bool, optional): whether to use threads (True) or processes (False) for
1140
+ parallelization
1141
+ algorithm (str, optional): the hashing algorithm to use (e.g. md5, sha256)
1142
+ recursive (bool, optional): if [filenames] is a folder, whether to enumerate recursively.
1143
+ Ignored if [filenames] is a list.
1144
+ verbose (bool, optional): enable additional debug output
1145
+
1146
+ Returns:
1147
+ dict: a dict mapping filenames to hash values; values will be None for files that fail
1148
+ to load.
1149
+ """
1150
+
1151
+ if isinstance(filenames,str) and os.path.isdir(filenames):
1152
+ if verbose:
1153
+ print('Enumerating files in {}'.format(filenames))
1154
+ filenames = recursive_file_list(filenames,recursive=recursive,return_relative_paths=False)
1155
+
1156
+ n_workers = min(max_workers,len(filenames))
1157
+
1158
+ if verbose:
1159
+ print('Computing hashes for {} files on {} workers'.format(len(filenames),n_workers))
1160
+
1161
+ if n_workers <= 1:
1162
+
1163
+ results = []
1164
+ for filename in filenames:
1165
+ results.append(compute_file_hash(filename,algorithm=algorithm,allow_failures=True))
1166
+
1167
+ else:
1168
+
1169
+ if use_threads:
1170
+ pool = ThreadPool(n_workers)
1171
+ else:
1172
+ pool = Pool(n_workers)
1173
+
1174
+ results = list(tqdm(pool.imap(
1175
+ partial(compute_file_hash,algorithm=algorithm,allow_failures=True),
1176
+ filenames), total=len(filenames)))
1177
+
1178
+ assert len(filenames) == len(results), 'Internal error in parallel_compute_file_hashes'
1179
+
1180
+ to_return = {}
1181
+ for i_file,filename in enumerate(filenames):
1182
+ to_return[filename] = results[i_file]
1183
+
1184
+ return to_return
1185
+
1186
+ # ...def parallel_compute_file_hashes(...)
@@ -42,6 +42,7 @@ def write_html_image_list(filename=None,images=None,options=None):
42
42
  options (dict, optional): a dict with one or more of the following fields:
43
43
 
44
44
  - fHtml (file pointer to write to, used for splitting write operations over multiple calls)
45
+ - pageTitle (HTML page title)
45
46
  - headerHtml (html text to include before the image list)
46
47
  - trailerHtml (html text to include after the image list)
47
48
  - defaultImageStyle (default css style for images)
@@ -60,11 +61,14 @@ def write_html_image_list(filename=None,images=None,options=None):
60
61
  if 'fHtml' not in options:
61
62
  options['fHtml'] = -1
62
63
 
64
+ if 'pageTitle' not in options or options['pageTitle'] is None:
65
+ options['pageTitle'] = ''
66
+
63
67
  if 'headerHtml' not in options or options['headerHtml'] is None:
64
- options['headerHtml'] = ''
68
+ options['headerHtml'] = ''
65
69
 
66
70
  if 'trailerHtml' not in options or options['trailerHtml'] is None:
67
- options['trailerHtml'] = ''
71
+ options['trailerHtml'] = ''
68
72
 
69
73
  if 'defaultTextStyle' not in options or options['defaultTextStyle'] is None:
70
74
  options['defaultTextStyle'] = \
@@ -114,7 +118,7 @@ def write_html_image_list(filename=None,images=None,options=None):
114
118
  # You can't supply your own file handle in this case
115
119
  if options['fHtml'] != -1:
116
120
  raise ValueError(
117
- 'You can''t supply your own file handle if we have to page the image set')
121
+ "You can't supply your own file handle if we have to page the image set")
118
122
 
119
123
  figureFileStartingIndices = list(range(0,nImages,options['maxFiguresPerHtmlFile']))
120
124
 
@@ -124,7 +128,10 @@ def write_html_image_list(filename=None,images=None,options=None):
124
128
  fMeta = open(filename,'w')
125
129
 
126
130
  # Write header stuff
127
- fMeta.write('<html><body>\n')
131
+ titleString = '<title>Index page</title>'
132
+ if len(options['pageTitle']) > 0:
133
+ titleString = '<title>Index page for: {}</title>'.format(options['pageTitle'])
134
+ fMeta.write('<html><head>{}</head><body>\n'.format(titleString))
128
135
  fMeta.write(options['headerHtml'])
129
136
  fMeta.write('<table border = 0 cellpadding = 2>\n')
130
137
 
@@ -170,7 +177,11 @@ def write_html_image_list(filename=None,images=None,options=None):
170
177
  else:
171
178
  fHtml = options['fHtml']
172
179
 
173
- fHtml.write('<html><body>\n')
180
+ titleString = ''
181
+ if len(options['pageTitle']) > 0:
182
+ titleString = '<title>{}</title>'.format(options['pageTitle'])
183
+
184
+ fHtml.write('<html>{}<body>\n'.format(titleString))
174
185
 
175
186
  fHtml.write(options['headerHtml'])
176
187