megadetector 10.0.7-py3-none-any.whl → 10.0.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.

@@ -305,6 +305,9 @@ class SequenceOptions:
305
305
  #: How to handle invalid datetimes: 'error' or 'none'
306
306
  self.datetime_conversion_failure_behavior = 'none'
307
307
 
308
+ #: Enable additional debug output
309
+ self.verbose = False
310
+
308
311
 
309
312
  #%% Functions
310
313
 
@@ -331,7 +334,9 @@ def write_object_with_serialized_datetimes(d,json_fn):
331
334
  json.dump(d,f,indent=1,default=json_serialize_datetime)
332
335
 
333
336
 
334
- def parse_datetimes_from_cct_image_list(images,conversion_failure_behavior='error'):
337
+ def parse_datetimes_from_cct_image_list(images,
338
+ conversion_failure_behavior='error',
339
+ verbose=False):
335
340
  """
336
341
  Given the "images" field from a COCO camera traps dictionary, converts all
337
342
  string-formatted datetime fields to Python datetimes, making reasonable assumptions
@@ -342,6 +347,7 @@ def parse_datetimes_from_cct_image_list(images,conversion_failure_behavior='erro
342
347
  conversion_failure_behavior (str, optional): determines what happens on a failed
343
348
  conversion; can be "error" (raise an error), "str" (leave as a string), or
344
349
  "none" (convert to None)
350
+ verbose (bool, optional): enable additional debug output
345
351
 
346
352
  Returns:
347
353
  images: the input list, with datetimes converted (after modifying in place)
@@ -359,14 +365,17 @@ def parse_datetimes_from_cct_image_list(images,conversion_failure_behavior='erro
359
365
  dt = dateutil.parser.parse(im['datetime'])
360
366
  im['datetime'] = dt
361
367
  except Exception as e:
362
- s = 'could not parse datetime {}: {}'.format(str(im['datetime']),str(e))
368
+ s = 'could not parse datetime {} from {}: {}'.format(
369
+ str(im['datetime']),im['file_name'],str(e))
363
370
  if conversion_failure_behavior == 'error':
364
371
  raise ValueError(s)
365
372
  elif conversion_failure_behavior == 'str':
366
- print('Warning: {}'.format(s))
373
+ if verbose:
374
+ print('Warning: {}'.format(s))
367
375
  pass
368
376
  elif conversion_failure_behavior == 'none':
369
- print('Warning: {}'.format(s))
377
+ if verbose:
378
+ print('Warning: {}'.format(s))
370
379
  im['datetime'] = None
371
380
 
372
381
  # ...for each image
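For reference, a minimal usage sketch of the change above (not part of the diff; the module path megadetector.data_management.cct_json_utils and the record fields shown are assumptions based on the surrounding code):

from megadetector.data_management.cct_json_utils import parse_datetimes_from_cct_image_list

# Hypothetical CCT-style image records; the second datetime is deliberately unparseable
images = [
    {'file_name': 'loc01/img_0001.jpg', 'datetime': '2023-07-01 08:00:00'},
    {'file_name': 'loc01/img_0002.jpg', 'datetime': 'not a datetime'}
]

# With verbose=True, the failed parse prints a warning that now includes the file name;
# with conversion_failure_behavior='none', the invalid datetime becomes None
images = parse_datetimes_from_cct_image_list(images,
                                             conversion_failure_behavior='none',
                                             verbose=True)
assert images[1]['datetime'] is None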
@@ -450,7 +459,8 @@ def create_sequences(image_info,options=None):
450
459
 
451
460
  # Modifies the images in place
452
461
  _ = parse_datetimes_from_cct_image_list(image_info,
453
- conversion_failure_behavior=options.datetime_conversion_failure_behavior)
462
+ conversion_failure_behavior=options.datetime_conversion_failure_behavior,
463
+ verbose=options.verbose)
454
464
 
455
465
  n_invalid_datetimes = 0
456
466
  for im in image_info:
@@ -505,7 +515,7 @@ def create_sequences(image_info,options=None):
505
515
  delta = (im['datetime'] - previous_datetime).total_seconds()
506
516
 
507
517
  # Start a new sequence if necessary, including the case where this datetime is invalid
508
- if delta is None or delta > options.episode_interval_seconds or invalid_datetime:
518
+ if (delta is None) or (delta > options.episode_interval_seconds) or (invalid_datetime):
509
519
  next_frame_number = 0
510
520
  current_sequence_id = 'location_{}_sequence_index_{}'.format(
511
521
  location,str(next_sequence_number).zfill(5))
@@ -18,13 +18,20 @@ import json
18
18
  import argparse
19
19
 
20
20
  from tqdm import tqdm
21
- from megadetector.utils import ct_utils
22
21
  from copy import copy
23
22
 
23
+ from megadetector.utils import ct_utils
24
+ from megadetector.utils.ct_utils import sort_list_of_dicts_by_key
25
+
24
26
 
25
27
  #%% Functions
26
28
 
27
- def subset_json_db(input_json, query, output_json=None, ignore_case=False, verbose=False):
29
+ def subset_json_db(input_json,
30
+ query,
31
+ output_json=None,
32
+ ignore_case=False,
33
+ remap_categories=True,
34
+ verbose=False):
28
35
  """
29
36
  Given a json file (or dictionary already loaded from a json file), produce a new
30
37
  database containing only the images whose filenames contain the string 'query',
@@ -36,6 +43,8 @@ def subset_json_db(input_json, query, output_json=None, ignore_case=False, verbo
36
43
  contain this string. If this is a list, test for exact matches.
37
44
  output_json (str, optional): file to write the resulting .json file to
38
45
  ignore_case (bool, optional): whether to perform a case-insensitive search for [query]
46
+ remap_categories (bool, optional): trim the category list to only the categories used
47
+ in the subset
39
48
  verbose (bool, optional): enable additional debug output
40
49
 
41
50
  Returns:
@@ -92,6 +101,52 @@ def subset_json_db(input_json, query, output_json=None, ignore_case=False, verbo
92
101
  output_data['images'] = images
93
102
  output_data['annotations'] = annotations
94
103
 
104
+ # Remap categories if necessary
105
+ if remap_categories:
106
+
107
+ category_ids_used = set()
108
+ for ann in annotations:
109
+ category_ids_used.add(ann['category_id'])
110
+
111
+ if verbose:
112
+ print('Keeping {} of {} categories'.format(
113
+ len(category_ids_used),len(input_data['categories'])))
114
+
115
+ input_category_id_to_output_category_id = {}
116
+
117
+ next_category_id = 0
118
+
119
+ # Build mappings from old to new category IDs
120
+ for input_category_id in category_ids_used:
121
+ assert isinstance(input_category_id,int), \
122
+ 'Illegal category ID {}'.format(input_category_id)
123
+ output_category_id = next_category_id
124
+ next_category_id = next_category_id + 1
125
+ input_category_id_to_output_category_id[input_category_id] = output_category_id
126
+
127
+ # Modify the annotations
128
+ for ann in annotations:
129
+ assert ann['category_id'] in input_category_id_to_output_category_id
130
+ ann['category_id'] = input_category_id_to_output_category_id[ann['category_id']]
131
+
132
+ output_categories = []
133
+
134
+ # Re-write the category table
135
+ for cat in input_data['categories']:
136
+
137
+ if cat['id'] in input_category_id_to_output_category_id:
138
+
139
+ # There may be non-required fields, so rather than creating an empty dict and
140
+ # copying just the name/id fields, keep the original dict and change only "id"
141
+ output_category = copy(cat)
142
+ output_category['id'] = input_category_id_to_output_category_id[cat['id']]
143
+ output_categories.append(output_category)
144
+
145
+ output_categories = sort_list_of_dicts_by_key(output_categories,'id')
146
+ output_data['categories'] = output_categories
147
+
148
+ # ...if we need to remap categories
149
+
95
150
  # Write the output file if requested
96
151
  if output_json is not None:
97
152
  if verbose:
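As a usage illustration for the new remap_categories option, a sketch (not taken from the package; the call pattern follows the signature above, the file names are placeholders, and the return value is assumed to be the subset dict):

from megadetector.data_management.databases.subset_json_db import subset_json_db

# Keep only images whose filenames contain 'site_a/', and compact the category
# list (and category IDs) to the categories still used in the subset
subset = subset_json_db('all_images.json',
                        query='site_a/',
                        output_json='site_a.json',
                        ignore_case=True,
                        remap_categories=True,
                        verbose=True)

print('Kept {} images, {} annotations, {} categories'.format(
    len(subset['images']), len(subset['annotations']), len(subset['categories'])))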
@@ -328,7 +328,8 @@ def _initialize_yolo_imports(model_type='yolov5',
328
328
  if try_yolov5_import and not utils_imported:
329
329
 
330
330
  try:
331
- from yolov5.utils.general import non_max_suppression, xyxy2xywh # noqa
331
+ # from yolov5.utils.general import non_max_suppression # type: ignore
332
+ from yolov5.utils.general import xyxy2xywh # noqa
332
333
  from yolov5.utils.augmentations import letterbox # noqa
333
334
  try:
334
335
  from yolov5.utils.general import scale_boxes as scale_coords
@@ -348,7 +349,8 @@ def _initialize_yolo_imports(model_type='yolov5',
348
349
 
349
350
  try:
350
351
 
351
- from yolov9.utils.general import non_max_suppression, xyxy2xywh # noqa
352
+ # from yolov9.utils.general import non_max_suppression # noqa
353
+ from yolov9.utils.general import xyxy2xywh # noqa
352
354
  from yolov9.utils.augmentations import letterbox # noqa
353
355
  from yolov9.utils.general import scale_boxes as scale_coords # noqa
354
356
  utils_imported = True
@@ -378,7 +380,12 @@ def _initialize_yolo_imports(model_type='yolov5',
378
380
 
379
381
  try:
380
382
 
381
- from ultralytics.utils.ops import non_max_suppression # type: ignore # noqa
383
+ # The non_max_suppression() function moved from the ops module to the nms module
384
+ # in mid-2025
385
+ try:
386
+ from ultralytics.utils.ops import non_max_suppression # type: ignore # noqa
387
+ except Exception:
388
+ from ultralytics.utils.nms import non_max_suppression # type: ignore # noqa
382
389
  from ultralytics.utils.ops import xyxy2xywh # type: ignore # noqa
383
390
 
384
391
  # In the ultralytics package, scale_boxes and scale_coords both exist;
@@ -444,9 +451,9 @@ def _initialize_yolo_imports(model_type='yolov5',
444
451
  if verbose:
445
452
  print('Imported utils from ultralytics package')
446
453
 
447
- except Exception:
454
+ except Exception as e:
448
455
 
449
- # print('Ultralytics module import failed')
456
+ print('Ultralytics module import failed: {}'.format(str(e)))
450
457
  pass
451
458
 
452
459
  # If we haven't succeeded yet, assume the YOLOv5 repo is on our PYTHONPATH.
@@ -455,7 +462,8 @@ def _initialize_yolo_imports(model_type='yolov5',
455
462
  try:
456
463
 
457
464
  # import pre- and post-processing functions from the YOLOv5 repo
458
- from utils.general import non_max_suppression, xyxy2xywh # type: ignore
465
+ # from utils.general import non_max_suppression # type: ignore
466
+ from utils.general import xyxy2xywh # type: ignore
459
467
  from utils.augmentations import letterbox # type: ignore
460
468
 
461
469
  # scale_coords() is scale_boxes() in some YOLOv5 versions
@@ -1283,17 +1291,23 @@ class PTDetector:
1283
1291
  else:
1284
1292
  nms_iou_thres = 0.6
1285
1293
 
1286
- pred = nms(prediction=pred,
1287
- conf_thres=detection_threshold,
1288
- iou_thres=nms_iou_thres)
1294
+ use_library_nms = False
1295
+
1296
+ # Model output format changed in recent ultralytics packages, and the nms implementation
1297
+ # in this module hasn't been updated to handle that format yet.
1298
+ if (yolo_model_type_imported is not None) and (yolo_model_type_imported == 'ultralytics'):
1299
+ use_library_nms = True
1289
1300
 
1290
- # For posterity, the ultralytics implementation
1291
- if False:
1301
+ if use_library_nms:
1292
1302
  pred = non_max_suppression(prediction=pred,
1293
- conf_thres=detection_threshold,
1294
- iou_thres=nms_iou_thres,
1295
- agnostic=False,
1296
- multi_label=False)
1303
+ conf_thres=detection_threshold,
1304
+ iou_thres=nms_iou_thres,
1305
+ agnostic=False,
1306
+ multi_label=False)
1307
+ else:
1308
+ pred = nms(prediction=pred,
1309
+ conf_thres=detection_threshold,
1310
+ iou_thres=nms_iou_thres)
1297
1311
 
1298
1312
  assert isinstance(pred, list)
1299
1313
  assert len(pred) == len(batch_metadata), \
@@ -159,7 +159,9 @@ class YoloInferenceOptions:
159
159
  #: These are deliberately offset from the standard MD categories; YOLOv5
160
160
  #: needs category IDs to start at 0.
161
161
  #:
162
- #: This can also be a string that points to a YOLO dataset.yaml file.
162
+ #: This can also be a string that points to any class mapping file supported
163
+ #: by read_classes_from_yolo_dataset_file(): a YOLO dataset.yaml file, a text
164
+ #: file with a list of classes, or a .json file with an ID --> name dict
163
165
  self.yolo_category_id_to_name = {0:'animal',1:'person',2:'vehicle'}
164
166
 
165
167
  #: What should we do if the output file already exists?
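To make the three accepted class-mapping formats concrete, a small sketch (assuming YoloInferenceOptions is importable from megadetector.detection.run_inference_with_yolov5_val, the module this hunk modifies; the file names and the file contents mentioned in comments are illustrative):

from megadetector.detection.run_inference_with_yolov5_val import YoloInferenceOptions

options = YoloInferenceOptions()

# 1. The default: an explicit dict mapping YOLO category IDs to names
options.yolo_category_id_to_name = {0: 'animal', 1: 'person', 2: 'vehicle'}

# 2. A YOLO dataset.yaml file containing a class list
options.yolo_category_id_to_name = 'md_dataset.yaml'

# 3. A flat text file with one class name per line, or a .json file containing an
#    ID --> name dict, e.g. {"0": "animal", "1": "person", "2": "vehicle"}
options.yolo_category_id_to_name = 'md_classes.json'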
@@ -907,7 +907,10 @@ if False:
907
907
 
908
908
  #%% Command-line driver
909
909
 
910
- def main(): # noqa
910
+ def main():
911
+ """
912
+ Command-line driver for run_tiled_inference
913
+ """
911
914
 
912
915
  parser = argparse.ArgumentParser(
913
916
  description='Chop a folder of images up into tiles, run MD on the tiles, and stitch the results together')
@@ -956,7 +959,7 @@ def main(): # noqa
956
959
  '--detector_options',
957
960
  type=str,
958
961
  default=None,
959
- help=('A list of detector options (key-value pairs) to '))
962
+ help=('A list of detector options (key-value pairs)'))
960
963
 
961
964
  # detector_options = parse_kvp_list(args.detector_options)
962
965
 
@@ -22,6 +22,7 @@ from functools import partial
22
22
  from inspect import signature
23
23
 
24
24
  from megadetector.utils import path_utils
25
+ from megadetector.utils.path_utils import clean_path
25
26
  from megadetector.utils.ct_utils import sort_list_of_dicts_by_key
26
27
  from megadetector.visualization import visualization_utils as vis_utils
27
28
 
@@ -592,7 +593,7 @@ def video_to_frames(input_video_file,
592
593
  quality=None,
593
594
  max_width=None,
594
595
  frames_to_extract=None,
595
- allow_empty_videos=False):
596
+ allow_empty_videos=True):
596
597
  """
597
598
  Renders frames from [input_video_file] to .jpg files in [output_folder].
598
599
 
@@ -618,8 +619,8 @@ def video_to_frames(input_video_file,
618
619
  a single frame number. In the special case where frames_to_extract
619
620
  is [], this function still reads video frame rates and verifies that videos
620
621
  are readable, but no frames are extracted.
621
- allow_empty_videos (bool, optional): Just print a warning if a video appears to have no
622
- frames (by default, this is an error).
622
+ allow_empty_videos (bool, optional): Just print a warning if a video appears to have
623
+ no frames (if this is False, an empty video is treated as an error).
623
624
 
624
625
  Returns:
625
626
  tuple: length-2 tuple containing (list of frame filenames,frame rate)
@@ -883,7 +884,14 @@ def _video_to_frames_for_folder(relative_fn,input_folder,output_folder_base,
883
884
 
884
885
  # Create the target output folder
885
886
  output_folder_video = os.path.join(output_folder_base,relative_fn)
886
- os.makedirs(output_folder_video,exist_ok=True)
887
+ try:
888
+ os.makedirs(output_folder_video,exist_ok=True)
889
+ except Exception:
890
+ output_folder_clean = clean_path(output_folder_video)
891
+ print('Warning: failed to create folder {}, trying {}'.format(
892
+ output_folder_video,output_folder_clean))
893
+ output_folder_video = output_folder_clean
894
+ os.makedirs(output_folder_video,exist_ok=True)
887
895
 
888
896
  # Render frames
889
897
  # input_video_file = input_fn_absolute; output_folder = output_folder_video
@@ -1090,6 +1098,9 @@ class FrameToVideoOptions:
1090
1098
  #: Are frame rates required?
1091
1099
  self.frame_rates_are_required = False
1092
1100
 
1101
+ #: Enable additional debug output
1102
+ self.verbose = False
1103
+
1093
1104
 
1094
1105
  def frame_results_to_video_results(input_file,
1095
1106
  output_file,
@@ -1176,9 +1187,14 @@ def frame_results_to_video_results(input_file,
1176
1187
 
1177
1188
  if (video_filename_to_frame_rate is not None):
1178
1189
 
1179
- if options.frame_rates_are_required:
1180
- assert video_name in video_filename_to_frame_rate, \
1181
- 'Could not determine frame rate for {}'.format(video_name)
1190
+ if video_name not in video_filename_to_frame_rate:
1191
+
1192
+ s = 'Could not determine frame rate for {}'.format(video_name)
1193
+ if options.frame_rates_are_required:
1194
+ raise ValueError(s)
1195
+ elif options.verbose:
1196
+ print('Warning: {}'.format(s))
1197
+
1182
1198
  if video_name in video_filename_to_frame_rate:
1183
1199
  im_out['frame_rate'] = video_filename_to_frame_rate[video_name]
1184
1200
 
@@ -130,7 +130,7 @@ class ClassificationSmoothingOptions:
130
130
 
131
131
  ## Populated internally
132
132
 
133
- #: #: Only include these categories in the smoothing process (None to use all categories)
133
+ #: Only include these categories in the smoothing process (None to use all categories)
134
134
  self._detection_category_ids_to_smooth = None
135
135
 
136
136
 
@@ -1015,6 +1015,10 @@ def smooth_classification_results_sequence_level(input_file,
1015
1015
 
1016
1016
  detections_this_sequence = []
1017
1017
  for image_filename in image_filenames_this_sequence:
1018
+ if image_filename not in image_fn_to_classification_results:
1019
+ print('Warning: {} in sequence list but not in results'.format(
1020
+ image_filename))
1021
+ continue
1018
1022
  im = image_fn_to_classification_results[image_filename]
1019
1023
  if 'detections' not in im or im['detections'] is None:
1020
1024
  continue
@@ -2,12 +2,8 @@
2
2
 
3
3
  convert_output_format.py
4
4
 
5
- Converts between file formats output by our batch processing API. Currently
6
- supports json <--> csv conversion, but this should be the landing place for any
7
- conversion - including between hypothetical alternative .json versions - that we support
8
- in the future.
9
-
10
- The .csv format is largely obsolete, don't use it unless you're super-duper sure you need it.
5
+ Converts between .json and .csv representations of MD output. The .csv format is
6
+ largely obsolete, don't use it unless you're super-duper sure you need it.
11
7
 
12
8
  """
13
9
 
@@ -15,13 +11,16 @@ The .csv format is largely obsolete, don't use it unless you're super-duper sure
15
11
 
16
12
  import argparse
17
13
  import json
18
- import csv
19
14
  import sys
20
15
  import os
21
16
 
22
17
  from tqdm import tqdm
18
+ from collections import defaultdict
19
+
20
+ import pandas as pd
23
21
 
24
22
  from megadetector.postprocessing.load_api_results import load_api_results_csv
23
+ from megadetector.utils.wi_taxonomy_utils import load_md_or_speciesnet_file
25
24
  from megadetector.data_management.annotations import annotation_constants
26
25
  from megadetector.utils import ct_utils
27
26
 
@@ -35,16 +34,13 @@ def convert_json_to_csv(input_path,
35
34
  min_confidence=None,
36
35
  omit_bounding_boxes=False,
37
36
  output_encoding=None,
38
- overwrite=True):
37
+ overwrite=True,
38
+ verbose=False):
39
39
  """
40
40
  Converts a MD results .json file to a totally non-standard .csv format.
41
41
 
42
42
  If [output_path] is None, will convert x.json to x.csv.
43
43
 
44
- TODO: this function should obviously be using Pandas or some other sensible structured
45
- representation of tabular data. Even a list of dicts. This implementation is quite
46
- brittle and depends on adding fields to every row in exactly the right order.
47
-
48
44
  Args:
49
45
  input_path (str): the input .json file to convert
50
46
  output_path (str, optional): the output .csv file to generate; if this is None, uses
@@ -57,7 +53,7 @@ def convert_json_to_csv(input_path,
57
53
  output_encoding (str, optional): encoding to use for the .csv file
58
54
  overwrite (bool, optional): whether to overwrite an existing .csv file; if this is False and
59
55
  the output file exists, no-ops and returns
60
-
56
+ verbose (bool, optional): enable additional debug output
61
57
  """
62
58
 
63
59
  if output_path is None:
@@ -68,36 +64,28 @@ def convert_json_to_csv(input_path,
68
64
  return
69
65
 
70
66
  print('Loading json results from {}...'.format(input_path))
71
- json_output = json.load(open(input_path))
72
-
73
- rows = []
67
+ json_output = load_md_or_speciesnet_file(input_path,
68
+ verbose=verbose)
74
69
 
75
- fixed_columns = ['image_path', 'max_confidence', 'detections']
70
+ def clean_category_name(s):
71
+ return s.replace(',','_').replace(' ','_').lower()
76
72
 
77
- # We add an output column for each class other than 'empty',
78
- # containing the maximum probability of that class for each image
79
- # n_non_empty_detection_categories = len(annotation_constants.annotation_bbox_categories) - 1
80
- n_non_empty_detection_categories = annotation_constants.NUM_DETECTOR_CATEGORIES
81
- detection_category_column_names = []
82
- assert annotation_constants.detector_bbox_category_id_to_name[0] == 'empty'
83
- for cat_id in range(1,n_non_empty_detection_categories+1):
84
- cat_name = annotation_constants.detector_bbox_category_id_to_name[cat_id]
85
- detection_category_column_names.append('max_conf_' + cat_name)
73
+ # Create column names for max detection confidences
74
+ detection_category_id_to_max_conf_column_name = {}
75
+ for category_id in json_output['detection_categories'].keys():
76
+ category_name = clean_category_name(json_output['detection_categories'][category_id])
77
+ detection_category_id_to_max_conf_column_name[category_id] = \
78
+ 'max_conf_' + category_name
86
79
 
87
- n_classification_categories = 0
80
+ classification_category_id_to_max_conf_column_name = {}
88
81
 
82
+ # Create column names for max classification confidences (if necessary)
89
83
  if 'classification_categories' in json_output.keys():
90
- classification_category_id_to_name = json_output['classification_categories']
91
- classification_category_ids = list(classification_category_id_to_name.keys())
92
- classification_category_id_to_column_number = {}
93
- classification_category_column_names = []
94
- for i_category,category_id in enumerate(classification_category_ids):
95
- category_name = classification_category_id_to_name[category_id].\
96
- replace(' ','_').replace(',','')
97
- classification_category_column_names.append('max_classification_conf_' + category_name)
98
- classification_category_id_to_column_number[category_id] = i_category
99
-
100
- n_classification_categories = len(classification_category_ids)
84
+
85
+ for category_id in json_output['classification_categories'].keys():
86
+ category_name = clean_category_name(json_output['classification_categories'][category_id])
87
+ classification_category_id_to_max_conf_column_name[category_id] = \
88
+ 'max_classification_conf_' + category_name
101
89
 
102
90
  # There are several .json fields for which we add .csv columns; other random bespoke fields
103
91
  # will be ignored.
@@ -117,26 +105,43 @@ def convert_json_to_csv(input_path,
117
105
  if len(optional_fields_present) > 0:
118
106
  print('Found {} optional fields'.format(len(optional_fields_present)))
119
107
 
120
- expected_row_length = len(fixed_columns) + len(detection_category_column_names) + \
121
- n_classification_categories + len(optional_fields_present)
122
-
123
108
  print('Formatting results...')
124
109
 
110
+ output_records = []
111
+
125
112
  # i_image = 0; im = json_output['images'][i_image]
126
113
  for im in tqdm(json_output['images']):
127
114
 
128
- image_id = im['file']
115
+ output_record = {}
116
+ output_records.append(output_record)
117
+
118
+ output_record['image_path'] = im['file']
119
+ output_record['max_confidence'] = ''
120
+ output_record['detections'] = ''
121
+
122
+ for field_name in optional_fields_present:
123
+ output_record[field_name] = ''
124
+ if field_name in im:
125
+ output_record[field_name] = im[field_name]
126
+
127
+ for detection_category_id in detection_category_id_to_max_conf_column_name:
128
+ column_name = detection_category_id_to_max_conf_column_name[detection_category_id]
129
+ output_record[column_name] = 0
130
+
131
+ for classification_category_id in classification_category_id_to_max_conf_column_name:
132
+ column_name = classification_category_id_to_max_conf_column_name[classification_category_id]
133
+ output_record[column_name] = 0
129
134
 
130
135
  if 'failure' in im and im['failure'] is not None:
131
- row = [image_id, 'failure', im['failure']]
132
- rows.append(row)
136
+ output_record['max_confidence'] = 'failure'
137
+ output_record['detections'] = im['failure']
133
138
  # print('Skipping failed image {} ({})'.format(im['file'],im['failure']))
134
139
  continue
135
140
 
136
141
  max_conf = ct_utils.get_max_conf(im)
142
+ detection_category_id_to_max_conf = defaultdict(float)
143
+ classification_category_id_to_max_conf = defaultdict(float)
137
144
  detections = []
138
- max_detection_category_probabilities = [None] * n_non_empty_detection_categories
139
- max_classification_category_probabilities = [0] * n_classification_categories
140
145
 
141
146
  # d = im['detections'][0]
142
147
  for d in im['detections']:
@@ -155,31 +160,24 @@ def convert_json_to_csv(input_path,
155
160
  xmax = input_bbox[0] + input_bbox[2]
156
161
  ymax = input_bbox[1] + input_bbox[3]
157
162
  output_detection = [ymin, xmin, ymax, xmax]
158
-
159
163
  output_detection.append(d['conf'])
160
-
161
- # Category 0 is empty, for which we don't have a column, so the max
162
- # confidence for category N goes in column N-1
163
- detection_category_id = int(d['category'])
164
- assert detection_category_id > 0 and detection_category_id <= \
165
- n_non_empty_detection_categories
166
- detection_category_column = detection_category_id - 1
167
- detection_category_max = max_detection_category_probabilities[detection_category_column]
168
- if detection_category_max is None or d['conf'] > detection_category_max:
169
- max_detection_category_probabilities[detection_category_column] = d['conf']
170
-
171
- output_detection.append(detection_category_id)
164
+ output_detection.append(int(d['category']))
172
165
  detections.append(output_detection)
173
166
 
167
+ detection_category_id = d['category']
168
+ detection_category_max = detection_category_id_to_max_conf[detection_category_id]
169
+ if d['conf'] > detection_category_max:
170
+ detection_category_id_to_max_conf[detection_category_id] = d['conf']
171
+
174
172
  if 'classifications' in d:
175
- assert n_classification_categories > 0,\
176
- 'Oops, I have classification results, but no classification metadata'
173
+
177
174
  for c in d['classifications']:
178
- category_id = c[0]
179
- p = c[1]
180
- category_index = classification_category_id_to_column_number[category_id]
181
- if (max_classification_category_probabilities[category_index] < p):
182
- max_classification_category_probabilities[category_index] = p
175
+ classification_category_id = c[0]
176
+ classification_conf = c[1]
177
+ classification_category_max = \
178
+ classification_category_id_to_max_conf[classification_category_id]
179
+ if classification_conf > classification_category_max:
180
+ classification_category_id_to_max_conf[classification_category_id] = classification_conf
183
181
 
184
182
  # ...for each classification
185
183
 
@@ -191,40 +189,36 @@ def convert_json_to_csv(input_path,
191
189
  if not omit_bounding_boxes:
192
190
  detection_string = json.dumps(detections)
193
191
 
194
- row = [image_id, max_conf, detection_string]
195
- row.extend(max_detection_category_probabilities)
196
- row.extend(max_classification_category_probabilities)
192
+ output_record['detections'] = detection_string
193
+ output_record['max_confidence'] = max_conf
197
194
 
198
- for field_name in optional_fields_present:
199
- if field_name not in im:
200
- row.append('')
201
- else:
202
- row.append(str(im[field_name]))
195
+ for detection_category_id in detection_category_id_to_max_conf_column_name:
196
+ column_name = detection_category_id_to_max_conf_column_name[detection_category_id]
197
+ output_record[column_name] = \
198
+ detection_category_id_to_max_conf[detection_category_id]
203
199
 
204
- assert len(row) == expected_row_length
205
- rows.append(row)
200
+ for classification_category_id in classification_category_id_to_max_conf_column_name:
201
+ column_name = classification_category_id_to_max_conf_column_name[classification_category_id]
202
+ output_record[column_name] = \
203
+ classification_category_id_to_max_conf[classification_category_id]
206
204
 
207
205
  # ...for each image
208
206
 
209
207
  print('Writing to csv...')
210
208
 
211
- with open(output_path, 'w', newline='', encoding=output_encoding) as f:
212
- writer = csv.writer(f, delimiter=',')
213
- header = fixed_columns
214
- header.extend(detection_category_column_names)
215
- if n_classification_categories > 0:
216
- header.extend(classification_category_column_names)
217
- for field_name in optional_fields_present:
218
- header.append(field_name)
219
- writer.writerow(header)
220
- writer.writerows(rows)
209
+ df = pd.DataFrame(output_records)
210
+
211
+ if omit_bounding_boxes:
212
+ df = df.drop('detections',axis=1)
213
+ df.to_csv(output_path,index=False,header=True)
221
214
 
222
215
  # ...def convert_json_to_csv(...)
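A short usage sketch for the rewritten, pandas-based writer (file names are placeholders; the expected column names are derived from the clean_category_name() logic above, assuming the standard MD detection categories):

from megadetector.postprocessing.convert_output_format import convert_json_to_csv

convert_json_to_csv('md_results.json',
                    output_path='md_results.csv',
                    omit_bounding_boxes=False,
                    verbose=True)

# For a typical MegaDetector results file, the .csv contains the columns image_path,
# max_confidence, detections, max_conf_animal, max_conf_person, max_conf_vehicle,
# one max_classification_conf_* column per classification category (if present),
# and one column per optional field found in the .json.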
223
216
 
224
217
 
225
218
  def convert_csv_to_json(input_path,output_path=None,overwrite=True):
226
219
  """
227
- Convert .csv to .json. If output_path is None, will convert x.csv to x.json.
220
+ Convert .csv to .json. If output_path is None, will convert x.csv to x.json. This
221
+ supports a largely obsolete .csv format; there's almost no reason you'd want to do this.
228
222
 
229
223
  Args:
230
224
  input_path (str): .csv filename to convert to .json
@@ -83,6 +83,9 @@ class SubsetJsonDetectorOutputOptions:
83
83
  def __init__(self):
84
84
 
85
85
  #: Only process files containing the token 'query'
86
+ #:
87
+ #: Does not support general regexes, but supports ^ as a special case
88
+ #: regex-like notation for "starts with"
86
89
  self.query = None
87
90
 
88
91
  #: Replace 'query' with 'replacement' if 'replacement' is not None. If 'query' is None,
@@ -21,7 +21,7 @@ from megadetector.utils.path_utils import is_image_file
21
21
 
22
22
  #%% Directory enumeration functions
23
23
 
24
- def create_plain_index(root, dirs, files, dirname=None):
24
+ def _create_plain_index(root, dirs, files, dirname=None):
25
25
  """
26
26
  Creates the fairly plain HTML folder index including a preview of a single image file,
27
27
  if any is present.
@@ -40,6 +40,7 @@ def create_plain_index(root, dirs, files, dirname=None):
40
40
 
41
41
  if dirname is None:
42
42
  dirname = root or '/'
43
+ dirname = dirname.replace('\\','/')
43
44
 
44
45
  html = "<!DOCTYPE html>\n"
45
46
  html += "<html lang='en'><head>"
@@ -104,13 +105,14 @@ def create_plain_index(root, dirs, files, dirname=None):
104
105
  html += "</body></html>\n"
105
106
  return html
106
107
 
107
- # ...def create_plain_index(...)
108
+ # ...def _create_plain_index(...)
108
109
 
109
110
 
110
- def traverse_and_create_index(dir,
111
- overwrite_files=False,
112
- template_fun=create_plain_index,
113
- basepath=None):
111
+ def create_html_index(dir,
112
+ overwrite=False,
113
+ template_fun=_create_plain_index,
114
+ basepath=None,
115
+ recursive=True):
114
116
  """
115
117
  Recursively traverses the local directory [dir] and generates an index
116
118
  file for each folder using [template_fun] to generate the HTML output.
@@ -118,12 +120,13 @@ def traverse_and_create_index(dir,
118
120
 
119
121
  Args:
120
122
  dir (str): directory to process
121
- overwrite_files (bool, optional): whether to over-write existing index file
123
+ overwrite (bool, optional): whether to overwrite existing index files
122
124
  template_fun (func, optional): function taking three arguments (string,
123
125
  list of string, list of string) representing the current root, the list of folders,
124
126
  and the list of files. Should return the HTML source of the index file.
125
127
  basepath (str, optional): if not None, the name used for each subfolder in [dir]
126
128
  in the output files will be relative to [basepath]
129
+ recursive (bool, optional): recurse into subfolders
127
130
  """
128
131
 
129
132
  print('Traversing {}'.format(dir))
@@ -141,7 +144,7 @@ def traverse_and_create_index(dir,
141
144
  # Output is written to file *root*/index.html
142
145
  output_file = os.path.join(root, "index.html")
143
146
 
144
- if not overwrite_files and os.path.isfile(output_file):
147
+ if (not overwrite) and os.path.isfile(output_file):
145
148
  print('Skipping {}, file exists'.format(output_file))
146
149
  continue
147
150
 
@@ -157,7 +160,10 @@ def traverse_and_create_index(dir,
157
160
  with open(output_file, 'wt') as fi:
158
161
  fi.write(html)
159
162
 
160
- # ...def traverse_and_create_index(...)
163
+ if not recursive:
164
+ break
165
+
166
+ # ...def create_html_index(...)
161
167
 
162
168
 
163
169
  #%% Command-line driver
@@ -171,7 +177,7 @@ def main(): # noqa
171
177
  parser.add_argument("--basepath", type=str,
172
178
  help='Folder names will be printed relative to basepath, if specified',
173
179
  default=None)
174
- parser.add_argument("--enable_overwrite", action='store_true', default=False,
180
+ parser.add_argument("--overwrite", action='store_true', default=False,
175
181
  help='If set, the script will overwrite existing index.html files.')
176
182
 
177
183
  if len(sys.argv[1:]) == 0:
@@ -182,9 +188,9 @@ def main(): # noqa
182
188
 
183
189
  assert os.path.isdir(args.directory), "{} is not a valid directory".format(args.directory)
184
190
 
185
- traverse_and_create_index(args.directory,
186
- overwrite_files=args.enable_overwrite,
187
- basepath=args.basepath)
191
+ create_html_index(args.directory,
192
+ overwrite=args.overwrite,
193
+ basepath=args.basepath)
188
194
 
189
195
  if __name__ == '__main__':
190
196
  main()
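Usage sketch for the renamed entry point (the module path is taken from the RECORD entry for directory_listing.py below; the folder name is a placeholder):

from megadetector.utils.directory_listing import create_html_index

# Write index.html for the top-level folder only, overwriting any existing index;
# omit recursive=False to index the whole tree, as traverse_and_create_index did
create_html_index('/data/md_previews', overwrite=True, recursive=False)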
@@ -528,7 +528,8 @@ def find_images(dirname,
528
528
  def clean_filename(filename,
529
529
  allow_list=VALID_FILENAME_CHARS,
530
530
  char_limit=CHAR_LIMIT,
531
- force_lower= False):
531
+ force_lower=False,
532
+ remove_trailing_leading_whitespace=True):
532
533
  r"""
533
534
  Removes non-ASCII and other invalid filename characters (on any
534
535
  reasonable OS) from a filename, then optionally trims to a maximum length.
@@ -544,11 +545,27 @@ def clean_filename(filename,
544
545
  char_limit (int, optional): maximum allowable filename length, if None will skip this
545
546
  step
546
547
  force_lower (bool, optional): convert the resulting filename to lowercase
547
-
548
+ remove_trailing_leading_whitespace (bool, optional): remove trailing and
549
+ leading whitespace from each component of a path, e.g. does not allow
550
+ a/b/c /d.jpg
548
551
  Returns:
549
552
  str: cleaned version of [filename]
550
553
  """
551
554
 
555
+ if remove_trailing_leading_whitespace:
556
+
557
+ # Best effort to preserve the original separator
558
+ separator = '/'
559
+ if '\\' in filename:
560
+ separator = '\\'
561
+
562
+ filename = filename.replace('\\','/')
563
+ components = filename.split('/')
564
+ clean_components = [c.strip() for c in components]
565
+ filename = separator.join(clean_components)
566
+ if separator == '\\':
567
+ filename = filename.replace('/','\\')
568
+
552
569
  # keep only valid ascii chars
553
570
  cleaned_filename = (unicodedata.normalize('NFKD', filename)
554
571
  .encode('ASCII', 'ignore').decode())
@@ -565,7 +582,8 @@ def clean_filename(filename,
565
582
  def clean_path(pathname,
566
583
  allow_list=VALID_PATH_CHARS,
567
584
  char_limit=CHAR_LIMIT,
568
- force_lower=False):
585
+ force_lower=False,
586
+ remove_trailing_leading_whitespace=True):
569
587
  """
570
588
  Removes non-ASCII and other invalid path characters (on any reasonable
571
589
  OS) from a path, then optionally trims to a maximum length.
@@ -576,13 +594,20 @@ def clean_path(pathname,
576
594
  char_limit (int, optional): maximum allowable filename length, if None will skip this
577
595
  step
578
596
  force_lower (bool, optional): convert the resulting filename to lowercase
597
+ remove_trailing_leading_whitespace (bool, optional): remove trailing and
598
+ leading whitespace from each component of a path, e.g. does not allow
599
+ a/b/c /d.jpg
579
600
 
580
601
  Returns:
581
602
  str: cleaned version of [filename]
582
603
  """
583
604
 
584
- return clean_filename(pathname, allow_list=allow_list,
585
- char_limit=char_limit, force_lower=force_lower)
605
+ return clean_filename(pathname,
606
+ allow_list=allow_list,
607
+ char_limit=char_limit,
608
+ force_lower=force_lower,
609
+ remove_trailing_leading_whitespace=\
610
+ remove_trailing_leading_whitespace)
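To illustrate the new per-component whitespace stripping, a small sketch (expected outputs are inferred from the code above and assume that spaces and separators are allowed by VALID_PATH_CHARS, as the existing tests suggest):

from megadetector.utils.path_utils import clean_path

# Leading/trailing whitespace is now stripped from each path component by default
print(clean_path('a/b/c /d.jpg'))
# expected: a/b/c/d.jpg

# The previous behavior is still available
print(clean_path('a/b/c /d.jpg', remove_trailing_leading_whitespace=False))
# expected: a/b/c /d.jpg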
586
611
 
587
612
 
588
613
  def flatten_path(pathname,separator_chars=SEPARATOR_CHARS,separator_char_replacement='~'):
@@ -1553,6 +1578,7 @@ class TestPathUtils:
1553
1578
  """
1554
1579
 
1555
1580
  self.test_dir = make_test_folder(subfolder='megadetector/path_utils_tests')
1581
+ print('Using temporary folder {} for path utils testing'.format(self.test_dir))
1556
1582
  os.makedirs(self.test_dir, exist_ok=True)
1557
1583
 
1558
1584
 
@@ -1776,7 +1802,11 @@ class TestPathUtils:
1776
1802
  ])
1777
1803
  folders_non_recursive_abs = folder_list(folder_list_dir, recursive=False,
1778
1804
  return_relative_paths=False)
1779
- assert sorted(folders_non_recursive_abs) == expected_folders_non_recursive_abs
1805
+ assert sorted(folders_non_recursive_abs) == expected_folders_non_recursive_abs, \
1806
+ 'Non-recursive folder list failed, expected:\n\n{}\n\nFound:\n\n{}'.format(
1807
+ str(expected_folders_non_recursive_abs),
1808
+ str(folders_non_recursive_abs)
1809
+ )
1780
1810
 
1781
1811
  # Test non-recursive, relative paths
1782
1812
  expected_folders_non_recursive_rel = sorted(['subdir1', 'subdir2'])
@@ -2114,7 +2144,17 @@ class TestPathUtils:
2114
2144
  assert clean_filename("test*file?.txt", char_limit=10) == "testfile.t"
2115
2145
  assert clean_filename("TestFile.TXT", force_lower=True) == "testfile.txt"
2116
2146
  assert clean_filename("file:with<illegal>chars.txt") == "filewithillegalchars.txt"
2117
- assert clean_filename(" accented_name_éà.txt") == " accented_name_ea.txt"
2147
+
2148
+ s = " accented_name_éà.txt"
2149
+
2150
+ assert clean_filename(s,
2151
+ remove_trailing_leading_whitespace=False) == " accented_name_ea.txt", \
2152
+ 'clean_filename with remove_trailing_leading_whitespace=False: {}'.format(
2153
+ clean_filename(s, remove_trailing_leading_whitespace=False))
2154
+
2155
+ assert clean_filename(s, remove_trailing_leading_whitespace=True) == "accented_name_ea.txt", \
2156
+ 'clean_filename with remove_trailing_leading_whitespace=True: {}'.format(
2157
+ clean_filename(s, remove_trailing_leading_whitespace=True))
2118
2158
 
2119
2159
  # Separators are not allowed by default in clean_filename
2120
2160
  assert clean_filename("path/to/file.txt") == "pathtofile.txt"
@@ -2444,7 +2484,13 @@ class TestPathUtils:
2444
2484
  un_tar_dir = os.path.join(self.test_dir, "un_tar_contents")
2445
2485
  os.makedirs(un_tar_dir, exist_ok=True)
2446
2486
  with tarfile.open(output_tar_path, 'r:gz') as tf:
2447
- tf.extractall(path=un_tar_dir)
2487
+ # The "filter" option was added as of Python 3.12, and *not* specifying
2488
+ # filter=None will change behavior as of Python 3.14. We want the unmodified
2489
+ # behavior, but we want to support Python <3.12, so we do a version check.
2490
+ if sys.version_info >= (3, 12):
2491
+ tf.extractall(path=un_tar_dir, filter=None)
2492
+ else:
2493
+ tf.extractall(path=un_tar_dir)
2448
2494
 
2449
2495
  expected_untarred_file1 = os.path.join(un_tar_dir, os.path.relpath(file1_path, self.test_dir))
2450
2496
  expected_untarred_file2 = os.path.join(un_tar_dir, os.path.relpath(file2_path, self.test_dir))
@@ -2618,7 +2664,9 @@ def test_path_utils():
2618
2664
 
2619
2665
  test_instance = TestPathUtils()
2620
2666
  test_instance.set_up()
2667
+
2621
2668
  try:
2669
+
2622
2670
  test_instance.test_is_image_file()
2623
2671
  test_instance.test_find_image_strings()
2624
2672
  test_instance.test_find_images()
@@ -2643,5 +2691,7 @@ def test_path_utils():
2643
2691
  test_instance.test_add_files_to_single_tar_file()
2644
2692
  test_instance.test_parallel_zip_individual_files_and_folders()
2645
2693
  test_instance.test_compute_file_hash()
2694
+
2646
2695
  finally:
2696
+
2647
2697
  test_instance.tear_down()
@@ -2,7 +2,7 @@
2
2
 
3
3
  url_utils.py
4
4
 
5
- Frequently-used functions for downloading or manipulating URLs
5
+ Frequently-used functions for downloading, manipulating, or serving URLs
6
6
 
7
7
  """
8
8
 
@@ -16,6 +16,9 @@ import urllib.error
16
16
  import requests
17
17
  import shutil
18
18
  import pytest
19
+ import socketserver
20
+ import threading
21
+ import http.server
19
22
 
20
23
  from functools import partial
21
24
  from tqdm import tqdm
@@ -453,6 +456,93 @@ def get_url_sizes(urls,n_workers=1,pool_type='thread',timeout=None,verbose=False
453
456
  return url_to_size
454
457
 
455
458
 
459
+ #%% Singleton HTTP server
460
+
461
+ class QuietHTTPRequestHandler(http.server.SimpleHTTPRequestHandler):
462
+ """
463
+ SimpleHTTPRequestHandler subclass that suppresses console printouts
464
+ """
465
+ def __init__(self, *args, directory=None, **kwargs):
466
+ super().__init__(*args, directory=directory, **kwargs)
467
+
468
+ def log_message(self, format, *args): # noqa
469
+ pass
470
+
471
+
472
+ class SingletonHTTPServer:
473
+ """
474
+ HTTP server that runs on a local port, serving a particular local folder. Runs as a
475
+ singleton, so starting a server in a new folder closes the previous server. I use this
476
+ primarily to serve MD/SpeciesNet previews from manage_local_batch, which can exceed
477
+ the 260-character filename length limitation imposed by browsers on Windows, so really the
478
+ point here is just to remove characters from the URL.
479
+ """
480
+
481
+ _server = None
482
+ _thread = None
483
+
484
+ @classmethod
485
+ def start_server(cls, directory, port=8000, host='localhost'):
486
+ """
487
+ Start or restart the HTTP server with a specific directory
488
+
489
+ Args:
490
+ directory (str): the root folder served by the server
491
+ port (int, optional): the port on which to create the server
492
+ host (str, optional): the host on which to listen, typically
493
+ either "localhost" (default) or "0.0.0.0"
494
+
495
+ Returns:
496
+ str: URL to the running host
497
+ """
498
+
499
+ # Stop the existing server instance if necessary
500
+ cls.stop_server()
501
+
502
+ # Create new server
503
+ handler = partial(QuietHTTPRequestHandler, directory=directory)
504
+ cls._server = socketserver.TCPServer((host, port), handler)
505
+
506
+ # Start server in daemon thread (dies when parent process dies)
507
+ cls._thread = threading.Thread(target=cls._server.serve_forever)
508
+ cls._thread.daemon = True
509
+ cls._thread.start()
510
+
511
+ print(f"Serving {directory} at http://{host}:{port}")
512
+ return f"http://{host}:{port}"
513
+
514
+
515
+ @classmethod
516
+ def stop_server(cls):
517
+ """
518
+ Stop the current server (if one is running)
519
+ """
520
+
521
+ if cls._server:
522
+ cls._server.shutdown()
523
+ cls._server.server_close()
524
+ cls._server = None
525
+ if cls._thread:
526
+ cls._thread.join(timeout=1)
527
+ cls._thread = None
528
+
529
+
530
+ @classmethod
531
+ def is_running(cls):
532
+ """
533
+ Check whether the server is currently running.
534
+
535
+ Returns:
536
+ bool: True if the server is running
537
+ """
538
+
539
+ return (cls._server is not None) and \
540
+ (cls._thread is not None) and \
541
+ (cls._thread.is_alive())
542
+
543
+ # ...class SingletonHTTPServer
544
+
545
+
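A usage sketch for the new singleton server (assuming it is importable from megadetector.utils.url_utils, the module this diff modifies; the folder and port are placeholders):

from megadetector.utils.url_utils import SingletonHTTPServer

# Serve a local preview folder; starting a server for a different folder
# automatically shuts down the previous one
base_url = SingletonHTTPServer.start_server('/data/md_previews', port=8000)
print(base_url)                          # http://localhost:8000
print(SingletonHTTPServer.is_running())  # True

SingletonHTTPServer.stop_server()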
456
546
  #%% Tests
457
547
 
458
548
  # Constants for tests
@@ -10,8 +10,6 @@ Functions related to working with the SpeciesNet / Wildlife Insights taxonomy.
10
10
 
11
11
  import os
12
12
  import json
13
- import tempfile
14
- import uuid
15
13
 
16
14
  import pandas as pd
17
15
 
@@ -399,11 +397,12 @@ def generate_whole_image_detections_for_classifications(classifications_json_fil
399
397
 
400
398
 
401
399
  def generate_md_results_from_predictions_json(predictions_json_file,
402
- md_results_file,
400
+ md_results_file=None,
403
401
  base_folder=None,
404
402
  max_decimals=5,
405
403
  convert_human_to_person=True,
406
- convert_homo_species_to_human=True):
404
+ convert_homo_species_to_human=True,
405
+ verbose=False):
407
406
  """
408
407
  Generate an MD-formatted .json file from a predictions.json file, generated by the
409
408
  SpeciesNet ensemble. Typically, MD results files use relative paths, and predictions.json
@@ -424,7 +423,7 @@ def generate_md_results_from_predictions_json(predictions_json_file,
424
423
 
425
424
  Args:
426
425
  predictions_json_file (str): path to a predictions.json file, or a dict
427
- md_results_file (str): path to which we should write an MD-formatted .json file
426
+ md_results_file (str, optional): path to which we should write an MD-formatted .json file
428
427
  base_folder (str, optional): leading string to remove from each path in the
429
428
  predictions.json file
430
429
  max_decimals (int, optional): number of decimal places to which we should round
@@ -435,6 +434,10 @@ def generate_md_results_from_predictions_json(predictions_json_file,
435
434
  convert_homo_species_to_human (bool, optional): the ensemble often rolls human predictions
436
435
  up to "homo species", which isn't wrong, but looks odd. This forces these back to
437
436
  "homo sapiens".
437
+ verbose (bool, optional): enable additional debug output
438
+
439
+ Returns:
440
+ dict: results in MD format
438
441
  """
439
442
 
440
443
  # Read predictions file
@@ -558,7 +561,9 @@ def generate_md_results_from_predictions_json(predictions_json_file,
558
561
  # but a non-blank prediction. For now, create a fake detection to handle this prediction.
559
562
  if len(im_out['detections']) == 0:
560
563
 
561
- print('Warning: creating fake detection for non-blank whole-image classification')
564
+ if verbose:
565
+ print('Warning: creating fake detection for non-blank whole-image classification' + \
566
+ ' in {}'.format(im_in['file']))
562
567
  det_out = {}
563
568
  all_unknown_detections.append(det_out)
564
569
 
@@ -640,12 +645,15 @@ def generate_md_results_from_predictions_json(predictions_json_file,
640
645
  output_dict['classification_category_descriptions'] = classification_category_descriptions
641
646
  output_dict['images'] = images_out
642
647
 
643
- with open(md_results_file,'w') as f:
644
- json.dump(output_dict,f,indent=1)
648
+ if md_results_file is not None:
649
+ with open(md_results_file,'w') as f:
650
+ json.dump(output_dict,f,indent=1)
645
651
 
646
- validation_options = ValidateBatchResultsOptions()
647
- validation_options.raise_errors = True
648
- _ = validate_batch_results(md_results_file, options=validation_options)
652
+ validation_options = ValidateBatchResultsOptions()
653
+ validation_options.raise_errors = True
654
+ _ = validate_batch_results(md_results_file, options=validation_options)
655
+
656
+ return output_dict
649
657
 
650
658
  # ...def generate_md_results_from_predictions_json(...)
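With md_results_file now optional and the converted dict returned to the caller, an in-memory conversion might look like this (a sketch; the predictions.json path is a placeholder, and the module path is taken from the RECORD entry for wi_taxonomy_utils.py below):

from megadetector.utils.wi_taxonomy_utils import generate_md_results_from_predictions_json

# Convert a SpeciesNet predictions.json to MD format without writing a file
md_results = generate_md_results_from_predictions_json('predictions.json',
                                                       md_results_file=None,
                                                       base_folder=None,
                                                       verbose=True)
print('Converted {} images'.format(len(md_results['images'])))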
651
659
 
@@ -885,24 +893,16 @@ def load_md_or_speciesnet_file(fn,verbose=True):
885
893
  with open(fn,'r') as f:
886
894
  detector_output = json.load(f)
887
895
 
888
- # Convert to MD format if necessary
896
+ # If this is a SpeciesNet file, convert to MD format
889
897
  if 'predictions' in detector_output:
898
+
890
899
  if verbose:
891
900
  print('This appears to be a SpeciesNet output file, converting to MD format')
892
- md_temp_dir = os.path.join(tempfile.gettempdir(), 'megadetector_temp_files')
893
- os.makedirs(md_temp_dir,exist_ok=True)
894
- temp_results_file = os.path.join(md_temp_dir,str(uuid.uuid1()) + '.json')
895
- print('Writing temporary results to {}'.format(temp_results_file))
896
- generate_md_results_from_predictions_json(predictions_json_file=fn,
897
- md_results_file=temp_results_file,
898
- base_folder=None)
899
- with open(temp_results_file,'r') as f:
900
- detector_output = json.load(f)
901
- try:
902
- os.remove(temp_results_file)
903
- except Exception:
904
- if verbose:
905
- print('Warning: error removing temporary .json {}'.format(temp_results_file))
901
+ detector_output = generate_md_results_from_predictions_json(predictions_json_file=fn,
902
+ md_results_file=None,
903
+ base_folder=None)
904
+
905
+ # ...if this is a SpeciesNet file
906
906
 
907
907
  assert 'images' in detector_output, \
908
908
  'Detector output file should be a json file with an "images" field.'
@@ -67,6 +67,10 @@ class VideoVisualizationOptions:
67
67
  #: Skip frames before first and after last above-threshold detection
68
68
  self.trim_to_detections = False
69
69
 
70
+ #: By default, output videos use the same extension as input videos;
71
+ #: use this to force a particular extension
72
+ self.output_extension = None
73
+
70
74
  # ...class VideoVisualizationOptions
71
75
 
72
76
 
@@ -287,10 +291,16 @@ def _process_video(video_entry,
287
291
  result['error'] = 'Video not found: {}'.format(input_video_path)
288
292
  return result
289
293
 
290
- # Create output path preserving directory structure
291
- rel_path = video_entry['file']
292
- output_video_path = os.path.join(out_dir, rel_path)
293
- os.makedirs(os.path.dirname(output_video_path), exist_ok=True)
294
+ output_fn_relative = video_entry['file']
295
+
296
+ if options.output_extension is not None:
297
+ ext = options.output_extension
298
+ if not ext.startswith('.'):
299
+ ext = '.' + ext
300
+ output_fn_relative = os.path.splitext(output_fn_relative)[0] + ext
301
+
302
+ output_fn_abs = os.path.join(out_dir, output_fn_relative)
303
+ os.makedirs(os.path.dirname(output_fn_abs), exist_ok=True)
294
304
 
295
305
  # Get frames to process
296
306
  frames_to_process = _get_frames_to_process(video_entry,
@@ -392,10 +402,10 @@ def _process_video(video_entry,
392
402
 
393
403
  # Create VideoWriter
394
404
  fourcc = cv2.VideoWriter_fourcc(*options.fourcc)
395
- video_writer = cv2.VideoWriter(output_video_path, fourcc, output_framerate, (width, height))
405
+ video_writer = cv2.VideoWriter(output_fn_abs, fourcc, output_framerate, (width, height))
396
406
 
397
407
  if not video_writer.isOpened():
398
- result['error'] = 'Failed to open video writer for {}'.format(output_video_path)
408
+ result['error'] = 'Failed to open video writer for {}'.format(output_fn_abs)
399
409
  return result
400
410
 
401
411
  # Write frames
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: megadetector
3
- Version: 10.0.7
3
+ Version: 10.0.8
4
4
  Summary: MegaDetector is an AI model that helps conservation folks spend less time doing boring things with camera trap images.
5
5
  Author-email: Your friendly neighborhood MegaDetector team <cameratraps@lila.science>
6
6
  Maintainer-email: Your friendly neighborhood MegaDetector team <cameratraps@lila.science>
@@ -32,7 +32,7 @@ megadetector/classification/efficientnet/utils.py,sha256=76SQdh0zK7CFcwTW4kiechC
32
32
  megadetector/data_management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
33
  megadetector/data_management/animl_to_md.py,sha256=Z6PDJxeM_5dMZJBM3j0mxDPW2_1bNkXx0M3_qq42_Ig,4416
34
34
  megadetector/data_management/camtrap_dp_to_coco.py,sha256=HoCGMzZTEvnudnAjbOr-mCizXHmc8mMNSUChy_Q9PkI,9673
35
- megadetector/data_management/cct_json_utils.py,sha256=B9YawIxs1l62AsWlv0yHaV01XZtbRXQ8NByRwfYGjv0,19536
35
+ megadetector/data_management/cct_json_utils.py,sha256=Azyuwok6-g5YGVAdBzv3-eJIlplXCoTcjGWu6zy9bQ0,19917
36
36
  megadetector/data_management/cct_to_md.py,sha256=e1fYevSz0m65n5H16uB6uwzNiXiwxjdB2ka5p68R4d0,5120
37
37
  megadetector/data_management/cct_to_wi.py,sha256=wcBOmurXY5I-hiqV6SmRSGUAeYaKHEU1LgCZjqVmCyw,9561
38
38
  megadetector/data_management/coco_to_labelme.py,sha256=uYJ60XoZfHUEfLzj-EjLyeNM590skNnMp-IThWwNISo,8683
@@ -59,7 +59,7 @@ megadetector/data_management/databases/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JC
59
59
  megadetector/data_management/databases/add_width_and_height_to_db.py,sha256=EYfFGPkXyFz6ZGQfXjCK3pNXLBg0hu73wiACdEEB0E0,2964
60
60
  megadetector/data_management/databases/combine_coco_camera_traps_files.py,sha256=Au7akR2KZHm_l8-MGGRGf0CQy1az_JVgZW5Yz1_XQeQ,6609
61
61
  megadetector/data_management/databases/integrity_check_json_db.py,sha256=kxGCHpBADXT_LHVLUENGvmby-orvVYIsK6fdgqhABBI,17386
62
- megadetector/data_management/databases/subset_json_db.py,sha256=mO1eAkrfCSAp2_r5vHVjHvet_utFJcWRm3rNa8WvSx8,4134
62
+ megadetector/data_management/databases/subset_json_db.py,sha256=AQF-12vnZJ73JrWvjUd2ME666MmQDiCOvO0e7zGjPpg,6243
63
63
  megadetector/data_management/lila/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
64
64
  megadetector/data_management/lila/create_lila_blank_set.py,sha256=tApCL4XN1Vzl6YvcdI6SY4TZbHeCyHLzNYweFfX0sy0,19490
65
65
  megadetector/data_management/lila/create_lila_test_set.py,sha256=UWJPKrwNW-UVeGrMUrFvmIt2UHVyuFiPzRFKkbEbk5A,5014
@@ -73,21 +73,21 @@ megadetector/data_management/lila/test_lila_metadata_urls.py,sha256=ThU78Ks5V3rF
73
73
  megadetector/detection/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
74
74
  megadetector/detection/change_detection.py,sha256=Ne3GajbH_0KPBU8ruHp4Rkr0uKd5oKAMQ3CQTRKRHgQ,28659
75
75
  megadetector/detection/process_video.py,sha256=kuQHrpOC3LQo9ecqJPpzkds9fZVnoLmrfJw_yh-oxi8,17890
76
- megadetector/detection/pytorch_detector.py,sha256=ixU-2_AnCwEP33FaaqTZRoi1WxIUeM4x_ksbNT-tezA,59817
76
+ megadetector/detection/pytorch_detector.py,sha256=4Ix6AOBciMbn5zFHXe4hgb5tXGOup57ju3z8gtsin3s,60567
77
77
  megadetector/detection/run_detector.py,sha256=TTX29zxDN_O7ja61sOmMIVewUz3yRvKg1D1AAYhVEkc,46851
78
78
  megadetector/detection/run_detector_batch.py,sha256=aZgiywL6arrdQ_l3jzlHctlccqL537lwVStjhi1hIWw,89823
79
- megadetector/detection/run_inference_with_yolov5_val.py,sha256=A-AQuARVVy7oR9WtenCZwzvd5U3HQwihMr4Jkiv9U0g,53515
79
+ megadetector/detection/run_inference_with_yolov5_val.py,sha256=dJXh3BwKOQQ4OA-Mq_heEb7AfBAk7qKUAagnIGuFtaU,53689
80
80
  megadetector/detection/run_md_and_speciesnet.py,sha256=Dp_SpJZp0pX9jzFtxM6zPCyBNq49uyQpMDAdNDLVorM,50280
81
- megadetector/detection/run_tiled_inference.py,sha256=wrQkKIloHBO9v2i0nZ1_Tt75iFtVrnco3Y4FafoVxdw,39382
81
+ megadetector/detection/run_tiled_inference.py,sha256=hVpR-URC67e6Ht-cy_EgIrJ4GFina29H_lBXOE3bzwM,39435
82
82
  megadetector/detection/tf_detector.py,sha256=3b2MiqgMw8KBDzHQliUSDXWrmKpa9iZnfe6EgYpMcYo,8398
83
- megadetector/detection/video_utils.py,sha256=AlmNJ5n7qmv3Z65HcjI1ALAxXMmyTG3pUiO7oJm-8rs,53363
83
+ megadetector/detection/video_utils.py,sha256=M7yje6XeOnR_QwDyuG1o6bwTKvRysoA2NiOK2MSi98E,53943
84
84
  megadetector/postprocessing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
85
85
  megadetector/postprocessing/add_max_conf.py,sha256=9MYtsH2mwkiaZb7Qcor5J_HskfAj7d9srp8G_Qldpk0,1722
86
86
  megadetector/postprocessing/categorize_detections_by_size.py,sha256=DpZpRNFlyeOfWuOc6ICuENgIWDCEtiErJ_frBZp9lYM,5382
87
- megadetector/postprocessing/classification_postprocessing.py,sha256=TrhzjMWkoLs4ml0ObCKR7rUpprbT7sgS6RtyjPicj9A,68362
87
+ megadetector/postprocessing/classification_postprocessing.py,sha256=OoPVr34vXyLykB42SplcSKo9cj7dgf8Yju_DCDhd6_k,68574
88
88
  megadetector/postprocessing/combine_batch_outputs.py,sha256=BEP8cVa0sMIPg7tkWQc_8vOEPnbmWjOsQdVJHe61uz8,8468
89
89
  megadetector/postprocessing/compare_batch_results.py,sha256=RDlKLwea76rOWiDneSJUj6P_oMBMnD2BY4inoxLqQiw,84258
90
- megadetector/postprocessing/convert_output_format.py,sha256=FiwKSiMyEeNVLLfjpQtx3CrMbchwNUaW2TgLmdXGFVo,14892
90
+ megadetector/postprocessing/convert_output_format.py,sha256=3KLO6NqddofgIEYjV8_iZIf0iXaplFN2AroUq5i4R7k,14472
91
91
  megadetector/postprocessing/create_crop_folder.py,sha256=T37HnvBEakikXY3n3Bgk5boFo_0-Z5aKnkEWXv-Ki4s,23166
92
92
  megadetector/postprocessing/detector_calibration.py,sha256=UFjJ8D6tMghatLRj3CyrtJ7vrPIJkULMNsYMIj98j2M,20495
93
93
  megadetector/postprocessing/generate_csv_report.py,sha256=KIGT8zFZev-cl4YOCq2BqnodBWsZG-7CZaWuep_211U,19169
@@ -100,7 +100,7 @@ megadetector/postprocessing/postprocess_batch_results.py,sha256=VJyXx8I6KZgefrLN
100
100
  megadetector/postprocessing/remap_detection_categories.py,sha256=BE6Ce-PGBEx1FyG3XwbYp2D5sh5xUlVf6fonaMuPMAg,7927
101
101
  megadetector/postprocessing/render_detection_confusion_matrix.py,sha256=oNvDTh5td5ynELNnhz4XaLP2HiwLuojkJlob15TpgcY,26365
102
102
  megadetector/postprocessing/separate_detections_into_folders.py,sha256=Yvpkl_MsWbGoo4zvQHrXHkATRJaYdYligItfg9bvuV8,32262
103
- megadetector/postprocessing/subset_json_detector_output.py,sha256=FiP1sUYfIBqOiSwjkJ6qkj4hvlF7yJsF7-y9tmfSShc,32187
103
+ megadetector/postprocessing/subset_json_detector_output.py,sha256=R6CtSMcriXq50EHawXWC5pHZ-vtJFSKqjeleGKiouDY,32325
104
104
  megadetector/postprocessing/top_folders_to_bottom.py,sha256=zYrqMHjUZG8urh2CYphfs91ZQ620uqe-TL8jVYy8KVw,6049
105
105
  megadetector/postprocessing/validate_batch_results.py,sha256=9nr7LeKMdki9Y821ag2bZFQCxuq0OqINDH7cPXyVcY8,12059
106
106
  megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py,sha256=XgVeyga8iSC01MAjXxb2rn-CgJTYHqC_gfxxEoSn4aw,9420
@@ -121,17 +121,17 @@ megadetector/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuF
121
121
  megadetector/tests/test_nms_synthetic.py,sha256=oY6xmT1sLSSN7weQJ8TPTaZgAiSiZ6s43EffUhwLWIw,14707
122
122
  megadetector/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
123
123
  megadetector/utils/ct_utils.py,sha256=IiZV8dWtJamveINv_joATMgMPHeDkZ8l82jDEQcLgQg,60502
124
- megadetector/utils/directory_listing.py,sha256=CZBzwg0Fus1xuRAp3ycEBjViDdwwk4eKdGq06ZERLDg,6414
124
+ megadetector/utils/directory_listing.py,sha256=0-VMuQWo6rETIKERqfX6Zn7pRp_GJ4JiFiWvsw9PQcU,6500
125
125
  megadetector/utils/extract_frames_from_video.py,sha256=vjSVgxtb5z2syHCVYWc2KdNUpc-O6yY8nkbj_wqsIvY,12255
126
126
  megadetector/utils/gpu_test.py,sha256=5zUfAVeSjH8I08eCqayFmMxL-0mix8SjJJTe5ORABvU,3544
127
127
  megadetector/utils/md_tests.py,sha256=Iup4KjyIpLUpZ4TzzwEyGK61rg6aH7NrEQsdQ-ov51I,80300
128
- megadetector/utils/path_utils.py,sha256=qU1jTBHYy11i5vOMslFEUjc1VBnxQHnDCPYkVzuUXms,98686
128
+ megadetector/utils/path_utils.py,sha256=tV8eh77m_uS8YYpOQZO8GUKR6l5sZrSSIkApqgi_DmY,101030
129
129
  megadetector/utils/process_utils.py,sha256=gQcpH9WYvGPUs0FhtJ5_Xvl6JsvoGz8_mnDQk0PbTRM,5673
130
130
  megadetector/utils/split_locations_into_train_val.py,sha256=fd_6pj1aWY6hybwaXvBn9kBcOHjI90U-OsTmEAGpeu8,10297
131
131
  megadetector/utils/string_utils.py,sha256=r2Maw3zbzk3EyaZcNkdqr96yP_8m4ey6v0WxlemEY9U,6155
132
- megadetector/utils/url_utils.py,sha256=VWYDHbWctTtw7mvbb_A5DTdF3v9V2mWhBoOP5MGE5S8,25728
132
+ megadetector/utils/url_utils.py,sha256=PzqN-VquAZFBRin2ZaYi5U2WCsMYSwvM0X-NN45Fdh4,28448
133
133
  megadetector/utils/wi_platform_utils.py,sha256=8CGpiox_aL6RVZKfJqPVwpW4_6Cjku0HIajJPcmeNpE,32019
134
- megadetector/utils/wi_taxonomy_utils.py,sha256=vZ_UlRtyLpfF4-ehBt7HHjcj7PsI2dVWFz2tES9cxt4,66641
134
+ megadetector/utils/wi_taxonomy_utils.py,sha256=o4AvY5gZXfk69pPckdGxgIPhqsH2-hJQucavSRsUnoc,66513
135
135
  megadetector/utils/write_html_image_list.py,sha256=6Tbe5wyUxoBYJgH9yVrxxKCeWF2BVre_wQMEOQJ-ZIU,9068
136
136
  megadetector/visualization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
137
137
  megadetector/visualization/plot_utils.py,sha256=uDDlOhdaJ3V8sGj2kS9b0cgszKc8WCq2_ofl6TW_XUs,10727
@@ -139,9 +139,9 @@ megadetector/visualization/render_images_with_thumbnails.py,sha256=-XX4PG4wnrFjF
139
139
  megadetector/visualization/visualization_utils.py,sha256=E5uvysS3F1S_yiPFxZty3U2f6cjuE8zG6XWggYOu-5o,75921
140
140
  megadetector/visualization/visualize_db.py,sha256=8YDWSR0eMehXYdPtak9z8UUw35xV7hu-0eCuzgSLjWc,25558
141
141
  megadetector/visualization/visualize_detector_output.py,sha256=HpWh7ugwo51YBHsFi40iAp9G-uRAMMjgsm8H_uBolBs,20295
142
- megadetector/visualization/visualize_video_output.py,sha256=4A5uit_JVV46kZCsO6j0bZ5-o6ZTAlXKuVvvR_xWpho,20266
143
- megadetector-10.0.7.dist-info/licenses/LICENSE,sha256=RMa3qq-7Cyk7DdtqRj_bP1oInGFgjyHn9-PZ3PcrqIs,1100
144
- megadetector-10.0.7.dist-info/METADATA,sha256=TOU1IZ7EWaMp6D_11fpJNaH8_csJvVIXGynIol6flLc,6486
145
- megadetector-10.0.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
146
- megadetector-10.0.7.dist-info/top_level.txt,sha256=wf9DXa8EwiOSZ4G5IPjakSxBPxTDjhYYnqWRfR-zS4M,13
147
- megadetector-10.0.7.dist-info/RECORD,,
142
+ megadetector/visualization/visualize_video_output.py,sha256=ibMGB5ynMwNXmaMlY8h8tURb-Lyvuxs1EB08x_jvev0,20606
143
+ megadetector-10.0.8.dist-info/licenses/LICENSE,sha256=RMa3qq-7Cyk7DdtqRj_bP1oInGFgjyHn9-PZ3PcrqIs,1100
144
+ megadetector-10.0.8.dist-info/METADATA,sha256=eMFedi5m5t_vYsJzAQsf7Q2Z9mHASAqEOuJjiQVWgZE,6486
145
+ megadetector-10.0.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
146
+ megadetector-10.0.8.dist-info/top_level.txt,sha256=wf9DXa8EwiOSZ4G5IPjakSxBPxTDjhYYnqWRfR-zS4M,13
147
+ megadetector-10.0.8.dist-info/RECORD,,