megadetector 10.0.9__py3-none-any.whl → 10.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -210,6 +210,8 @@ def read_pil_exif(im,options=None):
210
210
  if exif_info is None:
211
211
  return exif_tags
212
212
 
213
+ # Read all standard EXIF tags; if necessary, we'll filter later to a restricted
214
+ # list of tags.
213
215
  for k, v in exif_info.items():
214
216
  assert isinstance(k,str) or isinstance(k,int), \
215
217
  'Invalid EXIF key {}'.format(str(k))
@@ -221,6 +223,7 @@ def read_pil_exif(im,options=None):
221
223
 
222
224
  exif_ifd_tags = _get_exif_ifd(exif_info)
223
225
 
226
+ # Read tags that are only available via offset
224
227
  for k in exif_ifd_tags.keys():
225
228
  v = exif_ifd_tags[k]
226
229
  if k in exif_tags:
@@ -266,7 +269,7 @@ def read_pil_exif(im,options=None):
266
269
 
267
270
  # Convert to strings, e.g. 'GPSTimeStamp'
268
271
  gps_info = {}
269
- for int_tag,v in enumerate(gps_info_raw.keys()):
272
+ for int_tag,v in gps_info_raw.items():
270
273
  assert isinstance(int_tag,int)
271
274
  if int_tag in ExifTags.GPSTAGS:
272
275
  gps_info[ExifTags.GPSTAGS[int_tag]] = v
@@ -276,11 +279,15 @@ def read_pil_exif(im,options=None):
276
279
  exif_tags['GPSInfo'] = gps_info
277
280
 
278
281
  except Exception as e:
282
+
279
283
  if options.verbose:
280
284
  print('Warning: error reading GPS info: {}'.format(str(e)))
281
285
 
282
286
  # ...if we think there might be GPS tags in this image
283
287
 
288
+ # Filter tags if necessary
289
+ exif_tags = _filter_tags(exif_tags,options)
290
+
284
291
  return exif_tags
285
292
 
286
293
  # ...read_pil_exif()
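
The GPS fix above replaces enumerate(gps_info_raw.keys()) with gps_info_raw.items(): the old form paired each tag with its position in the dict rather than with its value, so GPS values were silently wrong. A minimal sketch of the difference, using made-up tag IDs rather than real EXIF data:

    # Made-up GPS dict: integer tag IDs -> values
    gps_info_raw = {1: 'N', 2: (47, 36, 0)}

    # Old loop: enumerate() yields (index, key), so "v" was really the key
    old_pairs = list(enumerate(gps_info_raw.keys()))   # [(0, 1), (1, 2)]

    # New loop: items() yields (key, value), as intended
    new_pairs = list(gps_info_raw.items())             # [(1, 'N'), (2, (47, 36, 0))]
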
@@ -337,10 +344,16 @@ def _filter_tags(tags,options):
337
344
  if options.tags_to_include is None and options.tags_to_exclude is None:
338
345
  return tags
339
346
  if options.tags_to_include is not None:
347
+ if isinstance(options.tags_to_include,str):
348
+ if options.tags_to_include == 'all':
349
+ return tags
340
350
  assert options.tags_to_exclude is None, "tags_to_include and tags_to_exclude are incompatible"
351
+ tags_to_include = options.tags_to_include.split(',')
352
+ # Case-insensitive matching
353
+ tags_to_include = [s.lower() for s in tags_to_include]
341
354
  tags_to_return = {}
342
355
  for tag_name in tags.keys():
343
- if tag_name in options.tags_to_include:
356
+ if str(tag_name).lower() in tags_to_include:
344
357
  tags_to_return[tag_name] = tags[tag_name]
345
358
  return tags_to_return
346
359
  if options.tags_to_exclude is not None:
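
A standalone sketch of the include-filter behavior introduced above; this re-implements the logic for illustration (with whitespace stripping added) and is not the package's own _filter_tags:

    def filter_tags_sketch(tags, tags_to_include=None):
        """Keep only the EXIF tags named in a comma-separated list (case-insensitive)."""
        if tags_to_include is None or tags_to_include == 'all':
            return tags
        wanted = [s.strip().lower() for s in tags_to_include.split(',')]
        return {k: v for k, v in tags.items() if str(k).lower() in wanted}

    exif_tags = {'DateTimeOriginal': '2024:01:01 12:00:00', 'Make': 'Acme', 'GPSInfo': {}}
    print(filter_tags_sketch(exif_tags, 'datetimeoriginal,make'))
    # {'DateTimeOriginal': '2024:01:01 12:00:00', 'Make': 'Acme'}
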
@@ -94,10 +94,10 @@ max_queue_size = 10
94
94
  # How often should we print progress when using the image queue?
95
95
  n_queue_print = 1000
96
96
 
97
- # Only used if --include_exif_data or --include_image_timestamp are supplied
98
- exif_options = read_exif.ReadExifOptions()
99
- exif_options.processing_library = 'pil'
100
- exif_options.byte_handling = 'convert_to_string'
97
+ # Only used if --include_exif_tags or --include_image_timestamp are supplied
98
+ exif_options_base = read_exif.ReadExifOptions()
99
+ exif_options_base.processing_library = 'pil'
100
+ exif_options_base.byte_handling = 'convert_to_string'
101
101
 
102
102
  # Only relevant when we're running our test harness; because bugs in batch
103
103
  # inference are dependent on batch grouping, we randomize batch grouping
@@ -208,7 +208,7 @@ def _consumer_func(q,
208
208
  image_size=None,
209
209
  include_image_size=False,
210
210
  include_image_timestamp=False,
211
- include_exif_data=False,
211
+ include_exif_tags=None,
212
212
  augment=False,
213
213
  detector_options=None,
214
214
  preprocess_on_image_queue=default_preprocess_on_image_queue,
@@ -232,7 +232,7 @@ def _consumer_func(q,
232
232
  image_size (int, optional): image size to use for inference
233
233
  include_image_size (bool, optional): include image dimensions in output
234
234
  include_image_timestamp (bool, optional): include image timestamps in output
235
- include_exif_data (bool, optional): include EXIF data in output
235
+ include_exif_tags (str, optional): comma-separated list of EXIF tags to include in output
236
236
  augment (bool, optional): enable image augmentation
237
237
  detector_options (dict, optional): key/value pairs that are interpreted differently
238
238
  by different detectors
@@ -334,7 +334,7 @@ def _consumer_func(q,
334
334
  image_size=image_size,
335
335
  include_image_size=include_image_size,
336
336
  include_image_timestamp=include_image_timestamp,
337
- include_exif_data=include_exif_data,
337
+ include_exif_tags=include_exif_tags,
338
338
  augment=augment)
339
339
  results.extend(batch_results)
340
340
 
@@ -411,7 +411,7 @@ def _consumer_func(q,
411
411
  image_size=image_size,
412
412
  include_image_size=include_image_size,
413
413
  include_image_timestamp=include_image_timestamp,
414
- include_exif_data=include_exif_data,
414
+ include_exif_tags=include_exif_tags,
415
415
  augment=augment)
416
416
  results.extend(batch_results)
417
417
 
@@ -431,7 +431,7 @@ def _consumer_func(q,
431
431
  image_size=image_size,
432
432
  include_image_size=include_image_size,
433
433
  include_image_timestamp=include_image_timestamp,
434
- include_exif_data=include_exif_data,
434
+ include_exif_tags=include_exif_tags,
435
435
  augment=augment)
436
436
  results.append(result)
437
437
  n_images_processed += 1
@@ -464,7 +464,7 @@ def _run_detector_with_image_queue(image_files,
464
464
  image_size=None,
465
465
  include_image_size=False,
466
466
  include_image_timestamp=False,
467
- include_exif_data=False,
467
+ include_exif_tags=None,
468
468
  augment=False,
469
469
  detector_options=None,
470
470
  loader_workers=default_loaders,
@@ -487,7 +487,7 @@ def _run_detector_with_image_queue(image_files,
487
487
  doing
488
488
  include_image_size (bool, optional): should we include image size in the output for each image?
489
489
  include_image_timestamp (bool, optional): should we include image timestamps in the output for each image?
490
- include_exif_data (bool, optional): should we include EXIF data in the output for each image?
490
+ include_exif_tags (str, optional): comma-separated list of EXIF tags to include in output
491
491
  augment (bool, optional): enable image augmentation
492
492
  detector_options (dict, optional): key/value pairs that are interpreted differently
493
493
  by different detectors
@@ -562,7 +562,7 @@ def _run_detector_with_image_queue(image_files,
562
562
  image_size,
563
563
  include_image_size,
564
564
  include_image_timestamp,
565
- include_exif_data,
565
+ include_exif_tags,
566
566
  augment,
567
567
  detector_options,
568
568
  preprocess_on_image_queue,
@@ -579,7 +579,7 @@ def _run_detector_with_image_queue(image_files,
579
579
  image_size,
580
580
  include_image_size,
581
581
  include_image_timestamp,
582
- include_exif_data,
582
+ include_exif_tags,
583
583
  augment,
584
584
  detector_options,
585
585
  preprocess_on_image_queue,
@@ -598,7 +598,7 @@ def _run_detector_with_image_queue(image_files,
598
598
  image_size,
599
599
  include_image_size,
600
600
  include_image_timestamp,
601
- include_exif_data,
601
+ include_exif_tags,
602
602
  augment,
603
603
  detector_options,
604
604
  preprocess_on_image_queue,
@@ -680,7 +680,7 @@ def _process_batch(image_items_batch,
680
680
  image_size=None,
681
681
  include_image_size=False,
682
682
  include_image_timestamp=False,
683
- include_exif_data=False,
683
+ include_exif_tags=None,
684
684
  augment=False):
685
685
  """
686
686
  Process a batch of images using generate_detections_one_batch(). Does not necessarily return
@@ -695,7 +695,7 @@ def _process_batch(image_items_batch,
695
695
  image_size (int, optional): image size override
696
696
  include_image_size (bool, optional): include image dimensions in results
697
697
  include_image_timestamp (bool, optional): include image timestamps in results
698
- include_exif_data (bool, optional): include EXIF data in results
698
+ include_exif_tags (str, optional): comma-separated list of EXIF tags to include in output
699
699
  augment (bool, optional): whether to use image augmentation
700
700
 
701
701
  Returns:
@@ -762,12 +762,13 @@ def _process_batch(image_items_batch,
762
762
  image_result['detections'] = \
763
763
  [det for det in image_result['detections'] if det['conf'] >= confidence_threshold]
764
764
 
765
- if include_image_size or include_image_timestamp or include_exif_data:
765
+ if include_image_size or include_image_timestamp or (include_exif_tags is not None):
766
766
 
767
767
  image = valid_images[i_valid_image]
768
768
 
769
769
  # If this was preprocessed by the producer thread, pull out the PIL version
770
770
  if isinstance(image,dict):
771
+
771
772
  image = image['img_original_pil']
772
773
 
773
774
  if include_image_size:
@@ -779,9 +780,12 @@ def _process_batch(image_items_batch,
779
780
 
780
781
  image_result['datetime'] = get_image_datetime(image)
781
782
 
782
- if include_exif_data:
783
+ if include_exif_tags is not None:
783
784
 
784
- image_result['exif_metadata'] = read_exif.read_pil_exif(image,exif_options)
785
+ exif_options = copy.copy(exif_options_base)
786
+ exif_options.tags_to_include = include_exif_tags
787
+ image_result['exif_metadata'] = read_exif.read_pil_exif(
788
+ image,exif_options)
785
789
 
786
790
  # ...if we need to store metadata
787
791
 
@@ -834,7 +838,7 @@ def _process_images(im_files,
834
838
  checkpoint_queue=None,
835
839
  include_image_size=False,
836
840
  include_image_timestamp=False,
837
- include_exif_data=False,
841
+ include_exif_tags=None,
838
842
  augment=False,
839
843
  detector_options=None,
840
844
  loader_workers=default_loaders,
@@ -856,7 +860,7 @@ def _process_images(im_files,
856
860
  checkpoint_queue (Queue, optional): internal parameter used to pass image queues around
857
861
  include_image_size (bool, optional): should we include image size in the output for each image?
858
862
  include_image_timestamp (bool, optional): should we include image timestamps in the output for each image?
859
- include_exif_data (bool, optional): should we include EXIF data in the output for each image?
863
+ include_exif_tags (str, optional): comma-separated list of EXIF tags to include in output
860
864
  augment (bool, optional): enable image augmentation
861
865
  detector_options (dict, optional): key/value pairs that are interpreted differently
862
866
  by different detectors
@@ -890,7 +894,7 @@ def _process_images(im_files,
890
894
  image_size=image_size,
891
895
  include_image_size=include_image_size,
892
896
  include_image_timestamp=include_image_timestamp,
893
- include_exif_data=include_exif_data,
897
+ include_exif_tags=include_exif_tags,
894
898
  augment=augment,
895
899
  detector_options=detector_options,
896
900
  loader_workers=loader_workers,
@@ -907,7 +911,7 @@ def _process_images(im_files,
907
911
  image_size=image_size,
908
912
  include_image_size=include_image_size,
909
913
  include_image_timestamp=include_image_timestamp,
910
- include_exif_data=include_exif_data,
914
+ include_exif_tags=include_exif_tags,
911
915
  augment=augment)
912
916
 
913
917
  if checkpoint_queue is not None:
@@ -929,7 +933,7 @@ def _process_image(im_file,
929
933
  image_size=None,
930
934
  include_image_size=False,
931
935
  include_image_timestamp=False,
932
- include_exif_data=False,
936
+ include_exif_tags=False,
933
937
  augment=False):
934
938
  """
935
939
  Runs a detector (typically MegaDetector) on a single image file.
@@ -947,7 +951,7 @@ def _process_image(im_file,
947
951
  doing
948
952
  include_image_size (bool, optional): should we include image size in the output for each image?
949
953
  include_image_timestamp (bool, optional): should we include image timestamps in the output for each image?
950
- include_exif_data (bool, optional): should we include EXIF data in the output for each image?
954
+ include_exif_tags (str, optional): comma-separated list of EXIF tags to include in output
951
955
  augment (bool, optional): enable image augmentation
952
956
 
953
957
  Returns:
@@ -1000,7 +1004,9 @@ def _process_image(im_file,
1000
1004
  if include_image_timestamp:
1001
1005
  result['datetime'] = get_image_datetime(image)
1002
1006
 
1003
- if include_exif_data:
1007
+ if include_exif_tags is not None:
1008
+ exif_options = copy.copy(exif_options_base)
1009
+ exif_options.tags_to_include = include_exif_tags
1004
1010
  result['exif_metadata'] = read_exif.read_pil_exif(image,exif_options)
1005
1011
 
1006
1012
  return result
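
Both call sites copy the module-level exif_options_base before setting tags_to_include, so per-call settings never touch the shared defaults. A minimal sketch of that pattern, using a generic Options class rather than the real ReadExifOptions:

    import copy

    class Options:
        """Stand-in for ReadExifOptions."""
        def __init__(self):
            self.processing_library = 'pil'
            self.tags_to_include = None

    options_base = Options()

    def make_call_options(tags_to_include):
        # Shallow-copy the shared defaults, then customize for this call only
        options = copy.copy(options_base)
        options.tags_to_include = tags_to_include
        return options

    o = make_call_options('DateTimeOriginal,Make')
    assert options_base.tags_to_include is None   # the shared defaults are untouched
    assert o.tags_to_include == 'DateTimeOriginal,Make'
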
@@ -1055,7 +1061,7 @@ def load_and_run_detector_batch(model_file,
1055
1061
  class_mapping_filename=None,
1056
1062
  include_image_size=False,
1057
1063
  include_image_timestamp=False,
1058
- include_exif_data=False,
1064
+ include_exif_tags=None,
1059
1065
  augment=False,
1060
1066
  force_model_download=False,
1061
1067
  detector_options=None,
@@ -1088,7 +1094,7 @@ def load_and_run_detector_batch(model_file,
1088
1094
  file or YOLOv5 dataset.yaml file
1089
1095
  include_image_size (bool, optional): should we include image size in the output for each image?
1090
1096
  include_image_timestamp (bool, optional): should we include image timestamps in the output for each image?
1091
- include_exif_data (bool, optional): should we include EXIF data in the output for each image?
1097
+ include_exif_tags (str, optional): comma-separated list of EXIF tags to include in output
1092
1098
  augment (bool, optional): enable image augmentation
1093
1099
  force_model_download (bool, optional): force downloading the model file if
1094
1100
  a named model (e.g. "MDV5A") is supplied, even if the local file already
@@ -1207,7 +1213,7 @@ def load_and_run_detector_batch(model_file,
1207
1213
  image_size=image_size,
1208
1214
  include_image_size=include_image_size,
1209
1215
  include_image_timestamp=include_image_timestamp,
1210
- include_exif_data=include_exif_data,
1216
+ include_exif_tags=include_exif_tags,
1211
1217
  augment=augment,
1212
1218
  detector_options=detector_options,
1213
1219
  loader_workers=loader_workers,
@@ -1269,7 +1275,7 @@ def load_and_run_detector_batch(model_file,
1269
1275
  image_size=image_size,
1270
1276
  include_image_size=include_image_size,
1271
1277
  include_image_timestamp=include_image_timestamp,
1272
- include_exif_data=include_exif_data,
1278
+ include_exif_tags=include_exif_tags,
1273
1279
  augment=augment)
1274
1280
 
1275
1281
  results.extend(batch_results)
@@ -1295,7 +1301,7 @@ def load_and_run_detector_batch(model_file,
1295
1301
  image_size=image_size,
1296
1302
  include_image_size=include_image_size,
1297
1303
  include_image_timestamp=include_image_timestamp,
1298
- include_exif_data=include_exif_data,
1304
+ include_exif_tags=include_exif_tags,
1299
1305
  augment=augment)
1300
1306
  results.append(result)
1301
1307
 
@@ -1354,7 +1360,7 @@ def load_and_run_detector_batch(model_file,
1354
1360
  checkpoint_queue=checkpoint_queue,
1355
1361
  include_image_size=include_image_size,
1356
1362
  include_image_timestamp=include_image_timestamp,
1357
- include_exif_data=include_exif_data,
1363
+ include_exif_tags=include_exif_tags,
1358
1364
  augment=augment,
1359
1365
  detector_options=detector_options),
1360
1366
  image_chunks)
@@ -1374,7 +1380,7 @@ def load_and_run_detector_batch(model_file,
1374
1380
  image_size=image_size,
1375
1381
  include_image_size=include_image_size,
1376
1382
  include_image_timestamp=include_image_timestamp,
1377
- include_exif_data=include_exif_data,
1383
+ include_exif_tags=include_exif_tags,
1378
1384
  augment=augment,
1379
1385
  detector_options=detector_options),
1380
1386
  image_chunks)
@@ -1495,7 +1501,7 @@ def get_image_datetime(image):
1495
1501
  returns None if EXIF datetime is not available.
1496
1502
  """
1497
1503
 
1498
- exif_tags = read_exif.read_pil_exif(image,exif_options)
1504
+ exif_tags = read_exif.read_pil_exif(image,exif_options_base)
1499
1505
 
1500
1506
  try:
1501
1507
  datetime_str = exif_tags['DateTimeOriginal']
@@ -1654,7 +1660,7 @@ if False:
1654
1660
  class_mapping_filename = None
1655
1661
  include_image_size = True
1656
1662
  include_image_timestamp = True
1657
- include_exif_data = True
1663
+ include_exif_tags = None
1658
1664
  overwrite_handling = None
1659
1665
 
1660
1666
  # Generate a command line
@@ -1689,8 +1695,8 @@ if False:
1689
1695
  cmd += ' --include_image_size'
1690
1696
  if include_image_timestamp:
1691
1697
  cmd += ' --include_image_timestamp'
1692
- if include_exif_data:
1693
- cmd += ' --include_exif_data'
1698
+ if include_exif_tags is not None:
1699
+ cmd += ' --include_exif_tags "{}"'.format(include_exif_tags)
1694
1700
  if overwrite_handling is not None:
1695
1701
  cmd += ' --overwrite_handling {}'.format(overwrite_handling)
1696
1702
 
@@ -1837,9 +1843,10 @@ def main(): # noqa
1837
1843
  help='Include image datetime (if available) in output file'
1838
1844
  )
1839
1845
  parser.add_argument(
1840
- '--include_exif_data',
1841
- action='store_true',
1842
- help='Include available EXIF data in output file'
1846
+ '--include_exif_tags',
1847
+ type=str,
1848
+ default=None,
1849
+ help='Comma-separated list of EXIF tags to include in output, or "all" to include all tags'
1843
1850
  )
1844
1851
  parser.add_argument(
1845
1852
  '--overwrite_handling',
@@ -1878,6 +1885,12 @@ def main(): # noqa
1878
1885
  action='store_true',
1879
1886
  help=argparse.SUPPRESS)
1880
1887
 
1888
+ # This argument is deprecated in favor of --include_exif_tags
1889
+ parser.add_argument(
1890
+ '--include_exif_data',
1891
+ action='store_true',
1892
+ help=argparse.SUPPRESS)
1893
+
1881
1894
  if len(sys.argv[1:]) == 0:
1882
1895
  parser.print_help()
1883
1896
  parser.exit()
@@ -1888,6 +1901,10 @@ def main(): # noqa
1888
1901
  if args.use_threads_for_queue:
1889
1902
  use_threads_for_queue = True
1890
1903
 
1904
+ # Support the legacy --include_exif_data flag
1905
+ if args.include_exif_data and (args.include_exif_tags is None):
1906
+ args.include_exif_tags = 'all'
1907
+
1891
1908
  detector_options = parse_kvp_list(args.detector_options)
1892
1909
 
1893
1910
  # If the specified detector file is really the name of a known model, find
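
The new argument handling keeps the old flag working by hiding it from --help and mapping it onto the new option. A self-contained sketch of that argparse pattern, simplified from the script's full parser:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--include_exif_tags', type=str, default=None,
                        help='Comma-separated list of EXIF tags to include, or "all"')
    # Deprecated alias, hidden from --help
    parser.add_argument('--include_exif_data', action='store_true',
                        help=argparse.SUPPRESS)

    args = parser.parse_args(['--include_exif_data'])

    # Map the legacy flag onto the new option
    if args.include_exif_data and (args.include_exif_tags is None):
        args.include_exif_tags = 'all'

    print(args.include_exif_tags)  # 'all'
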
@@ -2094,7 +2111,7 @@ def main(): # noqa
2094
2111
  class_mapping_filename=args.class_mapping_filename,
2095
2112
  include_image_size=args.include_image_size,
2096
2113
  include_image_timestamp=args.include_image_timestamp,
2097
- include_exif_data=args.include_exif_data,
2114
+ include_exif_tags=args.include_exif_tags,
2098
2115
  augment=args.augment,
2099
2116
  # Don't download the model *again*
2100
2117
  force_model_download=False,
@@ -61,7 +61,18 @@ DEFAULT_DETECTION_CONFIDENCE_THRESHOLD_FOR_OUTPUT = DEFAULT_OUTPUT_CONFIDENCE_TH
61
61
  DEFAULT_DETECTOR_BATCH_SIZE = 1
62
62
  DEFAULT_CLASSIFIER_BATCH_SIZE = 8
63
63
  DEFAULT_LOADER_WORKERS = 4
64
- MAX_QUEUE_SIZE_IMAGES_PER_WORKER = 10
64
+
65
+ # This determines the maximum number of images that can get read from disk
66
+ # on each of the producer workers before blocking. The actual size of the queue
67
+ # will be MAX_IMAGE_QUEUE_SIZE_PER_WORKER * n_workers. This is only used for
68
+ # the classification step.
69
+ MAX_IMAGE_QUEUE_SIZE_PER_WORKER = 10
70
+
71
+ # This determines the maximum number of crops that can accumulate in the queue
72
+ # used to communicate between the producers (which read and crop images) and the
73
+ # consumer (which runs the classifier). This is only used for the classification step.
74
+ MAX_BATCH_QUEUE_SIZE = 300
75
+
65
76
  DEAFULT_SECONDS_PER_VIDEO_FRAME = 1.0
66
77
 
67
78
  # Max number of classification scores to include per detection
@@ -71,6 +82,11 @@ DEFAULT_TOP_N_SCORES = 2
71
82
  # cumulative confidence is above this value
72
83
  ROLLUP_TARGET_CONFIDENCE = 0.5
73
84
 
85
+ # When the caller supplies an existing MD results file, should we validate it before
86
+ # starting classification?
87
+ VALIDATE_DETECTION_FILE = False
88
+
89
+
74
90
  verbose = False
75
91
 
76
92
 
@@ -109,10 +125,10 @@ class CropBatch:
109
125
  """
110
126
 
111
127
  def __init__(self):
112
- # List of preprocessed images
128
+ #: List of preprocessed images
113
129
  self.crops = []
114
130
 
115
- # List of CropMetadata objects
131
+ #: List of CropMetadata objects
116
132
  self.metadata = []
117
133
 
118
134
  def add_crop(self, crop_data, metadata):
@@ -192,6 +208,7 @@ def _process_image_detections(file_path: str,
192
208
 
193
209
  # Preprocess the crop
194
210
  try:
211
+
195
212
  preprocessed_crop = classifier.preprocess(
196
213
  image,
197
214
  bboxes=[speciesnet_bbox],
@@ -199,6 +216,7 @@ def _process_image_detections(file_path: str,
199
216
  )
200
217
 
201
218
  if preprocessed_crop is not None:
219
+
202
220
  metadata = CropMetadata(
203
221
  image_file=file_path,
204
222
  detection_index=detection_index,
@@ -207,10 +225,11 @@ def _process_image_detections(file_path: str,
207
225
  original_height=original_height
208
226
  )
209
227
 
210
- # Send individual crop immediately to consumer
228
+ # Send individual crop to the consumer
211
229
  batch_queue.put(('crop', preprocessed_crop, metadata))
212
230
 
213
231
  except Exception as e:
232
+
214
233
  print('Warning: failed to preprocess crop from {}, detection {}: {}'.format(
215
234
  file_path, detection_index, str(e)))
216
235
 
@@ -226,6 +245,8 @@ def _process_image_detections(file_path: str,
226
245
  'Failed to preprocess crop: {}'.format(str(e)),
227
246
  failure_metadata))
228
247
 
248
+ # ...try/except
249
+
229
250
  # ...for each detection in this image
230
251
 
231
252
  # ...def _process_image_detections(...)
@@ -256,6 +277,7 @@ def _process_video_detections(file_path: str,
256
277
  frame_to_detections = {}
257
278
 
258
279
  for detection_index, detection in enumerate(detections):
280
+
259
281
  conf = detection['conf']
260
282
  if conf < detection_confidence_threshold:
261
283
  continue
@@ -267,6 +289,8 @@ def _process_video_detections(file_path: str,
267
289
  frame_to_detections[frame_number] = []
268
290
  frame_to_detections[frame_number].append((detection_index, detection))
269
291
 
292
+ # ...for each detection in this video
293
+
270
294
  if len(frames_with_detections) == 0:
271
295
  return
272
296
 
@@ -290,6 +314,7 @@ def _process_video_detections(file_path: str,
290
314
  return
291
315
  frame_number = int(match.group(1))
292
316
 
317
+ # Only process frames for which we have detection results
293
318
  if frame_number not in frame_to_detections:
294
319
  return
295
320
 
@@ -360,13 +385,16 @@ def _process_video_detections(file_path: str,
360
385
 
361
386
  # Process the video frames
362
387
  try:
388
+
363
389
  run_callback_on_frames(
364
390
  input_video_file=absolute_file_path,
365
391
  frame_callback=frame_callback,
366
392
  frames_to_process=frames_to_process,
367
393
  verbose=verbose
368
394
  )
395
+
369
396
  except Exception as e:
397
+
370
398
  print('Warning: failed to process video {}: {}'.format(file_path, str(e)))
371
399
 
372
400
  # Send failure information to consumer for the whole video
@@ -448,6 +476,7 @@ def _crop_producer_func(image_queue: JoinableQueue,
448
476
  is_video = is_video_file(file_path)
449
477
 
450
478
  if is_video:
479
+
451
480
  # Process video
452
481
  _process_video_detections(
453
482
  file_path=file_path,
@@ -457,7 +486,9 @@ def _crop_producer_func(image_queue: JoinableQueue,
457
486
  detection_confidence_threshold=detection_confidence_threshold,
458
487
  batch_queue=batch_queue
459
488
  )
489
+
460
490
  else:
491
+
461
492
  # Process image
462
493
  _process_image_detections(
463
494
  file_path=file_path,
@@ -571,9 +602,9 @@ def _crop_consumer_func(batch_queue: Queue,
571
602
  item_type, data, metadata = item
572
603
 
573
604
  if metadata.image_file not in all_results:
574
- all_results[metadata.image_file] = {}
605
+ all_results[metadata.image_file] = {}
575
606
 
576
- # We should never be processing the same detetion twice
607
+ # We should never be processing the same detection twice
577
608
  assert metadata.detection_index not in all_results[metadata.image_file]
578
609
 
579
610
  if item_type == 'failure':
@@ -601,6 +632,7 @@ def _crop_consumer_func(batch_queue: Queue,
601
632
 
602
633
  # ...while (we have items to process)
603
634
 
635
+ # Send all the results at once back to the main process
604
636
  results_queue.put(all_results)
605
637
 
606
638
  if verbose:
@@ -828,7 +860,7 @@ def _run_detection_step(source_folder: str,
828
860
  batch_size=detector_batch_size,
829
861
  include_image_size=False,
830
862
  include_image_timestamp=False,
831
- include_exif_data=False,
863
+ include_exif_tags=None,
832
864
  loader_workers=detector_worker_threads,
833
865
  preprocess_on_image_queue=True
834
866
  )
@@ -914,9 +946,11 @@ def _run_classification_step(detector_results_file: str,
914
946
  top_n_scores (int, optional): maximum number of scores to include for each detection
915
947
  """
916
948
 
917
- print('Starting SpeciesNet classification step...')
949
+ print('Starting classification step...')
918
950
 
919
951
  # Load MegaDetector results
952
+ print('Reading detection results from {}'.format(detector_results_file))
953
+
920
954
  with open(detector_results_file, 'r') as f:
921
955
  detector_results = json.load(f)
922
956
 
@@ -936,10 +970,22 @@ def _run_classification_step(detector_results_file: str,
936
970
  print('Set multiprocessing start method to spawn (was {})'.format(
937
971
  original_start_method))
938
972
 
939
- # Set up multiprocessing queues
940
- max_queue_size = classifier_worker_threads * MAX_QUEUE_SIZE_IMAGES_PER_WORKER
941
- image_queue = JoinableQueue(max_queue_size)
942
- batch_queue = Queue()
973
+ ## Set up multiprocessing queues
974
+
975
+ # This queue receives lists of image filenames (and associated detection results)
976
+ # from the "main" thread (the one you're reading right now). Items are pulled off
977
+ # of this queue by producer workers (on _crop_producer_func), where the corresponding
978
+ # images are loaded from disk and preprocessed into crops.
979
+ image_queue = JoinableQueue(maxsize= \
980
+ classifier_worker_threads * MAX_IMAGE_QUEUE_SIZE_PER_WORKER)
981
+
982
+ # This queue receives cropped images from producers (on _crop_producer_func); those
983
+ # crops are pulled off of this queue by the consumer (on _crop_consumer_func).
984
+ batch_queue = Queue(maxsize=MAX_BATCH_QUEUE_SIZE)
985
+
986
+ # This is not really used as a queue, rather it's just used to send all the results
987
+ # at once from the consumer process to the main process (the one you're reading right
988
+ # now).
943
989
  results_queue = Queue()
944
990
 
945
991
  # Start producer workers
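
The comments above describe a bounded producer/consumer pipeline: producers block once the crop queue is full, which caps memory use, and the results queue carries a single message back to the main process. A minimal sketch of that shape with toy work functions; the queue sizes are illustrative and this is not the SpeciesNet pipeline itself:

    from multiprocessing import Process, JoinableQueue, Queue

    def producer(image_queue, batch_queue):
        # Pull work items (here just integers) until we see the sentinel
        while True:
            item = image_queue.get()
            if item is None:
                image_queue.task_done()
                break
            batch_queue.put(item * 10)    # stand-in for "load image, crop detections"
            image_queue.task_done()

    def consumer(batch_queue, results_queue, n_expected):
        # Collect every crop, then send all results back in a single message
        results = [batch_queue.get() for _ in range(n_expected)]
        results_queue.put(results)

    if __name__ == '__main__':
        image_queue = JoinableQueue(maxsize=10)   # producers block when this is full
        batch_queue = Queue(maxsize=300)          # caps the number of in-flight crops
        results_queue = Queue()

        p = Process(target=producer, args=(image_queue, batch_queue))
        c = Process(target=consumer, args=(batch_queue, results_queue, 5))
        p.start()
        c.start()

        for i in range(5):
            image_queue.put(i)
        image_queue.put(None)                     # sentinel: no more work

        print(results_queue.get())                # [0, 10, 20, 30, 40]
        image_queue.join()
        p.join()
        c.join()
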
@@ -951,7 +997,9 @@ def _run_classification_step(detector_results_file: str,
951
997
  p.start()
952
998
  producers.append(p)
953
999
 
954
- # Start consumer worker
1000
+
1001
+ ## Start consumer worker
1002
+
955
1003
  consumer = Process(target=_crop_consumer_func,
956
1004
  args=(batch_queue, results_queue, classifier_model,
957
1005
  classifier_batch_size, classifier_worker_threads,
@@ -974,16 +1022,23 @@ def _run_classification_step(detector_results_file: str,
974
1022
 
975
1023
  print('Finished waiting for input queue')
976
1024
 
977
- # Wait for results
1025
+
1026
+ ## Wait for results
1027
+
978
1028
  classification_results = results_queue.get()
979
1029
 
980
- # Clean up processes
1030
+
1031
+ ## Clean up processes
1032
+
981
1033
  for p in producers:
982
1034
  p.join()
983
1035
  consumer.join()
984
1036
 
985
1037
  print('Finished waiting for workers')
986
1038
 
1039
+
1040
+ ## Format results and write output
1041
+
987
1042
  class CategoryState:
988
1043
  """
989
1044
  Helper class to manage classification category IDs.
@@ -1257,15 +1312,18 @@ def main():
1257
1312
  print('Intermediate files: {}'.format(temp_folder))
1258
1313
 
1259
1314
  # Determine detector output file path
1260
- if args.detections_file:
1315
+ if args.detections_file is not None:
1261
1316
  detector_output_file = args.detections_file
1262
- print('Using existing detections file: {}'.format(detector_output_file))
1263
- validation_options = ValidateBatchResultsOptions()
1264
- validation_options.check_image_existence = True
1265
- validation_options.relative_path_base = args.source
1266
- validation_options.raise_errors = True
1267
- validate_batch_results(detector_output_file,options=validation_options)
1268
- print('Validated detections file')
1317
+ if VALIDATE_DETECTION_FILE:
1318
+ print('Using existing detections file: {}'.format(detector_output_file))
1319
+ validation_options = ValidateBatchResultsOptions()
1320
+ validation_options.check_image_existence = True
1321
+ validation_options.relative_path_base = args.source
1322
+ validation_options.raise_errors = True
1323
+ validate_batch_results(detector_output_file,options=validation_options)
1324
+ print('Validated detections file')
1325
+ else:
1326
+ print('Bypassing validation of {}'.format(args.detections_file))
1269
1327
  else:
1270
1328
  detector_output_file = os.path.join(temp_folder, 'detector_output.json')
1271
1329
 
@@ -138,8 +138,8 @@ class TFDetector:
138
138
  image_id,
139
139
  detection_threshold,
140
140
  image_size=None,
141
- skip_image_resizing=False,
142
- augment=False):
141
+ augment=False,
142
+ verbose=False):
143
143
  """
144
144
  Runs the detector on an image.
145
145
 
@@ -152,10 +152,9 @@ class TFDetector:
152
152
  image_size (tuple, optional): image size to use for inference, only mess with this
153
153
  if (a) you're using a model other than MegaDetector or (b) you know what you're
154
154
  doing
155
- skip_image_resizing (bool, optional): whether to skip internal image resizing (and rely on external
156
- resizing). Not currently supported, but included here for compatibility with PTDetector.
157
155
  augment (bool, optional): enable image augmentation. Not currently supported, but included
158
156
  here for compatibility with PTDetector.
157
+ verbose (bool, optional): enable additional debug output
159
158
 
160
159
  Returns:
161
160
  dict: a dictionary with the following fields:
@@ -166,7 +165,6 @@ class TFDetector:
166
165
  """
167
166
 
168
167
  assert image_size is None, 'Image sizing not supported for TF detectors'
169
- assert not skip_image_resizing, 'Image sizing not supported for TF detectors'
170
168
  assert not augment, 'Image augmentation is not supported for TF detectors'
171
169
 
172
170
  if detection_threshold is None:
@@ -1168,7 +1168,7 @@ def restrict_to_taxa_list(taxa_list,
1168
1168
  # Convert all NaN values in the "common" column to empty strings
1169
1169
  taxa_list_df['common'] = taxa_list_df['common'].fillna('')
1170
1170
 
1171
- # Create a dictionary mapping latin names to common names
1171
+ # Create a dictionary mapping source Latin names to target common names
1172
1172
  target_latin_to_common = {}
1173
1173
 
1174
1174
  for i_row,row in taxa_list_df.iterrows():
@@ -1332,7 +1332,7 @@ def restrict_to_taxa_list(taxa_list,
1332
1332
  _insert_taxonomy_string(new_taxon_string)
1333
1333
 
1334
1334
 
1335
- ##%% Make sure all species on the allow-list are in the taxonomy
1335
+ ##%% Make sure all taxa on the allow-list are in the taxonomy
1336
1336
 
1337
1337
  n_failed_mappings = 0
1338
1338
 
@@ -1498,7 +1498,8 @@ def restrict_to_taxa_list(taxa_list,
1498
1498
  if (protected_common_names is not None) and \
1499
1499
  (common_name in protected_common_names):
1500
1500
  if verbose:
1501
- print('Not messing with protected category {}'.format(common_name))
1501
+ print('Not messing with protected category {}:\n{}'.format(
1502
+ common_name,input_taxon_string))
1502
1503
  input_category_id_to_output_taxon_string[input_category_id] = \
1503
1504
  input_taxon_string
1504
1505
  continue
@@ -1578,12 +1579,13 @@ def restrict_to_taxa_list(taxa_list,
1578
1579
  output_taxon_string = speciesnet_latin_name_to_taxon_string[target_taxon]
1579
1580
  input_category_id_to_output_taxon_string[input_category_id] = output_taxon_string
1580
1581
 
1581
- # ...for each category
1582
+ # ...for each category (mapping input category IDs to output taxon strings)
1582
1583
 
1583
1584
 
1584
- ##%% Build the new tables
1585
+ ##%% Map input category IDs to output category IDs
1585
1586
 
1586
- speciesnet_taxon_string_to_latin_name = invert_dictionary(speciesnet_latin_name_to_taxon_string)
1587
+ speciesnet_taxon_string_to_latin_name = \
1588
+ invert_dictionary(speciesnet_latin_name_to_taxon_string)
1587
1589
 
1588
1590
  input_category_id_to_output_category_id = {}
1589
1591
  output_taxon_string_to_category_id = {}
@@ -1604,7 +1606,8 @@ def restrict_to_taxa_list(taxa_list,
1604
1606
  if speciesnet_latin_name in speciesnet_latin_name_to_output_common_name:
1605
1607
  custom_common_name = speciesnet_latin_name_to_output_common_name[speciesnet_latin_name]
1606
1608
  if custom_common_name != output_common_name:
1607
- print('Substituting common name {} for {}'.format(custom_common_name,output_common_name))
1609
+ if verbose:
1610
+ print('Substituting common name {} for {}'.format(custom_common_name,output_common_name))
1608
1611
  output_common_name = custom_common_name
1609
1612
 
1610
1613
  # Do we need to create a new output category?
@@ -1625,20 +1628,16 @@ def restrict_to_taxa_list(taxa_list,
1625
1628
  if False:
1626
1629
  original_common_name = \
1627
1630
  input_category_id_to_common_name[input_category_id]
1628
-
1629
1631
  original_taxon_string = \
1630
1632
  input_category_id_to_taxonomy_string[input_category_id]
1631
-
1632
1633
  print('Mapping {} ({}) to:\n{} ({})\n'.format(
1633
1634
  original_common_name,original_taxon_string,
1634
1635
  output_common_name,output_taxon_string))
1635
- print('Mapping {} to {}'.format(
1636
- original_common_name,output_common_name,))
1637
1636
 
1638
- # ...for each category
1637
+ # ...for each category (mapping input category IDs to output category IDs)
1639
1638
 
1640
1639
 
1641
- #%% Remap all category labels
1640
+ ##%% Remap all category labels
1642
1641
 
1643
1642
  assert len(set(output_taxon_string_to_category_id.keys())) == \
1644
1643
  len(set(output_taxon_string_to_category_id.values())), \
@@ -129,6 +129,9 @@ def create_html_index(dir,
129
129
  recursive (bool, optional): recurse into subfolders
130
130
  """
131
131
 
132
+ if template_fun is None:
133
+ template_fun = _create_plain_index
134
+
132
135
  print('Traversing {}'.format(dir))
133
136
 
134
137
  # Make sure we remove the trailing /
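
The new guard resolves a None default to a concrete callable inside the function body. The general pattern, with generic names for illustration:

    def _plain_renderer(name):
        return '<li>{}</li>'.format(name)

    def render_index(names, template_fun=None):
        # Resolve the default in the body rather than baking a callable into the signature
        if template_fun is None:
            template_fun = _plain_renderer
        return '\n'.join(template_fun(n) for n in names)

    print(render_index(['a.jpg', 'b.jpg']))
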
@@ -1046,6 +1046,73 @@ def parallel_copy_files(input_file_to_output_file,
1046
1046
  # ...def parallel_copy_files(...)
1047
1047
 
1048
1048
 
1049
+ #%% File deletion functions
1050
+
1051
+ def delete_file(input_file, verbose=False):
1052
+ """
1053
+ Deletes a single file.
1054
+
1055
+ Args:
1056
+ input_file (str): file to delete
1057
+ verbose (bool, optional): enable additional debug console output
1058
+
1059
+ Returns:
1060
+ bool: True if file was deleted successfully, False otherwise
1061
+ """
1062
+
1063
+ try:
1064
+ if verbose:
1065
+ print('Deleting file {}'.format(input_file))
1066
+
1067
+ if os.path.isfile(input_file):
1068
+ os.remove(input_file)
1069
+ return True
1070
+ else:
1071
+ if verbose:
1072
+ print('File {} does not exist'.format(input_file))
1073
+ return False
1074
+
1075
+ except Exception as e:
1076
+ if verbose:
1077
+ print('Error deleting file {}: {}'.format(input_file, str(e)))
1078
+ return False
1079
+
1080
+ # ...def delete_file(...)
1081
+
1082
+
1083
+ def parallel_delete_files(input_files,
1084
+ max_workers=16,
1085
+ use_threads=True,
1086
+ verbose=False):
1087
+ """
1088
+ Deletes one or more files in parallel.
1089
+
1090
+ Args:
1091
+ input_files (list): list of files to delete
1092
+ max_workers (int, optional): number of concurrent workers, set to <= 1 to disable parallelism
1093
+ use_threads (bool, optional): whether to use threads (True) or processes (False); ignored if
1094
+ max_workers <= 1
1095
+ verbose (bool, optional): enable additional debug console output
1096
+ """
1097
+
1098
+ if len(input_files) == 0:
1099
+ return
1100
+
1101
+ n_workers = min(max_workers, len(input_files))
1102
+
1103
+ if use_threads:
1104
+ pool = ThreadPool(n_workers)
1105
+ else:
1106
+ pool = Pool(n_workers)
1107
+
1108
+ with tqdm(total=len(input_files)) as pbar:
1109
+ for i, _ in enumerate(pool.imap_unordered(partial(delete_file, verbose=verbose),
1110
+ input_files)):
1111
+ pbar.update()
1112
+
1113
+ # ...def parallel_delete_files(...)
1114
+
1115
+
1049
1116
  #%% File size functions
1050
1117
 
1051
1118
  def get_file_sizes(base_dir, convert_slashes=True):
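
A usage sketch for the new deletion helpers; the import path is an assumption based on this file's other helpers (e.g. parallel_copy_files) living in megadetector.utils.path_utils:

    import os
    import tempfile

    # Assumed import path: these helpers sit alongside parallel_copy_files
    from megadetector.utils.path_utils import parallel_delete_files

    # Create a few scratch files to delete
    tmp_dir = tempfile.mkdtemp()
    files_to_delete = []
    for i in range(5):
        fn = os.path.join(tmp_dir, 'scratch_{}.txt'.format(i))
        with open(fn, 'w') as f:
            f.write('temporary')
        files_to_delete.append(fn)

    # Delete them with a small thread pool; verbose=True prints each deletion
    parallel_delete_files(files_to_delete, max_workers=4, use_threads=True, verbose=True)
    assert not any(os.path.isfile(fn) for fn in files_to_delete)
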
@@ -34,6 +34,27 @@ def is_float(s):
34
34
  return True
35
35
 
36
36
 
37
+ def is_int(s):
38
+ """
39
+ Checks whether [s] is an object (typically a string) that can be cast to an int
40
+
41
+ Args:
42
+ s (object): object to evaluate
43
+
44
+ Returns:
45
+ bool: True if s successfully casts to an int, otherwise False
46
+ """
47
+
48
+ if s is None:
49
+ return False
50
+
51
+ try:
52
+ _ = int(s)
53
+ except ValueError:
54
+ return False
55
+ return True
56
+
57
+
37
58
  def human_readable_to_bytes(size):
38
59
  """
39
60
  Given a human-readable byte string (e.g. 2G, 10GB, 30MB, 20KB),
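
A few quick checks of the new is_int helper; the import path matches the one added to wi_platform_utils.py later in this diff:

    from megadetector.utils.string_utils import is_int

    assert is_int('42')         # plain integer string
    assert is_int('-7')
    assert not is_int('3.5')    # int('3.5') raises ValueError
    assert not is_int(None)     # None is handled explicitly
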
@@ -24,10 +24,14 @@ from multiprocessing.pool import Pool, ThreadPool
24
24
  from functools import partial
25
25
 
26
26
  from megadetector.utils.path_utils import insert_before_extension
27
+ from megadetector.utils.path_utils import path_join
28
+
27
29
  from megadetector.utils.ct_utils import split_list_into_n_chunks
28
30
  from megadetector.utils.ct_utils import invert_dictionary
29
31
  from megadetector.utils.ct_utils import compare_values_nan_equal
30
32
 
33
+ from megadetector.utils.string_utils import is_int
34
+
31
35
  from megadetector.utils.wi_taxonomy_utils import is_valid_prediction_string
32
36
  from megadetector.utils.wi_taxonomy_utils import no_cv_result_prediction_string
33
37
  from megadetector.utils.wi_taxonomy_utils import blank_prediction_string
@@ -68,7 +72,7 @@ def read_sequences_from_download_bundle(download_folder):
68
72
  assert len(sequence_list_files) == 1, \
69
73
  'Could not find sequences.csv in {}'.format(download_folder)
70
74
 
71
- sequence_list_file = os.path.join(download_folder,sequence_list_files[0])
75
+ sequence_list_file = path_join(download_folder,sequence_list_files[0])
72
76
 
73
77
  df = pd.read_csv(sequence_list_file)
74
78
  sequence_records = df.to_dict('records')
@@ -104,7 +108,7 @@ def read_images_from_download_bundle(download_folder):
104
108
  image_list_files = \
105
109
  [fn for fn in image_list_files if fn.startswith('images_') and fn.endswith('.csv')]
106
110
  image_list_files = \
107
- [os.path.join(download_folder,fn) for fn in image_list_files]
111
+ [path_join(download_folder,fn) for fn in image_list_files]
108
112
  print('Found {} image list files'.format(len(image_list_files)))
109
113
 
110
114
 
@@ -118,7 +122,7 @@ def read_images_from_download_bundle(download_folder):
118
122
  print('Reading images from list file {}'.format(
119
123
  os.path.basename(image_list_file)))
120
124
 
121
- df = pd.read_csv(image_list_file)
125
+ df = pd.read_csv(image_list_file,low_memory=False)
122
126
 
123
127
  # i_row = 0; row = df.iloc[i_row]
124
128
  for i_row,row in tqdm(df.iterrows(),total=len(df)):
@@ -203,11 +207,91 @@ def find_images_in_identify_tab(download_folder_with_identify,download_folder_ex
203
207
  # ...def find_images_in_identify_tab(...)
204
208
 
205
209
 
206
- def write_download_commands(image_records_to_download,
210
+ def write_prefix_download_command(image_records,
211
+ download_dir_base,
212
+ force_download=False,
213
+ download_command_file=None):
214
+ """
215
+ Write a .sh script to download all images (using gcloud) from the longest common URL
216
+ prefix in the images represented in [image_records].
217
+
218
+ Args:
219
+ image_records (list of dict): list of dicts with at least the field 'location'.
220
+ Can also be a dict whose values are lists of record dicts.
221
+ download_dir_base (str): local destination folder
222
+ force_download (bool, optional): overwrite existing files
223
+ download_command_file (str, optional): path of the .sh script we should write, defaults
224
+ to "download_wi_images_with_prefix.sh" in the destination folder.
225
+ """
226
+
227
+ ##%% Input validation
228
+
229
+ # If a dict is provided, assume it maps image GUIDs to lists of records, flatten to a list
230
+ if isinstance(image_records,dict):
231
+ all_image_records = []
232
+ for k in image_records:
233
+ records_this_image = image_records[k]
234
+ all_image_records.extend(records_this_image)
235
+ image_records = all_image_records
236
+
237
+ assert isinstance(image_records,list), \
238
+ 'Illegal image record list format {}'.format(type(image_records))
239
+ assert isinstance(image_records[0],dict), \
240
+ 'Illegal image record format {}'.format(type(image_records[0]))
241
+
242
+ urls = [r['location'] for r in image_records]
243
+
244
+ # "urls" is a list of URLs starting with gs://. Find the highest-level folder
245
+ # that is common to all URLs in the list. For example, if the list is:
246
+ #
247
+ # gs://a/b/c
248
+ # gs://a/b/d
249
+ #
250
+ # The result should be:
251
+ #
252
+ # gs://a/b
253
+ common_prefix = os.path.commonprefix(urls)
254
+
255
+ # Remove the gs:// prefix if it's still there
256
+ if common_prefix.startswith('gs://'):
257
+ common_prefix = common_prefix[len('gs://'):]
258
+
259
+ # Ensure the common prefix ends with a '/' if it's not empty
260
+ if (len(common_prefix) > 0) and (not common_prefix.endswith('/')):
261
+ common_prefix = os.path.dirname(common_prefix) + '/'
262
+
263
+ print('Longest common prefix: {}'.format(common_prefix))
264
+
265
+ if download_command_file is None:
266
+ download_command_file = \
267
+ path_join(download_dir_base,'download_wi_images_with_prefix.sh')
268
+
269
+ os.makedirs(download_dir_base,exist_ok=True)
270
+
271
+ with open(download_command_file,'w',newline='\n') as f:
272
+ # The --no-clobber flag prevents overwriting existing files
273
+ # The -r flag is for recursive download
274
+ # The gs:// prefix is added back for the gcloud command
275
+ no_clobber_string = ''
276
+ if not force_download:
277
+ no_clobber_string = '--no-clobber'
278
+
279
+ cmd = 'gcloud storage cp -r {} "gs://{}" "{}"'.format(
280
+ no_clobber_string,common_prefix,download_dir_base)
281
+ print('Writing download command:\n{}'.format(cmd))
282
+ f.write(cmd + '\n')
283
+
284
+ print('Download script written to {}'.format(download_command_file))
285
+
286
+ # ...def write_prefix_download_command(...)
287
+
288
+
289
+ def write_download_commands(image_records,
207
290
  download_dir_base,
208
291
  force_download=False,
209
292
  n_download_workers=25,
210
- download_command_file_base=None):
293
+ download_command_file_base=None,
294
+ image_flattening='deployment'):
211
295
  """
212
296
  Given a list of dicts with at least the field 'location' (a gs:// URL), prepare a set of "gcloud
213
297
  storage" commands to download images, and write those to a series of .sh scripts, along with one
@@ -215,10 +299,9 @@ def write_download_commands(image_records_to_download,
215
299
 
216
300
  gcloud commands will use relative paths.
217
301
 
218
- image_records_to_download can also be a dict mapping IDs to lists of records.
219
-
220
302
  Args:
221
- image_records_to_download (list of dict): list of dicts with at least the field 'location'
303
+ image_records (list of dict): list of dicts with at least the field 'location'.
304
+ Can also be a dict whose values are lists of record dicts.
222
305
  download_dir_base (str): local destination folder
223
306
  force_download (bool, optional): include gs commands even if the target file exists
224
307
  n_download_workers (int, optional): number of scripts to write (that's our hacky way
@@ -226,42 +309,103 @@ def write_download_commands(image_records_to_download,
226
309
  download_command_file_base (str, optional): path of the .sh script we should write, defaults
227
310
  to "download_wi_images.sh" in the destination folder. Individual worker scripts will
228
311
  have a number added, e.g. download_wi_images_00.sh.
312
+ image_flattening (str, optional): if 'none', relative paths will be preserved
313
+ representing the entire URL for each image. Can be 'guid' (just download to
314
+ [GUID].JPG) or 'deployment' (download to [deployment]/[GUID].JPG).
229
315
  """
230
316
 
231
- if isinstance(image_records_to_download,dict):
317
+ ##%% Input validation
232
318
 
319
+ # If a dict is provided, assume it maps image GUIDs to lists of records, flatten to a list
320
+ if isinstance(image_records,dict):
233
321
  all_image_records = []
234
- for k in image_records_to_download:
235
- records_this_image = image_records_to_download[k]
322
+ for k in image_records:
323
+ records_this_image = image_records[k]
236
324
  all_image_records.extend(records_this_image)
237
- return write_download_commands(all_image_records,
238
- download_dir_base=download_dir_base,
239
- force_download=force_download,
240
- n_download_workers=n_download_workers,
241
- download_command_file_base=download_command_file_base)
325
+ image_records = all_image_records
326
+
327
+ assert isinstance(image_records,list), \
328
+ 'Illegal image record list format {}'.format(type(image_records))
329
+ assert isinstance(image_records[0],dict), \
330
+ 'Illegal image record format {}'.format(type(image_records[0]))
331
+
332
+
333
+ ##%% Map URLs to relative paths
334
+
335
+ # URLs look like:
336
+ #
337
+ # gs://145625555_2004881_2323_name__main/deployment/2241000/prod/directUpload/5fda0ddd-511e-46ca-95c1-302b3c71f8ea.JPG
338
+ if image_flattening is None:
339
+ image_flattening = 'none'
340
+ image_flattening = image_flattening.lower().strip()
341
+
342
+ assert image_flattening in ('none','guid','deployment'), \
343
+ 'Illegal image flattening strategy {}'.format(image_flattening)
344
+
345
+ url_to_relative_path = {}
346
+
347
+ for image_record in image_records:
348
+
349
+ url = image_record['location']
350
+ assert url.startswith('gs://'), 'Illegal URL {}'.format(url)
351
+
352
+ relative_path = None
353
+
354
+ if image_flattening == 'none':
355
+ relative_path = url.replace('gs://','')
356
+ elif image_flattening == 'guid':
357
+ relative_path = url.split('/')[-1]
358
+ else:
359
+ assert image_flattening == 'deployment'
360
+ tokens = url.split('/')
361
+ found_deployment_id = False
362
+ for i_token,token in enumerate(tokens):
363
+ if token == 'deployment':
364
+ assert i_token < (len(tokens)-1)
365
+ deployment_id_string = tokens[i_token + 1]
366
+ deployment_id_string = deployment_id_string.replace('_thumb','')
367
+ assert is_int(deployment_id_string), \
368
+ 'Illegal deployment ID {}'.format(deployment_id_string)
369
+ image_id = url.split('/')[-1]
370
+ relative_path = deployment_id_string + '/' + image_id
371
+ found_deployment_id = True
372
+ break
373
+ assert found_deployment_id, \
374
+ 'Could not find deployment ID in record {}'.format(str(image_record))
375
+
376
+ assert relative_path is not None
377
+
378
+ if url in url_to_relative_path:
379
+ assert url_to_relative_path[url] == relative_path, \
380
+ 'URL path mapping error'
381
+ else:
382
+ url_to_relative_path[url] = relative_path
383
+
384
+ # ...for each image record
385
+
242
386
 
243
387
  ##%% Make list of gcloud storage commands
244
388
 
245
389
  if download_command_file_base is None:
246
- download_command_file_base = os.path.join(download_dir_base,'download_wi_images.sh')
390
+ download_command_file_base = path_join(download_dir_base,'download_wi_images.sh')
247
391
 
248
392
  commands = []
249
393
  skipped_urls = []
250
394
  downloaded_urls = set()
251
395
 
252
- # image_record = image_records_to_download[0]
253
- for image_record in tqdm(image_records_to_download):
396
+ # image_record = image_records[0]
397
+ for image_record in tqdm(image_records):
254
398
 
255
399
  url = image_record['location']
256
400
  if url in downloaded_urls:
257
401
  continue
258
402
 
259
- assert url.startswith('gs://')
403
+ assert url.startswith('gs://'), 'Illegal URL {}'.format(url)
260
404
 
261
- relative_path = url.replace('gs://','')
262
- abs_path = os.path.join(download_dir_base,relative_path)
405
+ relative_path = url_to_relative_path[url]
406
+ abs_path = path_join(download_dir_base,relative_path)
263
407
 
264
- # Skip files that already exist
408
+ # Optionally skip files that already exist
265
409
  if (not force_download) and (os.path.isfile(abs_path)):
266
410
  skipped_urls.append(url)
267
411
  continue
@@ -271,7 +415,7 @@ def write_download_commands(image_records_to_download,
271
415
  commands.append(command)
272
416
 
273
417
  print('Generated {} commands for {} image records'.format(
274
- len(commands),len(image_records_to_download)))
418
+ len(commands),len(image_records)))
275
419
 
276
420
  print('Skipped {} URLs'.format(len(skipped_urls)))
277
421
 
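
A standalone sketch of the 'deployment' flattening rule described above: the deployment ID is the token after 'deployment' in the URL, and the image keeps its GUID-based filename. Re-implemented here for illustration, using the example URL shape from the comment in the hunk:

    def deployment_relative_path(url):
        """Map a gs:// image URL to '<deployment_id>/<filename>'."""
        tokens = url.split('/')
        i_deployment = tokens.index('deployment')
        deployment_id = tokens[i_deployment + 1].replace('_thumb', '')
        return deployment_id + '/' + tokens[-1]

    url = ('gs://145625555_2004881_2323_name__main/deployment/2241000/prod/'
           'directUpload/5fda0ddd-511e-46ca-95c1-302b3c71f8ea.JPG')
    print(deployment_relative_path(url))
    # 2241000/5fda0ddd-511e-46ca-95c1-302b3c71f8ea.JPG
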
@@ -754,6 +754,7 @@ def generate_instances_json_from_folder(folder,
754
754
 
755
755
  assert os.path.isdir(folder)
756
756
 
757
+ print('Enumerating images in {}'.format(folder))
757
758
  image_files_abs = find_images(folder,recursive=True,return_relative_paths=False)
758
759
 
759
760
  if tokens_to_ignore is not None:
@@ -428,6 +428,7 @@ def main(): # noqa
428
428
  category_names_to_blur=category_names_to_blur)
429
429
 
430
430
  if (args.html_output_file is not None) and args.open_html_output_file:
431
+ print('Opening output file {}'.format(args.html_output_file))
431
432
  open_file(args.html_output_file)
432
433
 
433
434
  if __name__ == '__main__':
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: megadetector
3
- Version: 10.0.9
3
+ Version: 10.0.10
4
4
  Summary: MegaDetector is an AI model that helps conservation folks spend less time doing boring things with camera trap images.
5
5
  Author-email: Your friendly neighborhood MegaDetector team <cameratraps@lila.science>
6
6
  Maintainer-email: Your friendly neighborhood MegaDetector team <cameratraps@lila.science>
@@ -43,7 +43,7 @@ megadetector/data_management/labelme_to_coco.py,sha256=SO6DMfJ9WNlMUHF9EUYWjSNye
43
43
  megadetector/data_management/labelme_to_yolo.py,sha256=bsqpNUsnDJucJ60wSQD_yvq_tWiots1u4tSFNiHeaYA,12769
44
44
  megadetector/data_management/mewc_to_md.py,sha256=09XHEykIG-whGkgEIkho7xfVuPlic1TYTKGAufv_tto,12637
45
45
  megadetector/data_management/ocr_tools.py,sha256=aYpULCPn_tHaqatOd8qjEpKJ7MksRZS0o1kqQF04IE0,31389
46
- megadetector/data_management/read_exif.py,sha256=M_8492al57kWgZ-0gNWLNdzpm442zPCC8J2DtgzHAyA,29646
46
+ megadetector/data_management/read_exif.py,sha256=80L_P2mGRKGA02X7jaGje27pwxrZ7mfIPo5IHKpYihc,30166
47
47
  megadetector/data_management/remap_coco_categories.py,sha256=DT4Rdt7Y1IdhbO2TZiBhQDESdit-l_-b_Hw0tbJ2Nuw,7090
48
48
  megadetector/data_management/remove_exif.py,sha256=5JHGWMIeXqB2PE2ZwIMJOEtNYopxknNDwynQAuJCLvw,4031
49
49
  megadetector/data_management/rename_images.py,sha256=iHkdQ_c1G9Oc8C4wcnPLmhKv0S9i9g7ppbytfBBqn2Y,6516
@@ -75,16 +75,16 @@ megadetector/detection/change_detection.py,sha256=Ne3GajbH_0KPBU8ruHp4Rkr0uKd5oK
75
75
  megadetector/detection/process_video.py,sha256=kuQHrpOC3LQo9ecqJPpzkds9fZVnoLmrfJw_yh-oxi8,17890
76
76
  megadetector/detection/pytorch_detector.py,sha256=-TvtDcX2Hh_CgBEz7Eg2NzyEts8DjOgY0mE_fle6zkM,60705
77
77
  megadetector/detection/run_detector.py,sha256=JWTIYsk5aCgW9PBCGnAECe31JwKHhkfp6zKsSDqfrsA,46831
78
- megadetector/detection/run_detector_batch.py,sha256=Ah-LSsA73Io-GH0BpI8qopGRI5eUKWhhIR19lC6s41A,90602
78
+ megadetector/detection/run_detector_batch.py,sha256=gc3T_h61tRk705Lxbi4BXboc-BmaitiNd-5cR5MzHC0,91423
79
79
  megadetector/detection/run_inference_with_yolov5_val.py,sha256=dJXh3BwKOQQ4OA-Mq_heEb7AfBAk7qKUAagnIGuFtaU,53689
80
- megadetector/detection/run_md_and_speciesnet.py,sha256=Dp_SpJZp0pX9jzFtxM6zPCyBNq49uyQpMDAdNDLVorM,50280
80
+ megadetector/detection/run_md_and_speciesnet.py,sha256=LMpQVqpLqju2WFznHGKEyc9QCgU4pHSPyfDnf0HL9GA,52179
81
81
  megadetector/detection/run_tiled_inference.py,sha256=v_wL4uZfYdswJdYXBoGci62UvVprBD9OHKkrw2g-G5M,41081
82
- megadetector/detection/tf_detector.py,sha256=3b2MiqgMw8KBDzHQliUSDXWrmKpa9iZnfe6EgYpMcYo,8398
82
+ megadetector/detection/tf_detector.py,sha256=E0PZ1jHAv31kvsdaUD8IjuPmdLulkKwx6s2wbPMK-WQ,8151
83
83
  megadetector/detection/video_utils.py,sha256=M7yje6XeOnR_QwDyuG1o6bwTKvRysoA2NiOK2MSi98E,53943
84
84
  megadetector/postprocessing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
85
85
  megadetector/postprocessing/add_max_conf.py,sha256=9MYtsH2mwkiaZb7Qcor5J_HskfAj7d9srp8G_Qldpk0,1722
86
86
  megadetector/postprocessing/categorize_detections_by_size.py,sha256=DpZpRNFlyeOfWuOc6ICuENgIWDCEtiErJ_frBZp9lYM,5382
87
- megadetector/postprocessing/classification_postprocessing.py,sha256=OoPVr34vXyLykB42SplcSKo9cj7dgf8Yju_DCDhd6_k,68574
87
+ megadetector/postprocessing/classification_postprocessing.py,sha256=WtZqgY43-KdbVmuG_PPh_5HK3J7-Q-laKQtQmbhdvI0,68701
88
88
  megadetector/postprocessing/combine_batch_outputs.py,sha256=BEP8cVa0sMIPg7tkWQc_8vOEPnbmWjOsQdVJHe61uz8,8468
89
89
  megadetector/postprocessing/compare_batch_results.py,sha256=QbdegGZkgVLZdO5Vjm3aTAQS5VzP9_tX0PKwCSkHKhw,85009
90
90
  megadetector/postprocessing/convert_output_format.py,sha256=3KLO6NqddofgIEYjV8_iZIf0iXaplFN2AroUq5i4R7k,14472
@@ -121,27 +121,27 @@ megadetector/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuF
121
121
  megadetector/tests/test_nms_synthetic.py,sha256=oY6xmT1sLSSN7weQJ8TPTaZgAiSiZ6s43EffUhwLWIw,14707
122
122
  megadetector/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
123
123
  megadetector/utils/ct_utils.py,sha256=IiZV8dWtJamveINv_joATMgMPHeDkZ8l82jDEQcLgQg,60502
124
- megadetector/utils/directory_listing.py,sha256=0-VMuQWo6rETIKERqfX6Zn7pRp_GJ4JiFiWvsw9PQcU,6500
124
+ megadetector/utils/directory_listing.py,sha256=ZTwryqP_02XSHlsaVNCo7qyLGM4EKB-2NYxcmQpQEwM,6573
125
125
  megadetector/utils/extract_frames_from_video.py,sha256=vjSVgxtb5z2syHCVYWc2KdNUpc-O6yY8nkbj_wqsIvY,12255
126
126
  megadetector/utils/gpu_test.py,sha256=5zUfAVeSjH8I08eCqayFmMxL-0mix8SjJJTe5ORABvU,3544
127
127
  megadetector/utils/md_tests.py,sha256=Iup4KjyIpLUpZ4TzzwEyGK61rg6aH7NrEQsdQ-ov51I,80300
128
- megadetector/utils/path_utils.py,sha256=tV8eh77m_uS8YYpOQZO8GUKR6l5sZrSSIkApqgi_DmY,101030
128
+ megadetector/utils/path_utils.py,sha256=RDz3-Cts6NG118ll2WXzSThNZe5rJUxWZdxeIbjUMcg,102900
129
129
  megadetector/utils/process_utils.py,sha256=gQcpH9WYvGPUs0FhtJ5_Xvl6JsvoGz8_mnDQk0PbTRM,5673
130
130
  megadetector/utils/split_locations_into_train_val.py,sha256=fd_6pj1aWY6hybwaXvBn9kBcOHjI90U-OsTmEAGpeu8,10297
131
- megadetector/utils/string_utils.py,sha256=r2Maw3zbzk3EyaZcNkdqr96yP_8m4ey6v0WxlemEY9U,6155
131
+ megadetector/utils/string_utils.py,sha256=OejBfVWdmc-uHaCTfQN5PChsd1tMuiRJVRHQV0xZWt8,6533
132
132
  megadetector/utils/url_utils.py,sha256=PzqN-VquAZFBRin2ZaYi5U2WCsMYSwvM0X-NN45Fdh4,28448
133
- megadetector/utils/wi_platform_utils.py,sha256=8CGpiox_aL6RVZKfJqPVwpW4_6Cjku0HIajJPcmeNpE,32019
134
- megadetector/utils/wi_taxonomy_utils.py,sha256=o4AvY5gZXfk69pPckdGxgIPhqsH2-hJQucavSRsUnoc,66513
133
+ megadetector/utils/wi_platform_utils.py,sha256=TSckCBi6yIe6VAJhZtbf7aiZ_U0ipBzr_RU9OEqneaU,37688
134
+ megadetector/utils/wi_taxonomy_utils.py,sha256=yd8C38DJWUfhSTaO5Zr_YoDqqR02OA-ZbrSHCbU1utI,66566
135
135
  megadetector/utils/write_html_image_list.py,sha256=6Tbe5wyUxoBYJgH9yVrxxKCeWF2BVre_wQMEOQJ-ZIU,9068
136
136
  megadetector/visualization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
137
137
  megadetector/visualization/plot_utils.py,sha256=uDDlOhdaJ3V8sGj2kS9b0cgszKc8WCq2_ofl6TW_XUs,10727
138
138
  megadetector/visualization/render_images_with_thumbnails.py,sha256=-XX4PG4wnrFjFTIwd0sMxXxKMxPuu0SZ_TfK3dI1x8Y,8425
139
139
  megadetector/visualization/visualization_utils.py,sha256=E5uvysS3F1S_yiPFxZty3U2f6cjuE8zG6XWggYOu-5o,75921
140
140
  megadetector/visualization/visualize_db.py,sha256=8YDWSR0eMehXYdPtak9z8UUw35xV7hu-0eCuzgSLjWc,25558
141
- megadetector/visualization/visualize_detector_output.py,sha256=HpWh7ugwo51YBHsFi40iAp9G-uRAMMjgsm8H_uBolBs,20295
141
+ megadetector/visualization/visualize_detector_output.py,sha256=nfB4JtfNU5PgFzuWxXSUSfCib29DWSNPhIf9drtD9Qs,20365
142
142
  megadetector/visualization/visualize_video_output.py,sha256=ibMGB5ynMwNXmaMlY8h8tURb-Lyvuxs1EB08x_jvev0,20606
143
- megadetector-10.0.9.dist-info/licenses/LICENSE,sha256=RMa3qq-7Cyk7DdtqRj_bP1oInGFgjyHn9-PZ3PcrqIs,1100
144
- megadetector-10.0.9.dist-info/METADATA,sha256=s8q_fi96c5kt67xihApa5_Lko4voT5lH0swUawbUgzE,6486
145
- megadetector-10.0.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
146
- megadetector-10.0.9.dist-info/top_level.txt,sha256=wf9DXa8EwiOSZ4G5IPjakSxBPxTDjhYYnqWRfR-zS4M,13
147
- megadetector-10.0.9.dist-info/RECORD,,
143
+ megadetector-10.0.10.dist-info/licenses/LICENSE,sha256=RMa3qq-7Cyk7DdtqRj_bP1oInGFgjyHn9-PZ3PcrqIs,1100
144
+ megadetector-10.0.10.dist-info/METADATA,sha256=-4OEXDgVH3BLpXqLN2a3BKeVBI4wDbOQTnfzRs2dggw,6487
145
+ megadetector-10.0.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
146
+ megadetector-10.0.10.dist-info/top_level.txt,sha256=wf9DXa8EwiOSZ4G5IPjakSxBPxTDjhYYnqWRfR-zS4M,13
147
+ megadetector-10.0.10.dist-info/RECORD,,