megadetector 5.0.29__py3-none-any.whl → 10.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (95)
  1. megadetector/classification/efficientnet/model.py +8 -8
  2. megadetector/classification/efficientnet/utils.py +6 -5
  3. megadetector/classification/prepare_classification_script_mc.py +3 -3
  4. megadetector/data_management/annotations/annotation_constants.py +0 -1
  5. megadetector/data_management/camtrap_dp_to_coco.py +34 -1
  6. megadetector/data_management/cct_json_utils.py +2 -2
  7. megadetector/data_management/coco_to_yolo.py +22 -5
  8. megadetector/data_management/databases/add_width_and_height_to_db.py +85 -12
  9. megadetector/data_management/databases/combine_coco_camera_traps_files.py +2 -2
  10. megadetector/data_management/databases/integrity_check_json_db.py +29 -15
  11. megadetector/data_management/generate_crops_from_cct.py +50 -1
  12. megadetector/data_management/labelme_to_coco.py +4 -2
  13. megadetector/data_management/labelme_to_yolo.py +82 -2
  14. megadetector/data_management/lila/generate_lila_per_image_labels.py +276 -18
  15. megadetector/data_management/lila/get_lila_annotation_counts.py +5 -3
  16. megadetector/data_management/lila/lila_common.py +3 -0
  17. megadetector/data_management/lila/test_lila_metadata_urls.py +15 -5
  18. megadetector/data_management/mewc_to_md.py +5 -0
  19. megadetector/data_management/ocr_tools.py +4 -3
  20. megadetector/data_management/read_exif.py +20 -5
  21. megadetector/data_management/remap_coco_categories.py +66 -4
  22. megadetector/data_management/remove_exif.py +50 -1
  23. megadetector/data_management/rename_images.py +3 -3
  24. megadetector/data_management/resize_coco_dataset.py +563 -95
  25. megadetector/data_management/yolo_output_to_md_output.py +131 -2
  26. megadetector/data_management/yolo_to_coco.py +140 -5
  27. megadetector/detection/change_detection.py +4 -3
  28. megadetector/detection/pytorch_detector.py +60 -22
  29. megadetector/detection/run_detector.py +225 -25
  30. megadetector/detection/run_detector_batch.py +42 -16
  31. megadetector/detection/run_inference_with_yolov5_val.py +12 -2
  32. megadetector/detection/run_tiled_inference.py +1 -0
  33. megadetector/detection/video_utils.py +53 -24
  34. megadetector/postprocessing/add_max_conf.py +4 -0
  35. megadetector/postprocessing/categorize_detections_by_size.py +1 -1
  36. megadetector/postprocessing/classification_postprocessing.py +55 -20
  37. megadetector/postprocessing/combine_batch_outputs.py +3 -2
  38. megadetector/postprocessing/compare_batch_results.py +64 -10
  39. megadetector/postprocessing/convert_output_format.py +12 -8
  40. megadetector/postprocessing/create_crop_folder.py +137 -10
  41. megadetector/postprocessing/load_api_results.py +26 -8
  42. megadetector/postprocessing/md_to_coco.py +4 -4
  43. megadetector/postprocessing/md_to_labelme.py +18 -7
  44. megadetector/postprocessing/merge_detections.py +5 -0
  45. megadetector/postprocessing/postprocess_batch_results.py +6 -3
  46. megadetector/postprocessing/remap_detection_categories.py +55 -2
  47. megadetector/postprocessing/render_detection_confusion_matrix.py +9 -6
  48. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
  49. megadetector/taxonomy_mapping/map_new_lila_datasets.py +3 -4
  50. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +40 -19
  51. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +1 -1
  52. megadetector/taxonomy_mapping/species_lookup.py +123 -41
  53. megadetector/utils/ct_utils.py +133 -113
  54. megadetector/utils/md_tests.py +93 -13
  55. megadetector/utils/path_utils.py +137 -107
  56. megadetector/utils/split_locations_into_train_val.py +2 -2
  57. megadetector/utils/string_utils.py +7 -7
  58. megadetector/utils/url_utils.py +81 -58
  59. megadetector/utils/wi_utils.py +46 -17
  60. megadetector/visualization/plot_utils.py +13 -9
  61. megadetector/visualization/render_images_with_thumbnails.py +2 -1
  62. megadetector/visualization/visualization_utils.py +94 -46
  63. megadetector/visualization/visualize_db.py +36 -9
  64. megadetector/visualization/visualize_detector_output.py +4 -4
  65. {megadetector-5.0.29.dist-info → megadetector-10.0.0.dist-info}/METADATA +135 -135
  66. megadetector-10.0.0.dist-info/RECORD +139 -0
  67. {megadetector-5.0.29.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
  68. {megadetector-5.0.29.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
  69. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  70. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  71. megadetector/api/batch_processing/api_core/batch_service/score.py +0 -438
  72. megadetector/api/batch_processing/api_core/server.py +0 -294
  73. megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
  74. megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
  75. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  76. megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
  77. megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
  78. megadetector/api/batch_processing/api_core/server_utils.py +0 -88
  79. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  80. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  81. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  82. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  83. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  84. megadetector/api/synchronous/__init__.py +0 -0
  85. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  86. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
  87. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
  88. megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
  89. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  90. megadetector/api/synchronous/api_core/tests/load_test.py +0 -109
  91. megadetector/utils/azure_utils.py +0 -178
  92. megadetector/utils/sas_blob_utils.py +0 -513
  93. megadetector-5.0.29.dist-info/RECORD +0 -163
  94. /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
  95. {megadetector-5.0.29.dist-info → megadetector-10.0.0.dist-info}/WHEEL +0 -0
@@ -93,7 +93,7 @@ class MBConvBlock(nn.Module):
93
93
 
94
94
  Args:
95
95
  inputs (tensor): Input tensor.
96
- drop_connect_rate (bool): Drop connect rate (float, between 0 and 1).
96
+ drop_connect_rate (float, optional): Drop connect rate (between 0 and 1).
97
97
 
98
98
  Returns:
99
99
  Output of this block after processing.
@@ -135,7 +135,7 @@ class MBConvBlock(nn.Module):
135
135
  """Sets swish function as memory efficient (for training) or standard (for export).
136
136
 
137
137
  Args:
138
- memory_efficient (bool): Whether to use memory-efficient version of swish.
138
+ memory_efficient (bool, optional): Whether to use memory-efficient version of swish.
139
139
  """
140
140
  self._swish = MemoryEfficientSwish() if memory_efficient else Swish()
141
141
 
@@ -221,7 +221,7 @@ class EfficientNet(nn.Module):
221
221
  """Sets swish function as memory efficient (for training) or standard (for export).
222
222
 
223
223
  Args:
224
- memory_efficient (bool): Whether to use memory-efficient version of swish.
224
+ memory_efficient (bool, optional): Whether to use memory-efficient version of swish.
225
225
 
226
226
  """
227
227
  self._swish = MemoryEfficientSwish() if memory_efficient else Swish()
@@ -323,7 +323,7 @@ class EfficientNet(nn.Module):
323
323
 
324
324
  Args:
325
325
  model_name (str): Name for efficientnet.
326
- in_channels (int): Input data's channel number.
326
+ in_channels (int, optional): Input data's channel number.
327
327
  override_params (other key word params):
328
328
  Params to override model's global_params.
329
329
  Optional key:
@@ -349,14 +349,14 @@ class EfficientNet(nn.Module):
349
349
 
350
350
  Args:
351
351
  model_name (str): Name for efficientnet.
352
- weights_path (None or str):
352
+ weights_path (None or str, optional):
353
353
  str: path to pretrained weights file on the local disk.
354
354
  None: use pretrained weights downloaded from the Internet.
355
- advprop (bool):
355
+ advprop (bool, optional):
356
356
  Whether to load pretrained weights
357
357
  trained with advprop (valid when weights_path is None).
358
- in_channels (int): Input data's channel number.
359
- num_classes (int):
358
+ in_channels (int, optional): Input data's channel number.
359
+ num_classes (int, optional):
360
360
  Number of categories for classification.
361
361
  It controls the output size for final linear layer.
362
362
  override_params (other key word params):
@@ -194,7 +194,7 @@ def get_same_padding_conv2d(image_size=None):
194
194
  Static padding is necessary for ONNX exporting of models.
195
195
 
196
196
  Args:
197
- image_size (int or tuple): Size of the image.
197
+ image_size (int or tuple, optional): Size of the image.
198
198
 
199
199
  Returns:
200
200
  Conv2dDynamicSamePadding or Conv2dStaticSamePadding.
@@ -274,7 +274,7 @@ def get_same_padding_maxPool2d(image_size=None):
274
274
  Static padding is necessary for ONNX exporting of models.
275
275
 
276
276
  Args:
277
- image_size (int or tuple): Size of the image.
277
+ image_size (int or tuple, optional): Size of the image.
278
278
 
279
279
  Returns:
280
280
  MaxPool2dDynamicSamePadding or MaxPool2dStaticSamePadding.
@@ -579,11 +579,12 @@ def load_pretrained_weights(model, model_name, weights_path=None, load_fc=True,
579
579
  Args:
580
580
  model (Module): The whole model of efficientnet.
581
581
  model_name (str): Model name of efficientnet.
582
- weights_path (None or str):
582
+ weights_path (None or str, optional):
583
583
  str: path to pretrained weights file on the local disk.
584
584
  None: use pretrained weights downloaded from the Internet.
585
- load_fc (bool): Whether to load pretrained weights for fc layer at the end of the model.
586
- advprop (bool): Whether to load pretrained weights
585
+ load_fc (bool, optional): Whether to load pretrained weights for fc layer at the end
586
+ of the model.
587
+ advprop (bool, optional): Whether to load pretrained weights
587
588
  trained with advprop (valid when weights_path is None).
588
589
  """
589
590
  if isinstance(weights_path, str):
@@ -99,7 +99,7 @@ for fn in input_files:
99
99
  crop_cmd += crop_comment
100
100
 
101
101
  crop_cmd += "python crop_detections.py \\\n" + \
102
- input_file_path + ' \\\n' + \
102
+ input_file_path + ' \\\n' + \
103
103
  crop_path + ' \\\n' + \
104
104
  '--images-dir "' + image_base + '"' + ' \\\n' + \
105
105
  '--threshold "' + threshold_str + '"' + ' \\\n' + \
@@ -127,7 +127,7 @@ for fn in input_files:
127
127
  classify_cmd += classify_comment
128
128
 
129
129
  classify_cmd += "python run_classifier.py \\\n" + \
130
- checkpoint_path + ' \\\n' + \
130
+ checkpoint_path + ' \\\n' + \
131
131
  crop_path + ' \\\n' + \
132
132
  classifier_output_path + ' \\\n' + \
133
133
  '--detections-json "' + input_file_path + '"' + ' \\\n' + \
@@ -205,7 +205,7 @@ for fn in input_files:
205
205
  merge_cmd += merge_comment
206
206
 
207
207
  merge_cmd += "python merge_classification_detection_output.py \\\n" + \
208
- classifier_output_path_remapped + ' \\\n' + \
208
+ classifier_output_path_remapped + ' \\\n' + \
209
209
  output_label_index + ' \\\n' + \
210
210
  '--output-json "' + final_output_path + '"' + ' \\\n' + \
211
211
  '--detection-json "' + input_file_path + '"' + ' \\\n' + \
@@ -31,4 +31,3 @@ detector_bbox_category_name_to_id = {}
31
31
  for cat in detector_bbox_categories:
32
32
  detector_bbox_category_id_to_name[cat['id']] = cat['name']
33
33
  detector_bbox_category_name_to_id[cat['name']] = cat['id']
34
-
@@ -19,6 +19,8 @@ Currently supports only sequence-level labeling.
19
19
 
20
20
  import os
21
21
  import json
22
+ import argparse
23
+
22
24
  import pandas as pd
23
25
 
24
26
  from dateutil import parser as dateparser
@@ -36,6 +38,10 @@ def camtrap_dp_to_coco(camtrap_dp_folder,output_file=None):
36
38
  the resulting COCO file.
37
39
 
38
40
  Optionally writes the results to [output_file]
41
+
42
+ Args:
43
+ camtrap_dp_folder (str): input folder, containing a CamtrapDP package
44
+ output_file (str, optional): COCO-formatted output file
39
45
  """
40
46
 
41
47
  required_files = ('datapackage.json','deployments.csv','events.csv','media.csv','observations.csv')
@@ -234,4 +240,31 @@ if False:
234
240
 
235
241
  #%% Command-line driver
236
242
 
237
- # TODO
243
+ def main():
244
+ """
245
+ Command-line interface to convert Camtrap DP to COCO.
246
+ """
247
+
248
+ parser = argparse.ArgumentParser(description='Convert Camtrap DP to COCO format')
249
+ parser.add_argument('camtrap_dp_folder', type=str,
250
+ help='Input folder, containing a CamtrapDP package')
251
+ parser.add_argument('--output_file', type=str, default=None,
252
+ help='COCO-formatted output file (defaults to [camtrap_dp_folder]_coco.json)')
253
+
254
+ args = parser.parse_args()
255
+
256
+ if args.output_file is None:
257
+ # Default output file name: [camtrap_dp_folder]_coco.json
258
+ #
259
+ # Remove trailing slash if present
260
+ folder_name = args.camtrap_dp_folder.rstrip(os.sep)
261
+ output_file = folder_name + '_coco.json'
262
+ else:
263
+ output_file = args.output_file
264
+
265
+ camtrap_dp_to_coco(camtrap_dp_folder=args.camtrap_dp_folder, output_file=output_file)
266
+ print(f"Successfully converted Camtrap DP package at '{args.camtrap_dp_folder}' to " + \
267
+ f"COCO format at '{output_file}'")
268
+
269
+ if __name__ == '__main__':
270
+ main()
@@ -418,8 +418,8 @@ def create_sequences(image_info,options=None):
418
418
  image_info (str, dict, or list): a dict in CCT format, a CCT .json file, or just the
419
419
  'images' component of a CCT dataset (a list of dicts with fields 'file_name' (str),
420
420
  'datetime' (datetime), and 'location' (str)).
421
- options (SequenceOptions): options parameterizing the assembly of images into sequences;
422
- see the SequenceOptions class for details.
421
+ options (SequenceOptions, optional): options parameterizing the assembly of images into
422
+ sequences; see the SequenceOptions class for details.
423
423
 
424
424
  Returns:
425
425
  image_info: if [image_info] is passed as a list, returns the list, otherwise returns
@@ -49,9 +49,15 @@ def write_yolo_dataset_file(yolo_dataset_file,
49
49
  class_list (list or str): an ordered list of class names (the first item will be class 0,
50
50
  etc.), or the name of a text file containing an ordered list of class names (one per
51
51
  line, starting from class zero).
52
- train_folder_relative (str, optional): train folder name, used only to populate dataset.yaml
53
- val_folder_relative (str, optional): val folder name, used only to populate dataset.yaml
54
- test_folder_relative (str, optional): test folder name, used only to populate dataset.yaml
52
+ train_folder_relative (str, optional): train folder name, used only to
53
+ populate dataset.yaml. Can also be a filename (e.g. a .txt file with image
54
+ files).
55
+ val_folder_relative (str, optional): val folder name, used only to
56
+ populate dataset.yaml. Can also be a filename (e.g. a .txt file with image
57
+ files).
58
+ test_folder_relative (str, optional): test folder name, used only to
59
+ populate dataset.yaml. Can also be a filename (e.g. a .txt file with image
60
+ files).
55
61
  """
56
62
 
57
63
  # Read class names
@@ -126,7 +132,7 @@ def coco_to_yolo(input_image_folder,
126
132
  either treat images as empty or error, depending on the value of [allow_empty_annotations].
127
133
  [allow_empty_annotations] has no effect if source_format is 'coco_camera_traps'.
128
134
  overwrite_images (bool, optional): over-write images in the output folder if they exist
129
- create_image_and_label_folder (bool, optional): whether to create separate folders called 'images' and
135
+ create_image_and_label_folders (bool, optional): whether to create separate folders called 'images' and
130
136
  'labels' in the YOLO output folder. If create_image_and_label_folders is False,
131
137
  a/b/c/image001.jpg will become a#b#c#image001.jpg, and the corresponding text file will
132
138
  be a#b#c#image001.txt. If create_image_and_label_folders is True, a/b/c/image001.jpg will become
@@ -540,7 +546,9 @@ def coco_to_yolo(input_image_folder,
540
546
  # ...def coco_to_yolo(...)
541
547
 
542
548
 
543
- def create_yolo_symlinks(source_folder,images_folder,labels_folder,
549
+ def create_yolo_symlinks(source_folder,
550
+ images_folder,
551
+ labels_folder,
544
552
  class_list_file=None,
545
553
  class_list_output_name='object.data',
546
554
  force_lowercase_image_extension=False):
@@ -550,6 +558,15 @@ def create_yolo_symlinks(source_folder,images_folder,labels_folder,
550
558
  Used to support preview/editing tools that assume images and labels are in separate
551
559
  folders.
552
560
 
561
+ Args:
562
+ source_folder (str): input folder
563
+ images_folder (str): output folder with links to images
564
+ labels_folder (str): output folder with links to labels
565
+ class_list_file (str, optional): path to classes.txt file
566
+ class_list_output_name (str, optional): output file to write with class information
567
+ force_lowercase_image_extension (bool, optional): create symlinks with, e.g., .jpg, even
568
+ if the input image is, e.g., .JPG
569
+
553
570
  :meta private:
554
571
  """
555
572
 
@@ -4,31 +4,104 @@ add_width_and_height_to_db.py
4
4
 
5
5
  Grabs width and height from actual image files for a .json database that is missing w/h.
6
6
 
7
- TODO: this is a one-off script waiting to be cleaned up for more general use.
8
-
9
7
  """
10
8
 
11
9
  #%% Imports and constants
12
10
 
11
+ import os
12
+ import sys
13
13
  import json
14
+ import argparse
15
+
16
+ from tqdm import tqdm
14
17
  from PIL import Image
18
+
15
19
  from megadetector.utils import ct_utils
16
20
 
17
- datafile = '/datadrive/snapshotserengeti/databases/snapshotserengeti.json'
18
- image_base = '/datadrive/snapshotserengeti/images/'
19
21
 
20
- def main(): # noqa
22
+ #%% Main function
23
+
24
+ def add_width_and_height_to_db(input_file,output_file,image_base_folder):
25
+ """
26
+ Add width and height to images in the COCO db [input_file]
27
+ that don't have non-None w/h values. Does not verify correctness
28
+ for images that already have non-None w/h values. Ignores files that
29
+ fail to open.
30
+
31
+ Args:
32
+ input_file (str): the COCO .json file to process
33
+ output_file (str): the COCO .json file to write
34
+ image_base_folder (str): image filenames in [input_file] should be relative
35
+ to this folder
36
+
37
+ Returns:
38
+ list: the list of image dicts that were modified
39
+ """
40
+
41
+ with open(input_file,'r') as f:
42
+ d = json.load(f)
43
+
44
+ to_return = []
45
+
46
+ for im in tqdm(d['images']):
47
+
48
+ if ('height' not in im) or ('width' not in im) or \
49
+ (im['height'] is None) or (im['width'] is None) or \
50
+ (im['height'] <= 0) or (im['width'] <= 0):
51
+
52
+ fn_relative = im['file_name']
53
+ fn_abs = os.path.join(image_base_folder,fn_relative)
54
+
55
+ if not os.path.isfile(fn_abs):
56
+ print('Could not find image file {}'.format(fn_abs))
57
+ continue
21
58
 
22
- with open(datafile,'r') as f:
23
- data = json.load(f)
59
+ try:
60
+ im_w, im_h = Image.open(fn_abs).size
61
+ except Exception as e:
62
+ print('Error opening file {}: {}'.format(fn_abs,str(e)))
63
+ continue
64
+
65
+ assert isinstance(im_w,int) and isinstance(im_h,int) and \
66
+ im_w > 0 and im_h > 0, \
67
+ 'Illegal size retrieved for {}'.format(fn_abs)
24
68
 
25
- for im in data['images']:
26
- if 'height' not in im:
27
- im_w, im_h = Image.open(image_base+im['file_name']).size
28
69
  im['height'] = im_h
29
70
  im['width'] = im_w
71
+ to_return.append(im)
72
+
73
+ # ...if we need to add width and/or height to this image
74
+
75
+ # ...for each image
76
+
77
+ ct_utils.write_json(output_file, d)
78
+
79
+ print('Added size information to {} of {} images'.format(
80
+ len(to_return), len(d['images'])))
81
+
82
+ return to_return
30
83
 
31
- ct_utils.write_json(datafile, data, indent=None)
84
+ # ...def add_width_and_height_to_db(...)
85
+
86
+
87
+ #%% Command-line driver
32
88
 
33
89
  if __name__ == '__main__':
34
- main()
90
+
91
+ parser = argparse.ArgumentParser()
92
+ parser.add_argument('input_file', type=str,
93
+ help='Input COCO-formatted .json file')
94
+ parser.add_argument('output_file', type=str,
95
+ help='Output COCO-formatted .json file')
96
+ parser.add_argument('image_base_folder', type=str,
97
+ help='Base directory for images')
98
+
99
+ if len(sys.argv[1:]) == 0:
100
+ parser.print_help()
101
+ parser.exit()
102
+
103
+ args = parser.parse_args()
104
+
105
+ add_width_and_height_to_db(args.input_file,
106
+ args.output_file,
107
+ args.image_base_folder)
@@ -72,8 +72,8 @@ def combine_cct_dictionaries(input_dicts, require_uniqueness=True):
72
72
  comment for details on merge rules.
73
73
 
74
74
  Args:
75
- input_dicts: list of CCT dicts
76
- require_uniqueness: bool, whether to require that the images in
75
+ input_dicts (list of dict): list of CCT dicts
76
+ require_uniqueness (bool, optional): whether to require that the images in
77
77
  each input_dict be unique
78
78
 
79
79
  Returns:
@@ -22,7 +22,8 @@ import json
22
22
  import os
23
23
  import sys
24
24
 
25
- from multiprocessing.pool import ThreadPool
25
+ from functools import partial
26
+ from multiprocessing.pool import Pool, ThreadPool
26
27
  from operator import itemgetter
27
28
  from tqdm import tqdm
28
29
 
@@ -61,6 +62,9 @@ class IntegrityCheckOptions:
61
62
  #: Number of threads to use for parallelization, set to <= 1 to disable parallelization
62
63
  self.nThreads = 10
63
64
 
65
+ #: Whether to use threads (rather than processes) for parallelization
66
+ self.parallelizeWithThreads = True
67
+
64
68
  #: Enable additional debug output
65
69
  self.verbose = True
66
70
 
@@ -70,9 +74,6 @@ class IntegrityCheckOptions:
70
74
  #: If True, error if the 'info' field is not present
71
75
  self.requireInfo = False
72
76
 
73
- # This is used in a medium-hacky way to share modified options across threads
74
- default_options = IntegrityCheckOptions()
75
-
76
77
 
77
78
  #%% Functions
78
79
 
@@ -93,7 +94,7 @@ def _check_image_existence_and_size(image,options=None):
93
94
  """
94
95
 
95
96
  if options is None:
96
- options = default_options
97
+ options = IntegrityCheckOptions()
97
98
 
98
99
  assert options.bCheckImageExistence
99
100
 
@@ -108,7 +109,12 @@ def _check_image_existence_and_size(image,options=None):
108
109
  return s
109
110
 
110
111
  # width, height = Image.open(file_path).size
111
- pil_im = open_image(file_path)
112
+ try:
113
+ pil_im = open_image(file_path)
114
+ except Exception as e:
115
+ s = 'Error opening {}: {}'.format(file_path,str(e))
116
+ return s
117
+
112
118
  width,height = pil_im.size
113
119
  if (not (width == image['width'] and height == image['height'])):
114
120
  s = 'Size mismatch for image {}: {} (reported {},{}, actual {},{})'.format(
@@ -125,6 +131,7 @@ def integrity_check_json_db(json_file, options=None):
125
131
 
126
132
  Args:
127
133
  json_file (str): filename to validate, or an already-loaded dict
134
+ options (IntegrityCheckOptions, optional): see IntegrityCheckOptions
128
135
 
129
136
  Returns:
130
137
  tuple: tuple containing:
@@ -172,7 +179,7 @@ def integrity_check_json_db(json_file, options=None):
172
179
  images = data['images']
173
180
  annotations = data['annotations']
174
181
  categories = data['categories']
175
-
182
+
176
183
  if options.requireInfo:
177
184
  assert 'info' in data, 'No info struct in database'
178
185
 
@@ -219,7 +226,7 @@ def integrity_check_json_db(json_file, options=None):
219
226
  # ...for each category
220
227
 
221
228
  if options.verbose:
222
- print('\nChecking images...')
229
+ print('\nChecking image records...')
223
230
 
224
231
  if options.iMaxNumImages > 0 and len(images) > options.iMaxNumImages:
225
232
 
@@ -321,15 +328,22 @@ def integrity_check_json_db(json_file, options=None):
321
328
  print('Checking image existence and/or image sizes...')
322
329
 
323
330
  if options.nThreads is not None and options.nThreads > 1:
331
+
332
+ if options.parallelizeWithThreads:
333
+ worker_string = 'threads'
334
+ else:
335
+ worker_string = 'processes'
336
+
324
337
  if options.verbose:
325
- print('Starting a pool of {} workers'.format(options.nThreads))
326
- pool = ThreadPool(options.nThreads)
327
- # results = pool.imap_unordered(lambda x: fetch_url(x,nImages), indexedUrlList)
328
- default_options.baseDir = options.baseDir
329
- default_options.bCheckImageSizes = options.bCheckImageSizes
330
- default_options.bCheckImageExistence = options.bCheckImageExistence
338
+ print('Starting a pool of {} {}'.format(options.nThreads,worker_string))
339
+ if options.parallelizeWithThreads:
340
+ pool = ThreadPool(options.nThreads)
341
+ else:
342
+ pool = Pool(options.nThreads)
331
343
  try:
332
- results = tqdm(pool.imap(_check_image_existence_and_size, images), total=len(images))
344
+ results = list(tqdm(pool.imap(
345
+ partial(_check_image_existence_and_size,options=options), images),
346
+ total=len(images)))
333
347
  finally:
334
348
  pool.close()
335
349
  pool.join()
@@ -10,6 +10,7 @@ each bounding box.
10
10
  #%% Imports and constants
11
11
 
12
12
  import os
13
+ import argparse
13
14
  import json
14
15
 
15
16
  from tqdm import tqdm
@@ -146,4 +147,52 @@ if False:
146
147
 
147
148
  #%% Command-line driver
148
149
 
149
- # TODO
150
+ def main():
151
+ """
152
+ Command-line interface to generate crops from a COCO Camera Traps .json file.
153
+ """
154
+
155
+ parser = argparse.ArgumentParser(
156
+ description='Generate cropped images from a COCO Camera Traps .json file'
157
+ )
158
+ parser.add_argument(
159
+ 'cct_file',
160
+ type=str,
161
+ help='COCO .json file to load data from'
162
+ )
163
+ parser.add_argument(
164
+ 'image_dir',
165
+ type=str,
166
+ help='Folder where images are located'
167
+ )
168
+ parser.add_argument(
169
+ 'output_dir',
170
+ type=str,
171
+ help='Folder to which we should write cropped images'
172
+ )
173
+ parser.add_argument(
174
+ '--padding',
175
+ type=int,
176
+ default=0,
177
+ help='Pixels to expand each box before cropping'
178
+ )
179
+ parser.add_argument(
180
+ '--flat_output',
181
+ action='store_true',
182
+ help='Flatten folder structure in output (preserves folder structure by default)'
183
+ )
184
+
185
+ args = parser.parse_args()
186
+
187
+ generate_crops_from_cct(
188
+ cct_file=args.cct_file,
189
+ image_dir=args.image_dir,
190
+ output_dir=args.output_dir,
191
+ padding=args.padding,
192
+ flat_output=args.flat_output
193
+ )
194
+
195
+ print(f'Generated crops in {args.output_dir}')
196
+
197
+ if __name__ == '__main__':
198
+ main()
@@ -8,10 +8,10 @@ Converts a folder of labelme-formatted .json files to COCO.
8
8
 
9
9
  #%% Constants and imports
10
10
 
11
- import json
12
11
  import os
13
- import uuid
14
12
  import sys
13
+ import json
14
+ import uuid
15
15
  import argparse
16
16
 
17
17
  from multiprocessing.pool import Pool, ThreadPool
@@ -232,6 +232,8 @@ def labelme_to_coco(input_folder,
232
232
  category_id_to_category_name (dict, optional): dict mapping category IDs to category names;
233
233
  really used to map Labelme category names to COCO category IDs. IDs will be auto-generated
234
234
  if this is None.
235
+ empty_category_name (str, optional): if images are present without boxes, the category name
236
+ we should use for whole-image (and not-very-COCO-like) empty categories.
235
237
  empty_category_id (int, optional): category ID to use for the not-very-COCO-like "empty" category;
236
238
  also see the no_json_handling parameter.
237
239
  info_struct (dict, optional): dict to stash in the "info" field of the resulting COCO dict