megadetector 5.0.8__py3-none-any.whl → 5.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (190)
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +65 -65
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
  20. api/batch_processing/postprocessing/compare_batch_results.py +113 -43
  21. api/batch_processing/postprocessing/convert_output_format.py +41 -16
  22. api/batch_processing/postprocessing/load_api_results.py +16 -17
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +52 -22
  25. api/batch_processing/postprocessing/merge_detections.py +14 -14
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
  27. api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +102 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -263
  71. data_management/coco_to_yolo.py +79 -58
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +62 -24
  76. data_management/databases/subset_json_db.py +24 -15
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -162
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -158
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +7 -7
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +65 -24
  120. data_management/labelme_to_yolo.py +8 -8
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +13 -13
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +44 -110
  128. data_management/lila/generate_lila_per_image_labels.py +55 -42
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +96 -33
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +110 -97
  135. data_management/remap_coco_categories.py +83 -83
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +30 -23
  138. data_management/wi_download_csv_to_coco.py +246 -239
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +300 -60
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +179 -113
  147. detection/run_inference_with_yolov5_val.py +108 -48
  148. detection/run_tiled_inference.py +111 -40
  149. detection/tf_detector.py +51 -29
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +228 -68
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -871
  157. md_utils/path_utils.py +460 -134
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +176 -60
  163. md_utils/write_html_image_list.py +40 -33
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +597 -291
  168. md_visualization/visualize_db.py +76 -48
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/METADATA +13 -7
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  173. taxonomy_mapping/__init__.py +0 -0
  174. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  175. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  176. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  177. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  178. taxonomy_mapping/retrieve_sample_image.py +12 -12
  179. taxonomy_mapping/simple_image_download.py +11 -11
  180. taxonomy_mapping/species_lookup.py +10 -10
  181. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  182. taxonomy_mapping/taxonomy_graph.py +47 -47
  183. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  184. data_management/cct_json_to_filename_json.py +0 -89
  185. data_management/cct_to_csv.py +0 -140
  186. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  187. detection/detector_training/copy_checkpoints.py +0 -43
  188. megadetector-5.0.8.dist-info/RECORD +0 -205
  189. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
  190. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/WHEEL +0 -0
@@ -1,20 +1,20 @@
- ########
- #
- # coco_to_yolo.py
- #
- # Converts a COCO-formatted dataset to a YOLO-formatted dataset, flattening
- # the dataset (to a single folder) in the process.
- #
- # If the input and output folders are the same, writes .txt files to the input folder,
- # and neither moves nor modifies images.
- #
- # Currently ignores segmentation masks, and errors if an annotation has a
- # segmentation polygon but no bbox.
- #
- # Has only been tested on a handful of COCO Camera Traps data sets; if you
- # use it for more general COCO conversion, YMMV.
- #
- ########
+ """
+
+ coco_to_yolo.py
+
+ Converts a COCO-formatted dataset to a YOLO-formatted dataset, flattening
+ the dataset (to a single folder) in the process.
+
+ If the input and output folders are the same, writes .txt files to the input folder,
+ and neither moves nor modifies images.
+
+ Currently ignores segmentation masks, and errors if an annotation has a
+ segmentation polygon but no bbox.
+
+ Has only been tested on a handful of COCO Camera Traps data sets; if you
+ use it for more general COCO conversion, YMMV.
+
+ """
 
  #%% Imports and constants
@@ -37,16 +37,16 @@ def write_yolo_dataset_file(yolo_dataset_file,
  val_folder_relative=None,
  test_folder_relative=None):
  """
- Write a YOLOv5 dataset.yaml file to the absolute path yolo_dataset_file (should
+ Write a YOLOv5 dataset.yaml file to the absolute path [yolo_dataset_file] (should
  have a .yaml extension, though it's only a warning if it doesn't).
-
- [dataset_base_dir] should be the absolute path of the dataset root.
 
- yolo_dataset_file does not have to be within dataset_base_dir.
-
- [class_list] can be an ordered list of class names (the first item will be class 0,
- etc.), or the name of a text file containing an ordered list of class names (one per
- line, starting from class zero).
+ Args:
+ yolo_dataset_file (str): the file, typically ending in .yaml or .yml, to write.
+ Does not have to be within dataset_base_dir.
+ dataset_base_dir (str): the absolute base path of the YOLO dataset
+ class_list (list or str): an ordered list of class names (the first item will be class 0,
+ etc.), or the name of a text file containing an ordered list of class names (one per
+ line, starting from class zero).
  """
 
  # Read class names
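For orientation, the rewritten docstring above corresponds to a call roughly like the sketch below. This is illustrative only: the import path, the train/val folder arguments, and all paths and class names are assumptions, not code taken from the package.

# Illustrative only; paths and class names are hypothetical.
from data_management.coco_to_yolo import write_yolo_dataset_file  # assumed import path

write_yolo_dataset_file(
    yolo_dataset_file='/data/yolo-dataset/dataset.yaml',  # .yaml extension expected, else a warning
    dataset_base_dir='/data/yolo-dataset',                # absolute base path of the YOLO dataset
    class_list=['animal', 'person', 'vehicle'],           # ordered list; first item is class 0
    train_folder_relative='train',                        # assumed optional argument
    val_folder_relative='val')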
@@ -82,7 +82,9 @@ def write_yolo_dataset_file(yolo_dataset_file,
  # ...def write_yolo_dataset_file(...)
 
 
- def coco_to_yolo(input_image_folder,output_folder,input_file,
+ def coco_to_yolo(input_image_folder,
+ output_folder,
+ input_file,
  source_format='coco',
  overwrite_images=False,
  create_image_and_label_folders=False,
@@ -97,7 +99,7 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
  write_output=True,
  flatten_paths=True):
  """
- Convert a COCO-formatted dataset to a YOLO-formatted dataset, optionally flattening the
+ Converts a COCO-formatted dataset to a YOLO-formatted dataset, optionally flattening the
  dataset to a single folder in the process.
 
  If the input and output folders are the same, writes .txt files to the input folder,
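As a side note on what converting "COCO-formatted" boxes to "YOLO-formatted" labels means: COCO annotations store absolute [x_min, y_min, width, height] pixel coordinates, while YOLO label files store normalized [x_center, y_center, width, height]. The helper below is not part of the package; it is a minimal sketch of that per-box arithmetic.

# Minimal sketch (not from the package) of the COCO -> YOLO box conversion.
def coco_box_to_yolo(box, image_width, image_height):
    x_min, y_min, box_w, box_h = box                 # COCO: absolute pixels
    x_center = (x_min + box_w / 2.0) / image_width   # YOLO: normalized center x
    y_center = (y_min + box_h / 2.0) / image_height  # YOLO: normalized center y
    return [x_center, y_center, box_w / image_width, box_h / image_height]

# A 100x50 box at (200,100) in a 1000x800 image becomes [0.25, 0.15625, 0.1, 0.0625].
print(coco_box_to_yolo([200, 100, 100, 50], 1000, 800))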
@@ -106,32 +108,51 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
  Currently ignores segmentation masks, and errors if an annotation has a
  segmentation polygon but no bbox.
 
- source_format can be 'coco' (default) or 'coco_camera_traps'. The only difference
- is that when source_format is 'coco_camera_traps', we treat an image with a non-bbox
- annotation with a category id of 0 as a special case, i.e. that's how an empty image
- is indicated. The original COCO standard is a little ambiguous on this issue. If
- source_format is 'coco', we either treat images as empty or error, depending on the value
- of allow_empty_annotations. allow_empty_annotations has no effect if source_format is
- 'coco_camera_traps'.
-
- If create_image_and_label_folders is false, a/b/c/image001.jpg will become a#b#c#image001.jpg,
- and the corresponding text file will be a#b#c#image001.txt.
-
- If create_image_and_label_folders is true, a/b/c/image001.jpg will become
- images/a#b#c#image001.jpg, and the corresponding text file will be
- labels/a#b#c#image001.txt. Some tools still use this variant of the YOLO standard.
-
- If clip_boxes is True, bounding boxes coordinates will be clipped to [0,1].
-
- image_id_to_output_image_json_file is an optional *output* file, to which we will write
- a mapping from image IDs to output file names.
-
- images_to_exclude is a list of image files (relative paths in the input folder) that we
- should ignore.
-
- write_output determines whether we actually copy images and write annotations;
- setting this to False basically puts this function in "test mode". The class list
- file is written regardless of the value of write_output.
+ Args:
+ input_image_folder (str): the folder where images live; filenames in the COCO .json
+ file [input_file] should be relative to this folder
+ output_folder (str): the base folder for the YOLO dataset; can be the same as
+ [input_image_folder], in which case images are left alone
+ input_file (str): a .json file in COCO format
+ source_format (str, optional): can be 'coco' (default) or 'coco_camera_traps'. The only difference
+ is that when source_format is 'coco_camera_traps', we treat an image with a non-bbox
+ annotation with a category id of 0 as a special case, i.e. that's how an empty image
+ is indicated. The original COCO standard is a little ambiguous on this issue. If
+ source_format is 'coco', we either treat images as empty or error, depending on the value
+ of [allow_empty_annotations]. [allow_empty_annotations] has no effect if source_format is
+ 'coco_camera_traps'.
+ create_image_and_label_folders (bool, optional): whether to create separate folders called 'images' and
+ 'labels' in the YOLO output folder. If create_image_and_label_folders is False,
+ a/b/c/image001.jpg will become a#b#c#image001.jpg, and the corresponding text file will
+ be a#b#c#image001.txt. If create_image_and_label_folders is True, a/b/c/image001.jpg will become
+ images/a#b#c#image001.jpg, and the corresponding text file will be
+ labels/a#b#c#image001.txt.
+ clip_boxes (bool, optional): whether to clip bounding box coordinates to the range [0,1] before
+ converting to YOLO xywh format
+ image_id_to_output_image_json_file (str, optional): an optional *output* file, to which we will write
+ a mapping from image IDs to output file names
+ images_to_exclude (list, optional): a list of image files (relative paths in the input folder) that we
+ should ignore
+ path_replacement_char (str, optional): only relevant if [flatten_paths] is True; this is used to replace
+ path separators, e.g. if [path_replacement_char] is '#' and [flatten_paths] is True, a/b/c/d.jpg
+ becomes a#b#c#d.jpg
+ category_names_to_exclude (str, optional): category names that should not be represented in the
+ YOLO output; only impacts annotations, does not prevent copying images. There's almost no reason
+ you would want to specify both this and [category_names_to_include].
+ category_names_to_include (str, optional): allow-list of category names that should be represented in the
+ YOLO output; only impacts annotations, does not prevent copying images. There's almost no reason
+ you would want to specify both this and [category_names_to_exclude].
+ write_output (bool, optional): determines whether we actually copy images and write annotations;
+ setting this to False mostly puts this function in "dry run" mode. The class list
+ file is written regardless of the value of write_output.
+
+ Returns:
+ dict: information about the coco --> yolo mapping, containing at least the fields:
+
+ - class_list_filename: the filename to which we wrote the flat list of class names required
+ by the YOLO format.
+ - source_image_to_dest_image: a dict mapping source images to destination images
+ - coco_id_to_yolo_id: a dict mapping COCO category IDs to YOLO category IDs
  """
 
  ## Validate input
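A hypothetical invocation using the arguments documented in the new docstring; the import path and all folder and file names here are assumptions rather than code from the package.

# Illustrative only; paths are hypothetical.
from data_management.coco_to_yolo import coco_to_yolo  # assumed import path

results = coco_to_yolo(input_image_folder='/data/camera-trap-images',
                       output_folder='/data/yolo-dataset',
                       input_file='/data/camera-trap-images/labels_coco.json',
                       source_format='coco_camera_traps',  # category 0 non-bbox annotations mean "empty"
                       clip_boxes=True,                    # clip to [0,1] before the xywh conversion
                       write_output=True)

# Per the documented return value
print(results['class_list_filename'])
print(len(results['source_image_to_dest_image']), 'images mapped')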
@@ -500,12 +521,12 @@ def create_yolo_symlinks(source_folder,images_folder,labels_folder,
  class_list_output_name='object.data',
  force_lowercase_image_extension=False):
  """
- Given a YOLO-formatted folder of images and .txt files, create a folder
+ Given a YOLO-formatted folder of images and .txt files, creates a folder
  of symlinks to all the images, and a folder of symlinks to all the labels.
- Used to support preview/editing tools (like BoundingBoxEditor) that assume
- images and labels are in separate folders.
+ Used to support preview/editing tools that assume images and labels are in separate
+ folders.
 
- images_folder and labels_folder are absolute paths.
+ :meta private:
  """
 
  assert source_folder != images_folder and source_folder != labels_folder
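Although the function above is now marked :meta private:, a call might look roughly like this sketch; the import path and folder names are hypothetical assumptions.

# Illustrative only; paths are hypothetical.
from data_management.coco_to_yolo import create_yolo_symlinks  # assumed import path

# Mirror a flat YOLO folder into separate "images" and "labels" symlink folders,
# for preview/editing tools that expect that layout.
create_yolo_symlinks(source_folder='/data/yolo-dataset',
                     images_folder='/data/yolo-dataset-view/images',
                     labels_folder='/data/yolo-dataset-view/labels',
                     force_lowercase_image_extension=True)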
@@ -619,7 +640,7 @@ def main():
  parser.add_argument(
  '--create_bounding_box_editor_symlinks',
  action='store_true',
- help='Prepare symlinks so the whole folder is BoundingBoxEditor-friendly')
+ help='Prepare symlinks so the whole folder appears to contain "images" and "labels" folders')
 
  if len(sys.argv[1:]) == 0:
  parser.print_help()
@@ -1,10 +1,12 @@
- ########
- #
- # add_width_and_height_to_db.py
- #
- # Grabs width and height from actual image files for a .json database that is missing w/h.
- #
- ########
+ """
+
+ add_width_and_height_to_db.py
+
+ Grabs width and height from actual image files for a .json database that is missing w/h.
+
+ TODO: this is a one-off script waiting to be cleaned up for more general use.
+
+ """
 
  #%% Imports and constants
@@ -14,16 +16,18 @@ from PIL import Image
  datafile = '/datadrive/snapshotserengeti/databases/snapshotserengeti.json'
  image_base = '/datadrive/snapshotserengeti/images/'
 
+ def main():
 
- #%% Execution
+ with open(datafile,'r') as f:
+ data = json.load(f)
 
- with open(datafile,'r') as f:
- data = json.load(f)
+ for im in data['images']:
+ if 'height' not in im:
+ im_w, im_h = Image.open(image_base+im['file_name']).size
+ im['height'] = im_h
+ im['width'] = im_w
 
- for im in data['images']:
- if 'height' not in im:
- im_w, im_h = Image.open(image_base+im['file_name']).size
- im['height'] = im_h
- im['width'] = im_w
+ json.dump(data, open(datafile,'w'))
 
- json.dump(data, open(datafile,'w'))
+ if __name__ == '__main__':
+ main()
@@ -1,17 +1,19 @@
- ########
- #
- # combine_coco_camera_traps_files.py
- #
- # Merges two or more .json files in COCO Camera Traps format, optionally
- # writing the results to another .json file.
- #
- # - Concatenates image lists, erroring if images are not unique.
- # - Errors on unrecognized fields.
- # - Checks compatibility in info structs, within reason.
- #
- # combine_coco_camera_traps_files input1.json input2.json ... inputN.json output.json
- #
- ########
+ """
+
+ combine_coco_camera_traps_files.py
+
+ Merges two or more .json files in COCO Camera Traps format, optionally
+ writing the results to another .json file.
+
+ - Concatenates image lists, erroring if images are not unique.
+ - Errors on unrecognized fields.
+ - Checks compatibility in info structs, within reason.
+
+ *Example command-line invocation*
+
+ combine_coco_camera_traps_files input1.json input2.json ... inputN.json output.json
+
+ """
 
  #%% Constants and imports
@@ -19,26 +21,25 @@ import argparse
  import json
  import sys
 
- from typing import Any, Dict, Iterable, Mapping, List, Optional
-
 
  #%% Merge functions
 
- def combine_cct_files(input_files: List[str],
- output_file: Optional[str] = None,
- require_uniqueness: Optional[bool] = True,
- filename_prefixes: Optional[dict] = None
- ) -> Dict[str, Any]:
+ def combine_cct_files(input_files, output_file=None, require_uniqueness=True,
+ filename_prefixes=None):
  """
- Merges list of COCO Camera Traps files *input_files* into a single
- dictionary, optionally writing the result to *output_file*.
+ Merges the list of COCO Camera Traps files [input_files] into a single
+ dictionary, optionally writing the result to [output_file].
 
  Args:
- input_files: list of str, paths to JSON detection files
- output_file: optional str, path to write merged JSON
- require_uniqueness: bool, whether to require that the images in
+ input_files (list): paths to CCT .json files
+ output_file (str, optional): path to write merged .json file
+ require_uniqueness (bool): whether to require that the images in
  each input_dict be unique
+
+ Returns:
+ dict: the merged COCO-formatted .json dict
  """
+
  input_dicts = []
  print('Loading input files')
  for fn in input_files:
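A sketch of how the simplified signature above might be called; the import path and filenames are assumptions, not taken from the package.

# Illustrative only; filenames are hypothetical.
from data_management.databases.combine_coco_camera_traps_files import combine_cct_files  # assumed path

merged = combine_cct_files(input_files=['survey_a.json', 'survey_b.json'],
                           output_file='merged.json',
                           require_uniqueness=True)  # error if image filenames collide
print(len(merged['images']), 'images in the merged CCT dict')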
@@ -61,11 +62,9 @@ def combine_cct_files(input_files: List[str],
  return merged_dict
 
 
- def combine_cct_dictionaries(input_dicts: Iterable[Mapping[str, Any]],
- require_uniqueness: Optional[bool] = True
- ) -> Dict[str, Any]:
+ def combine_cct_dictionaries(input_dicts, require_uniqueness=True):
  """
- Merges the list of COCO Camera Traps dictionaries *input_dicts*. See header
+ Merges the list of COCO Camera Traps dictionaries [input_dicts]. See module header
  comment for details on merge rules.
 
  Args:
@@ -73,7 +72,8 @@ def combine_cct_dictionaries(input_dicts: Iterable[Mapping[str, Any]],
  require_uniqueness: bool, whether to require that the images in
  each input_dict be unique
 
- Returns: dict, represents the merged JSON
+ Returns:
+ dict: the merged COCO-formatted .json dict
  """
 
  filename_to_image = {}
@@ -177,12 +177,16 @@ def combine_cct_dictionaries(input_dicts: Iterable[Mapping[str, Any]],
  'categories': all_categories,
  'images': sorted_images,
  'annotations': all_annotations}
+
  return merged_dict
 
+ # ...combine_cct_dictionaries(...)
+
 
 
  #%% Command-line driver
  def main():
+
  parser = argparse.ArgumentParser()
  parser.add_argument(
  'input_paths', nargs='+',
@@ -1,19 +1,19 @@
- ########
- #
- # integrity_check_json_db.py
- #
- # Does some integrity-checking and computes basic statistics on a db, specifically:
- #
- # * Verifies that required fields are present and have the right types
- # * Verifies that annotations refer to valid images
- # * Verifies that annotations refer to valid categories
- # * Verifies that image, category, and annotation IDs are unique
- # * Optionally checks file existence
- # * Finds un-annotated images
- # * Finds unused categories
- # * Prints a list of categories sorted by count
- #
- ########
+ """
+
+ integrity_check_json_db.py
+
+ Does some integrity-checking and computes basic statistics on a COCO Camera Traps .json file, specifically:
+
+ * Verifies that required fields are present and have the right types
+ * Verifies that annotations refer to valid images
+ * Verifies that annotations refer to valid categories
+ * Verifies that image, category, and annotation IDs are unique
+ * Optionally checks file existence
+ * Finds un-annotated images
+ * Finds unused categories
+ * Prints a list of categories sorted by count
+
+ """
 
  #%% Constants and environment
@@ -33,14 +33,32 @@ from md_utils import ct_utils
  #%% Classes and environment
 
  class IntegrityCheckOptions:
+ """
+ Options for integrity_check_json_db()
+ """
 
+ #: Image path; the filenames in the .json file should be relative to this folder
  baseDir = ''
+
+ #: Should we validate the image sizes?
  bCheckImageSizes = False
+
+ #: Should we check that all the images in the .json file exist on disk?
  bCheckImageExistence = False
+
+ #: Should we search [baseDir] for images that are not used in the .json file?
  bFindUnusedImages = False
+
+ #: Should we require that all images in the .json file have a 'location' field?
  bRequireLocation = True
+
+ #: For debugging, limit the number of images we'll process
  iMaxNumImages = -1
+
+ #: Number of threads to use for parallelization, set to <= 1 to disable parallelization
  nThreads = 10
+
+ #: Enable additional debug output
  verbose = True
 
@@ -50,7 +68,21 @@ defaultOptions = IntegrityCheckOptions()
 
  #%% Functions
 
- def check_image_existence_and_size(image,options=None):
+ def _check_image_existence_and_size(image,options=None):
+ """
+ Validate the image represented in the CCT image dict [image], which should have fields:
+
+ * file_name
+ * width
+ * height
+
+ Args:
+ image (dict): image to validate
+ options (IntegrityCheckOptions): parameters impacting validation
+
+ Returns:
+ bool: whether this image passes validation
+ """
 
  if options is None:
  options = defaultOptions
@@ -80,9 +112,17 @@ def check_image_existence_and_size(image,options=None):
 
 
  def integrity_check_json_db(jsonFile, options=None):
  """
- jsonFile can be a filename or an already-loaded json database
+ Does some integrity-checking and computes basic statistics on a COCO Camera Traps .json file; see
+ module header comment for a list of the validation steps.
 
- return sortedCategories, data, errorInfo
+ Args:
+ jsonFile (str): filename to validate, or an already-loaded dict
+
+ Returns:
+ tuple: tuple containing:
+ - sortedCategories (dict): list of categories used in [jsonFile], sorted by frequency
+ - data (dict): the data loaded from [jsonFile]
+ - errorInfo (dict): specific validation errors
  """
 
  if options is None:
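Combining the newly documented IntegrityCheckOptions fields with the docstring above, a call might look like the sketch below; the import path and all paths are assumptions.

# Illustrative only; paths are hypothetical.
from data_management.databases.integrity_check_json_db import (  # assumed import path
    IntegrityCheckOptions, integrity_check_json_db)

options = IntegrityCheckOptions()
options.baseDir = '/data/camera-trap-images'  # filenames in the .json are relative to this
options.bCheckImageExistence = True           # verify every image exists on disk
options.bCheckImageSizes = False
options.nThreads = 1                          # <= 1 disables parallelization

sortedCategories, data, errorInfo = integrity_check_json_db('labels_coco.json', options)
print(len(errorInfo), 'validation error entries')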
@@ -264,11 +304,11 @@ def integrity_check_json_db(jsonFile, options=None):
  defaultOptions.baseDir = options.baseDir
  defaultOptions.bCheckImageSizes = options.bCheckImageSizes
  defaultOptions.bCheckImageExistence = options.bCheckImageExistence
- results = tqdm(pool.imap(check_image_existence_and_size, images), total=len(images))
+ results = tqdm(pool.imap(_check_image_existence_and_size, images), total=len(images))
  else:
  results = []
  for im in tqdm(images):
- results.append(check_image_existence_and_size(im,options))
+ results.append(_check_image_existence_and_size(im,options))
 
  for iImage,r in enumerate(results):
  if not r:
@@ -407,9 +447,7 @@ def main():
  ct_utils.args_to_object(args, options)
  integrity_check_json_db(args.jsonFile,options)
 
-
- if __name__ == '__main__':
-
+ if __name__ == '__main__':
  main()
 
@@ -1,14 +1,14 @@
- ########
- #
- # subset_json_db.py
- #
- # Select a subset of images (and associated annotations) from a .json file
- # in COCO Camera Traps format.
- #
- # To subset the .json files in the MegaDetector output format, see
- # subset_json_detector_output.py
- #
- ########
+ """
+
+ subset_json_db.py
+
+ Select a subset of images (and associated annotations) from a .json file in COCO
+ Camera Traps format based on a string query.
+
+ To subset .json files in the MegaDetector output format, see
+ subset_json_detector_output.py.
+
+ """
 
  #%% Constants and imports
@@ -26,6 +26,16 @@ def subset_json_db(input_json, query, output_json=None, ignore_case=False):
  Given a json file (or dictionary already loaded from a json file), produce a new
  database containing only the images whose filenames contain the string 'query',
  optionally writing that DB output to a new json file.
+
+ Args:
+ input_json (str): COCO Camera Traps .json file to load, or an already-loaded dict
+ query (str): string to query for, only include images in the output whose filenames
+ contain this string.
+ output_json (str, optional): file to write the resulting .json file to
+ ignore_case (bool, optional): whether to perform a case-insensitive search for [query]
+
+ Returns:
+ dict: possibly-modified CCT dictionary
  """
 
  if ignore_case:
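A sketch of the documented call pattern; the import path, filenames, and query string are assumptions.

# Illustrative only; filenames and the query string are hypothetical.
from data_management.databases.subset_json_db import subset_json_db  # assumed import path

subset = subset_json_db(input_json='all_images.json',
                        query='site_042/',       # keep images whose filenames contain this string
                        output_json='site_042.json',
                        ignore_case=True)
print(len(subset['images']), 'images matched the query')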
@@ -76,8 +86,8 @@ if False:
 
  #%%
 
- input_json = r"E:\Statewide_wolf_container\idfg_20190409.json"
- output_json = r"E:\Statewide_wolf_container\idfg_20190409_clearcreek.json"
+ input_json = r"e:\Statewide_wolf_container\idfg_20190409.json"
+ output_json = r"e:\Statewide_wolf_container\idfg_20190409_clearcreek.json"
  query = 'clearcreek'
  ignore_case = True
  db = subset_json_db(input_json, query, output_json, ignore_case)
@@ -101,6 +111,5 @@ def main():
 
  subset_json_db(args.input_json,args.query,args.output_json,args.ignore_case)
 
- if __name__ == '__main__':
-
+ if __name__ == '__main__':
  main()
@@ -1,11 +1,11 @@
- ########
- #
- # generate_crops_from_cct.py
- #
- # Given a .json file in COCO Camera Traps format, create a cropped image for
- # each bounding box.
- #
- ########
+ """
+
+ generate_crops_from_cct.py
+
+ Given a .json file in COCO Camera Traps format, creates a cropped image for
+ each bounding box.
+
+ """
 
  #%% Imports and constants
@@ -19,6 +19,23 @@ from PIL import Image
  #%% Functions
 
  def generate_crops_from_cct(cct_file,image_dir,output_dir,padding=0,flat_output=True):
+ """
+ Given a .json file in COCO Camera Traps format, creates a cropped image for
+ each bounding box.
+
+ Args:
+ cct_file (str): the COCO .json file from which we should load data
+ image_dir (str): the folder where the images live; filenames in the .json
+ file should be relative to this folder
+ output_dir (str): the folder where we should write cropped images
+ padding (float, optional): number of pixels we should expand each box before
+ cropping
+ flat_output (bool, optional): if False, folder structure will be preserved
+ in the output, e.g. the image a/b/c/d.jpg will result in image files
+ in the output folder called, e.g., a/b/c/d_crop_000_id_12345.jpg. If
+ [flat_output] is True, the corresponding output image will be
+ a_b_c_d_crop_000_id_12345.jpg.
+ """
 
  ## Read and validate input
@@ -123,45 +140,10 @@ if False:
  flat_output = True
  output_dir = '/home/user/tmp/noaa-fish-crops'
 
- #%%
-
  generate_crops_from_cct(cct_file,image_dir,output_dir,padding,flat_output=True)
  files = os.listdir(output_dir)
 
- #%%
-
- import random
- fn = os.path.join(output_dir,random.choice(files))
-
- from md_utils.path_utils import open_file
- open_file(fn)
-
-
- #%% Scrap
-
- if False:
 
- pass
+ #%% Command-line driver
 
-
-
- from md_visualization.visualize_db import DbVizOptions,visualize_db
-
- db_path = cct_file
- output_dir = os.path.expanduser('~/tmp/noaa-fish-preview')
- image_base_dir = image_dir
-
- options = DbVizOptions()
- options.num_to_visualize = None
-
- options.parallelize_rendering_n_cores = 5
- options.parallelize_rendering = True
-
- options.viz_size = (-1, -1)
- options.trim_to_images_with_bboxes = True
-
- options.box_thickness = 4
- options.box_expansion = 25
-
- htmlOutputFile,db = visualize_db(db_path,output_dir,image_base_dir,options)
-
+ # TODO
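Finally, a sketch of the call documented in the new generate_crops_from_cct() docstring above; the import path and all paths are assumptions.

# Illustrative only; paths are hypothetical.
from data_management.generate_crops_from_cct import generate_crops_from_cct  # assumed import path

generate_crops_from_cct(cct_file='labels_coco.json',
                        image_dir='/data/camera-trap-images',
                        output_dir='/data/crops',
                        padding=10,         # expand each box by ~10 pixels before cropping
                        flat_output=True)   # a/b/c/d.jpg -> a_b_c_d_crop_000_id_12345.jpg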