megadetector 5.0.8__py3-none-any.whl → 5.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (190) hide show
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +65 -65
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
  20. api/batch_processing/postprocessing/compare_batch_results.py +113 -43
  21. api/batch_processing/postprocessing/convert_output_format.py +41 -16
  22. api/batch_processing/postprocessing/load_api_results.py +16 -17
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +52 -22
  25. api/batch_processing/postprocessing/merge_detections.py +14 -14
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
  27. api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +102 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -263
  71. data_management/coco_to_yolo.py +79 -58
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +62 -24
  76. data_management/databases/subset_json_db.py +24 -15
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -162
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -158
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +7 -7
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +65 -24
  120. data_management/labelme_to_yolo.py +8 -8
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +13 -13
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +44 -110
  128. data_management/lila/generate_lila_per_image_labels.py +55 -42
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +96 -33
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +110 -97
  135. data_management/remap_coco_categories.py +83 -83
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +30 -23
  138. data_management/wi_download_csv_to_coco.py +246 -239
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +300 -60
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +179 -113
  147. detection/run_inference_with_yolov5_val.py +108 -48
  148. detection/run_tiled_inference.py +111 -40
  149. detection/tf_detector.py +51 -29
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +228 -68
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -871
  157. md_utils/path_utils.py +460 -134
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +176 -60
  163. md_utils/write_html_image_list.py +40 -33
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +597 -291
  168. md_visualization/visualize_db.py +76 -48
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/METADATA +13 -7
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  173. taxonomy_mapping/__init__.py +0 -0
  174. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  175. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  176. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  177. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  178. taxonomy_mapping/retrieve_sample_image.py +12 -12
  179. taxonomy_mapping/simple_image_download.py +11 -11
  180. taxonomy_mapping/species_lookup.py +10 -10
  181. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  182. taxonomy_mapping/taxonomy_graph.py +47 -47
  183. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  184. data_management/cct_json_to_filename_json.py +0 -89
  185. data_management/cct_to_csv.py +0 -140
  186. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  187. detection/detector_training/copy_checkpoints.py +0 -43
  188. megadetector-5.0.8.dist-info/RECORD +0 -205
  189. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
  190. {megadetector-5.0.8.dist-info → megadetector-5.0.9.dist-info}/WHEEL +0 -0
@@ -1,27 +1,16 @@
1
- ########
2
- #
3
- # train_classifier_tf.py
4
- #
5
- # Train an EfficientNet classifier.
6
- #
7
- # Currently the implementation of multi-label multi-class classification is
8
- # non-functional.
9
- #
10
- # During training, start tensorboard from within the classification/ directory:
11
- # tensorboard --logdir run --bind_all --samples_per_plugin scalars=0,images=0
12
- #
13
- ########
1
+ """
14
2
 
15
- #%% Example usage
3
+ train_classifier_tf.py
16
4
 
17
- """
18
- python train_classifier_tf.py run_idfg /ssd/crops_sq \
19
- -m "efficientnet-b0" --pretrained --finetune --label-weighted \
20
- --epochs 50 --batch-size 512 --lr 1e-4 \
21
- --seed 123 \
22
- --logdir run_idfg
23
- """
5
+ Train an EfficientNet classifier.
24
6
 
7
+ Currently the implementation of multi-label multi-class classification is
8
+ non-functional.
9
+
10
+ During training, start tensorboard from within the classification/ directory:
11
+ tensorboard --logdir run --bind_all --samples_per_plugin scalars=0,images=0
12
+
13
+ """
25
14
 
26
15
  #%% Imports and constants
27
16
 
@@ -47,7 +36,6 @@ from classification.train_utils import (
47
36
  imgs_with_confidences, load_dataset_csv, prefix_all_keys)
48
37
  from md_visualization import plot_utils
49
38
 
50
-
51
39
  AUTOTUNE = tf.data.experimental.AUTOTUNE
52
40
 
53
41
  # match pytorch EfficientNet model names
@@ -63,6 +51,17 @@ EFFICIENTNET_MODELS: Mapping[str, Mapping[str, Any]] = {
63
51
  }
64
52
 
65
53
 
54
+ #%% Example usage
55
+
56
+ """
57
+ python train_classifier_tf.py run_idfg /ssd/crops_sq \
58
+ -m "efficientnet-b0" --pretrained --finetune --label-weighted \
59
+ --epochs 50 --batch-size 512 --lr 1e-4 \
60
+ --seed 123 \
61
+ --logdir run_idfg
62
+ """
63
+
64
+
66
65
  #%% Support functions
67
66
 
68
67
  def create_dataset(
@@ -1,13 +1,13 @@
1
- ########
2
- #
3
- # train_utils.py
4
- #
5
- # Utility functions useful for training a classifier.
6
- #
7
- # This script should NOT depend on any other file within this repo. It should
8
- # especially be agnostic to PyTorch vs. TensorFlow.
9
- #
10
- ########
1
+ """
2
+
3
+ train_utils.py
4
+
5
+ Utility functions useful for training a classifier.
6
+
7
+ This script should NOT depend on any other file within this repo. It should
8
+ especially be agnostic to PyTorch vs. TensorFlow.
9
+
10
+ """
11
11
 
12
12
  #%% Imports
13
13
 
File without changes
File without changes
@@ -1,37 +1,20 @@
1
- ########
2
- #
3
- # annotation_constants.py
4
- #
5
- # Shared constants used to interpret annotation output
6
- #
7
- # Categories assigned to bounding boxes. Used throughout our repo; do not change unless
8
- # you are Dan or Siyu. In fact, do not change unless you are both Dan *and* Siyu.
9
- #
10
- # We use integer indices here; this is different than the API output .json file,
11
- # where indices are string integers.
12
- #
13
- ########
14
-
15
- NUM_DETECTOR_CATEGORIES = 3 # this is for choosing colors, so ignoring the "empty" class
16
-
17
- # This is the label mapping used for our incoming iMerit annotations
18
- # Only used to parse the incoming annotations. In our database, the string name is used to avoid confusion
19
- annotation_bbox_categories = [
20
- {'id': 0, 'name': 'empty'},
21
- {'id': 1, 'name': 'animal'},
22
- {'id': 2, 'name': 'person'},
23
- {'id': 3, 'name': 'group'}, # group of animals
24
- {'id': 4, 'name': 'vehicle'}
25
- ]
1
+ """
2
+
3
+ annotation_constants.py
26
4
 
27
- annotation_bbox_category_id_to_name = {}
28
- annotation_bbox_category_name_to_id = {}
5
+ Defines default categories for MegaDetector output boxes.
29
6
 
30
- for cat in annotation_bbox_categories:
31
- annotation_bbox_category_id_to_name[cat['id']] = cat['name']
32
- annotation_bbox_category_name_to_id[cat['name']] = cat['id']
7
+ Used throughout the repo; do not change unless you are Dan or Siyu. In fact, do not change unless
8
+ you are both Dan *and* Siyu.
33
9
 
34
- # MegaDetector outputs
10
+ We use integer IDs here; this is different from the MD .json file format,
11
+ where indices are string integers.
12
+
13
+ """
14
+
15
+ #%% Constants
16
+
17
+ # MegaDetector output categories (the "empty" category is implicit)
35
18
  detector_bbox_categories = [
36
19
  {'id': 0, 'name': 'empty'},
37
20
  {'id': 1, 'name': 'animal'},
@@ -39,9 +22,13 @@ detector_bbox_categories = [
39
22
  {'id': 3, 'name': 'vehicle'}
40
23
  ]
41
24
 
25
+ # This is used for choosing colors, so it ignores the "empty" class.
26
+ NUM_DETECTOR_CATEGORIES = len(detector_bbox_categories) - 1
27
+
42
28
  detector_bbox_category_id_to_name = {}
43
29
  detector_bbox_category_name_to_id = {}
44
30
 
45
31
  for cat in detector_bbox_categories:
46
32
  detector_bbox_category_id_to_name[cat['id']] = cat['name']
47
33
  detector_bbox_category_name_to_id[cat['name']] = cat['id']
34
+
@@ -0,0 +1,238 @@
1
+ """
2
+
3
+ camtrap_dp_to_coco.py
4
+
5
+ Parse a very limited subset of the Camtrap DP data package format:
6
+
7
+ https://camtrap-dp.tdwg.org/
8
+
9
+ ...and convert to COCO format. Assumes that all required metadata files have been
10
+ put in the same directory (which is standard).
11
+
12
+ Does not currently parse bounding boxes, just attaches species labels to images.
13
+
14
+ Currently supports only sequence-level labeling.
15
+
16
+ """
17
+
18
+ #%% Imports and constants
19
+
20
+ import os
21
+ import json
22
+ import pandas as pd
23
+
24
+ from dateutil import parser as dateparser
25
+
26
+ from collections import defaultdict
27
+
28
+
29
+ #%% Functions
30
+
31
def camtrap_dp_to_coco(camtrap_dp_folder,output_file=None):
    """
    Converts the Camtrap DP package in [camtrap_dp_folder] to COCO format.

    Only sequence-level (event-level) labels are parsed; bounding boxes are not.
    Does not validate images, just converts.  Use integrity_check_json_db to validate
    the resulting COCO file.

    Args:
        camtrap_dp_folder (str): folder containing datapackage.json, deployments.csv,
            events.csv, media.csv, and observations.csv
        output_file (str, optional): if not None, the COCO-formatted dict is written
            to this .json file

    Returns:
        dict: a COCO-formatted dict with 'images', 'annotations', 'categories', and
        'info' fields

    Raises:
        AssertionError: if a required file or resource is missing, if the package is
            not a Camtrap DP 1.0 package, or if a table contains unexpected values
        ValueError: if an observation is not an event-level observation
    """

    required_files = ('datapackage.json','deployments.csv','events.csv','media.csv','observations.csv')

    for fn in required_files:
        fn_abs = os.path.join(camtrap_dp_folder,fn)
        assert os.path.isfile(fn_abs), 'Could not find required file {}'.format(fn_abs)

    with open(os.path.join(camtrap_dp_folder,'datapackage.json'),'r') as f:
        datapackage = json.load(f)

    assert datapackage['profile'] == 'https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0/camtrap-dp-profile.json', \
        'I only know how to parse Camtrap DP 1.0 packages'

    # Find the relative path of each table in the package's resource list; if a
    # resource name appears more than once, the last entry wins.
    table_names = ('deployments','events','media','observations')
    resource_name_to_path = {}
    for r in datapackage['resources']:
        if r['name'] in table_names:
            resource_name_to_path[r['name']] = r['path']

    deployments_file = resource_name_to_path.get('deployments',None)
    events_file = resource_name_to_path.get('events',None)
    media_file = resource_name_to_path.get('media',None)
    observations_file = resource_name_to_path.get('observations',None)

    assert deployments_file is not None, 'No deployment file specified'
    assert events_file is not None, 'No events file specified'
    assert media_file is not None, 'No media file specified'
    assert observations_file is not None, 'No observation file specified'

    deployments_df = pd.read_csv(os.path.join(camtrap_dp_folder,deployments_file))
    events_df = pd.read_csv(os.path.join(camtrap_dp_folder,events_file))
    media_df = pd.read_csv(os.path.join(camtrap_dp_folder,media_file))
    observations_df = pd.read_csv(os.path.join(camtrap_dp_folder,observations_file))

    print('Read {} deployment lines'.format(len(deployments_df)))
    print('Read {} events lines'.format(len(events_df)))
    print('Read {} media lines'.format(len(media_df)))
    print('Read {} observation lines'.format(len(observations_df)))

    # Build one COCO image record per media row
    media_id_to_media_info = {}

    # i_row = 0; row = media_df.iloc[i_row]
    for i_row,row in media_df.iterrows():
        media_info = {}
        media_info['file_name'] = os.path.join(row['filePath'],row['fileName']).replace('\\','/')
        media_info['location'] = row['deploymentID']
        media_info['id'] = row['mediaID']
        # Kept as a datetime for sorting; converted to a string before returning
        media_info['datetime'] = dateparser.parse(row['timestamp'])
        # Placeholder values, overwritten below for media that belong to an event
        media_info['frame_num'] = -1
        media_info['seq_num_frames'] = -1
        media_id_to_media_info[row['mediaID']] = media_info

    # Map each event to the media it contains
    event_id_to_media_ids = defaultdict(list)

    # i_row = 0; row = events_df.iloc[i_row]
    for i_row,row in events_df.iterrows():
        media_id = row['mediaID']
        assert media_id in media_id_to_media_info
        event_id_to_media_ids[row['eventID']].append(media_id)

    # Map each event to the set of category names observed in it
    event_id_to_category_names = defaultdict(set)

    # i_row = 0; row = observations_df.iloc[i_row]
    for i_row,row in observations_df.iterrows():

        if row['observationLevel'] != 'event':
            raise ValueError("I don't know how to parse image-level events yet")

        if row['observationType'] == 'blank':
            event_id_to_category_names[row['eventID']].add('empty')
        elif row['observationType'] == 'unknown':
            event_id_to_category_names[row['eventID']].add('unknown')
        elif row['observationType'] == 'human':
            assert row['scientificName'] == 'Homo sapiens'
            event_id_to_category_names[row['eventID']].add(row['scientificName'])
        else:
            assert row['observationType'] == 'animal'
            assert isinstance(row['scientificName'],str)
            event_id_to_category_names[row['eventID']].add(row['scientificName'])

    # Sort images within an event into frame numbers
    #
    # event_id = next(iter(event_id_to_media_ids))
    for event_id in event_id_to_media_ids.keys():
        media_ids_this_event = event_id_to_media_ids[event_id]
        media_info_this_event = [media_id_to_media_info[media_id] for media_id in media_ids_this_event]
        media_info_this_event = sorted(media_info_this_event, key=lambda x: x['datetime'])
        for i_media,media_info in enumerate(media_info_this_event):
            media_info['frame_num'] = i_media
            media_info['seq_num_frames'] = len(media_info_this_event)
            media_info['seq_id'] = event_id

    # Create category names
    #
    # Names within an event are visited in sorted order; iterating over the set
    # directly would make category IDs depend on string hash randomization, i.e.
    # they could change from one run to the next.
    category_name_to_category_id = {'empty':0}
    for event_id in event_id_to_category_names:
        for name in sorted(event_id_to_category_names[event_id]):
            if name not in category_name_to_category_id:
                category_name_to_category_id[name] = len(category_name_to_category_id)

    # Move everything into COCO format
    images = list(media_id_to_media_info.values())

    categories = []
    for name in category_name_to_category_id:
        categories.append({'name':name,'id':category_name_to_category_id[name]})
    info = {'version':1.0,'description':datapackage['name']}

    # Create annotations
    #
    # Every image in an event gets one annotation per category observed in that event.
    annotations = []

    for event_id in event_id_to_media_ids.keys():
        i_ann = 0
        media_ids_this_event = event_id_to_media_ids[event_id]
        media_info_this_event = [media_id_to_media_info[media_id] for media_id in media_ids_this_event]
        # Sorted for the same reason as above: stable annotation IDs across runs
        categories_this_event = sorted(event_id_to_category_names[event_id])
        for im in media_info_this_event:
            for category_name in categories_this_event:
                ann = {}
                # Format rather than concatenate, so event IDs that pandas parsed
                # as numbers don't raise a TypeError
                ann['id'] = '{}_{}'.format(event_id,i_ann)
                i_ann += 1
                ann['image_id'] = im['id']
                ann['category_id'] = category_name_to_category_id[category_name]
                ann['sequence_level_annotation'] = True
                annotations.append(ann)

    coco_data = {}
    coco_data['images'] = images
    coco_data['annotations'] = annotations
    coco_data['categories'] = categories
    coco_data['info'] = info

    # datetime objects are not JSON-serializable
    for im in coco_data['images']:
        im['datetime'] = str(im['datetime'])

    if output_file is not None:
        with open(output_file,'w') as f:
            json.dump(coco_data,f,indent=1)

    return coco_data
186
+
187
+
188
+ #%% Interactive driver
189
+
190
# Scratch cells for interactive use; the "if False" guard means none of this
# runs when the module is imported or executed.
if False:

    pass

    #%%

    # Convert a local Camtrap DP package and write the COCO file into the same folder
    camtrap_dp_folder = r'C:\temp\pilot2\pilot2'
    coco_file = os.path.join(camtrap_dp_folder,'test-coco.json')
    coco_data = camtrap_dp_to_coco(camtrap_dp_folder,
                                   output_file=coco_file)

    #%% Validate

    # Run the repo's integrity checker on the file written above
    from data_management.databases.integrity_check_json_db import integrity_check_json_db, IntegrityCheckOptions

    options = IntegrityCheckOptions()

    options.baseDir = camtrap_dp_folder
    options.bCheckImageSizes = False
    options.bCheckImageExistence = True
    options.bFindUnusedImages = True
    options.bRequireLocation = True
    options.iMaxNumImages = -1
    options.nThreads = 1
    options.verbose = True

    sortedCategories, data, errorInfo = integrity_check_json_db(coco_file,options)

    #%% Preview

    # Render an HTML preview of the converted database and open it
    from md_visualization.visualize_db import DbVizOptions, visualize_db

    options = DbVizOptions()
    options.parallelize_rendering = True
    options.parallelize_rendering_with_threads = True
    options.parallelize_rendering_n_cores = 10

    preview_dir = r'c:\temp\camtrapdp-preview'
    htmlOutputFile,image_db = visualize_db(coco_file, preview_dir, camtrap_dp_folder, options=options)

    from md_utils.path_utils import open_file
    open_file(htmlOutputFile)
232
+
233
+
234
+ #%% Command-line driver
235
+
236
+ # TODO
237
+
238
+
@@ -1,12 +1,12 @@
1
- ########
2
- #
3
- # cct_json_utils.py
4
- #
5
- # Utilities for working with COCO Camera Traps .json databases
6
- #
7
- # https://github.com/agentmorris/MegaDetector/blob/master/data_management/README.md#coco-cameratraps-format
8
- #
9
- ########
1
+ """
2
+
3
+ cct_json_utils.py
4
+
5
+ Utilities for working with COCO Camera Traps .json databases
6
+
7
+ https://github.com/agentmorris/MegaDetector/blob/master/data_management/README.md#coco-cameratraps-format
8
+
9
+ """
10
10
 
11
11
  #%% Constants and imports
12
12
 
@@ -15,9 +15,6 @@ import os
15
15
 
16
16
  from tqdm import tqdm
17
17
  from collections import defaultdict, OrderedDict
18
- from typing import Any, Dict, Iterable, List, Mapping, Optional, Union
19
-
20
- JSONObject = Mapping[str, Any]
21
18
 
22
19
 
23
20
  #%% Classes
@@ -31,18 +28,32 @@ class CameraTrapJsonUtils:
31
28
  def annotations_to_string(annotations, cat_id_to_name):
32
29
  """
33
30
  Given a list of annotations and a mapping from class IDs to names, produces
34
- a concatenated class list, always sorting alphabetically.
31
+ a comma-delimited string containing a list of class names, sorted alphabetically.
32
+
33
+ Args:
34
+ annotations (list): a list of annotation dicts
35
+ cat_id_to_name (dict): a dict mapping category IDs to category names
36
+
37
+ Returns:
38
+ str: a comma-delimited list of class names
35
39
  """
36
40
 
37
- class_names = CameraTrapJsonUtils.annotationsToClassnames(annotations, cat_id_to_name)
41
+ class_names = CameraTrapJsonUtils.annotations_to_class_names(annotations, cat_id_to_name)
38
42
  return ','.join(class_names)
39
43
 
40
44
 
41
45
  @staticmethod
42
- def annotations_to_classnames(annotations, cat_id_to_name):
46
+ def annotations_to_class_names(annotations, cat_id_to_name):
43
47
  """
44
48
  Given a list of annotations and a mapping from class IDs to names, produces
45
- a list of class names, always sorting alphabetically.
49
+ a list of class names, sorted alphabetically.
50
+
51
+ Args:
52
+ annotations (list): a list of annotation dicts
53
+ cat_id_to_name (dict): a dict mapping category IDs to category names
54
+
55
+ Returns:
56
+ list: a list of class names present in [annotations]
46
57
  """
47
58
 
48
59
  # Collect all names
@@ -53,18 +64,19 @@ class CameraTrapJsonUtils:
53
64
 
54
65
 
55
66
  @staticmethod
56
- def order_db_keys(db: JSONObject) -> OrderedDict:
67
+ def order_db_keys(db):
57
68
  """
58
69
  Given a dict representing a JSON database in the COCO Camera Trap
59
- format, return an OrderedDict with keys in the order of 'info',
70
+ format, returns an OrderedDict with keys in the order of 'info',
60
71
  'categories', 'annotations' and 'images'. When this OrderedDict is
61
72
  serialized with json.dump(), the order of the keys is preserved.
62
73
 
63
74
  Args:
64
- db: dict representing a JSON database in the COCO Camera Trap format
75
+ db (dict): a JSON database in the COCO Camera Trap format
65
76
 
66
77
  Returns:
67
- the same db but as an OrderedDict with keys ordered for readability
78
+ dict: the same content as [db] but as an OrderedDict with keys ordered for
79
+ readability
68
80
  """
69
81
 
70
82
  ordered = OrderedDict([
@@ -76,10 +88,20 @@ class CameraTrapJsonUtils:
76
88
 
77
89
 
78
90
  @staticmethod
79
- def annotations_groupby_image_field(db_indexed, image_field='seq_id'):
91
+ def group_annotations_by_image_field(db_indexed, image_field='seq_id'):
80
92
  """
81
93
  Given an instance of IndexedJsonDb, group annotation entries by a field in the
82
- image entry.
94
+ image entry. Typically used to find all the annotations associated with a sequence.
95
+
96
+ Args:
97
+ db_indexed (IndexedJsonDb): an initialized IndexedJsonDb, typically loaded from a
98
+ COCO Camera Traps .json file
99
+ image_field (str, optional): a field by which to group annotations (defaults
100
+ to 'seq_id')
101
+
102
+ Returns:
103
+ dict: a dict mapping objects (typically strings, in fact typically sequence IDs) to
104
+ lists of annotations
83
105
  """
84
106
 
85
107
  image_id_to_image_field = {}
@@ -95,27 +117,24 @@ class CameraTrapJsonUtils:
95
117
 
96
118
 
97
119
  @staticmethod
98
- def get_entries_from_locations(db: JSONObject, locations: Iterable[str]
99
- ) -> Dict[str, Any]:
120
+ def get_entries_for_locations(db, locations):
100
121
  """
101
- Given a dict representing a JSON database in the COCO Camera Trap format, return a dict
122
+ Given a dict representing a JSON database in the COCO Camera Trap format, returns a dict
102
123
  with the 'images' and 'annotations' fields in the CCT format, each is an array that only
103
- includes entries in the original `db` that are in the `locations` set.
124
+ includes entries in the original [db] that are in the [locations] set.
125
+
104
126
  Args:
105
- db: a dict representing a JSON database in the COCO Camera Trap format
106
- locations: a set or list of locations to include; each item is a string
127
+ db (dict): a dict representing a JSON database in the COCO Camera Trap format
128
+ locations (set): a set or list of locations to include; each item is a string
107
129
 
108
130
  Returns:
109
- a dict with the 'images' and 'annotations' fields in the CCT format
131
+ dict: a dict with the 'images' and 'annotations' fields in the CCT format
110
132
  """
111
133
 
112
134
  locations = set(locations)
113
135
  print('Original DB has {} image and {} annotation entries.'.format(
114
136
  len(db['images']), len(db['annotations'])))
115
- new_db: Dict[str, Any] = {
116
- 'images': [],
117
- 'annotations': []
118
- }
137
+ new_db = { 'images': [], 'annotations': [] }
119
138
  new_images = set()
120
139
  for i in db['images']:
121
140
  # cast location to string as the entries in locations are strings
@@ -139,13 +158,26 @@ class IndexedJsonDb:
139
158
  a .json database.
140
159
  """
141
160
 
142
- def __init__(self, json_filename: Union[str, JSONObject],
143
- b_normalize_paths: bool = False,
144
- filename_replacements: Optional[Mapping[str, str]] = None,
145
- b_convert_classes_to_lower: bool = True,
146
- b_force_forward_slashes: bool = True):
161
+ def __init__(self,
162
+ json_filename,
163
+ b_normalize_paths=False,
164
+ filename_replacements=None,
165
+ b_convert_classes_to_lower=True,
166
+ b_force_forward_slashes=True):
147
167
  """
148
- json_filename can also be an existing json db
168
+ Constructor for IndexedJsonDb that loads from a .json file or CCT-formatted dict.
169
+
170
+ Args:
171
+ json_filename (str): filename to load, or an already-loaded dict
172
+ b_normalize_paths (bool, optional): whether to invoke os.path.normpath on
173
+ all filenames. Not relevant if b_force_forward_slashes is True.
174
+ filename_replacements (dict, optional): a set of string --> string mappings
175
+ that will trigger replacements in all filenames, typically used to remove
176
+ leading folders
177
+ b_convert_classes_to_lower (bool, optional): whether to convert all class
178
+ names to lowercase
179
+ b_force_forward_slashes (bool, optional): whether to convert backslashes to
180
+ forward slashes in all path names
149
181
  """
150
182
 
151
183
  if isinstance(json_filename, str):
@@ -197,7 +229,7 @@ class IndexedJsonDb:
197
229
 
198
230
  # Image ID --> annotations
199
231
  # Each image can potentially have multiple annotations, hence using lists
200
- self.image_id_to_annotations: Dict[str, List[Dict[str, Any]]]
232
+ self.image_id_to_annotations = {}
201
233
  self.image_id_to_annotations = defaultdict(list)
202
234
  for ann in self.db['annotations']:
203
235
  self.image_id_to_annotations[ann['image_id']].append(ann)
@@ -205,12 +237,17 @@ class IndexedJsonDb:
205
237
  # ...__init__
206
238
 
207
239
 
208
- def get_annotations_for_image(self, image: JSONObject
209
- ) -> Optional[List[Dict[str, Any]]]:
240
+ def get_annotations_for_image(self, image):
210
241
  """
211
- Returns: list of annotations associated with an image,
212
- None if the db has not been loaded,
213
- [] if no annotations are available
242
+ Finds all the annotations associated with the image dict [image].
243
+
244
+ Args:
245
+ image (dict): an image dict loaded from a CCT .json file. Only the 'id' field
246
+ is used.
247
+
248
+ Returns:
249
+ list: list of annotations associated with this image. Returns None if the db
250
+ has not been loaded, or [] if no annotations are available for this image.
214
251
  """
215
252
 
216
253
  if self.db is None:
@@ -223,11 +260,17 @@ class IndexedJsonDb:
223
260
  return image_annotations
224
261
 
225
262
 
226
- def get_classes_for_image(self, image: JSONObject) -> Optional[List[str]]:
263
+ def get_classes_for_image(self, image):
227
264
  """
228
- Returns a list of class names associated with [image]
265
+ Returns a list of class names associated with [image].
229
266
 
230
- Returns None is the db has not been loaded, [] if no annotations are available
267
+ Args:
268
+ image (dict): an image dict loaded from a CCT .json file. Only the 'id' field
269
+ is used.
270
+
271
+ Returns:
272
+ list: list of class names associated with this image. Returns None if the db
273
+ has not been loaded, or [] if no annotations are available for this image.
231
274
  """
232
275
 
233
276
  if self.db is None:
@@ -247,27 +290,27 @@ class IndexedJsonDb:
247
290
 
248
291
  # ...class IndexedJsonDb
249
292
 
250
-
251
- #%% Functions
252
-
253
293
  class SequenceOptions:
294
+ """
295
+ Options parameterizing the grouping of images into sequences by time.
296
+ """
254
297
 
298
+ #: Images separated by <= this duration will be grouped into the same sequence.
255
299
  episode_interval_seconds = 60.0
256
300
 
257
301
 
302
+ #%% Functions
303
+
258
304
  def create_sequences(image_info,options=None):
259
305
  """
260
- Synthesize episodes/sequences/bursts for the images in [image_info]. [image_info]
261
- should be a list of dicts in CCT format, i.e. with fields 'file_name','datetime','location'.
262
-
263
- 'filename' should be a string.
264
-
265
- 'datetime' should be a Python datetime object
266
-
267
- 'location' should be a string.
306
+ Synthesizes episodes/sequences/bursts for the images in [image_info].
268
307
 
269
308
  Modifies [image_info], populating the 'seq_id', 'seq_num_frames', and 'frame_num' fields
270
309
  for each image.
310
+
311
+ Args:
312
+ image_info (list): a list of dicts in CCT format, i.e. with fields 'file_name' (str),
313
+ 'datetime' (datetime), and 'location' (str).
271
314
  """
272
315
 
273
316
  if options is None: