megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (197) hide show
  1. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  2. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  3. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  4. megadetector/classification/aggregate_classifier_probs.py +3 -3
  5. megadetector/classification/analyze_failed_images.py +5 -5
  6. megadetector/classification/cache_batchapi_outputs.py +5 -5
  7. megadetector/classification/create_classification_dataset.py +11 -12
  8. megadetector/classification/crop_detections.py +10 -10
  9. megadetector/classification/csv_to_json.py +8 -8
  10. megadetector/classification/detect_and_crop.py +13 -15
  11. megadetector/classification/efficientnet/model.py +8 -8
  12. megadetector/classification/efficientnet/utils.py +6 -5
  13. megadetector/classification/evaluate_model.py +7 -7
  14. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  15. megadetector/classification/json_to_azcopy_list.py +1 -1
  16. megadetector/classification/json_validator.py +29 -32
  17. megadetector/classification/map_classification_categories.py +9 -9
  18. megadetector/classification/merge_classification_detection_output.py +12 -9
  19. megadetector/classification/prepare_classification_script.py +19 -19
  20. megadetector/classification/prepare_classification_script_mc.py +26 -26
  21. megadetector/classification/run_classifier.py +4 -4
  22. megadetector/classification/save_mislabeled.py +6 -6
  23. megadetector/classification/train_classifier.py +1 -1
  24. megadetector/classification/train_classifier_tf.py +9 -9
  25. megadetector/classification/train_utils.py +10 -10
  26. megadetector/data_management/annotations/annotation_constants.py +1 -2
  27. megadetector/data_management/camtrap_dp_to_coco.py +79 -46
  28. megadetector/data_management/cct_json_utils.py +103 -103
  29. megadetector/data_management/cct_to_md.py +49 -49
  30. megadetector/data_management/cct_to_wi.py +33 -33
  31. megadetector/data_management/coco_to_labelme.py +75 -75
  32. megadetector/data_management/coco_to_yolo.py +210 -193
  33. megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
  34. megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
  35. megadetector/data_management/databases/integrity_check_json_db.py +228 -200
  36. megadetector/data_management/databases/subset_json_db.py +33 -33
  37. megadetector/data_management/generate_crops_from_cct.py +88 -39
  38. megadetector/data_management/get_image_sizes.py +54 -49
  39. megadetector/data_management/labelme_to_coco.py +133 -125
  40. megadetector/data_management/labelme_to_yolo.py +159 -73
  41. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  42. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  43. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  44. megadetector/data_management/lila/download_lila_subset.py +21 -24
  45. megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
  46. megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
  47. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  48. megadetector/data_management/lila/lila_common.py +73 -70
  49. megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
  50. megadetector/data_management/mewc_to_md.py +344 -340
  51. megadetector/data_management/ocr_tools.py +262 -255
  52. megadetector/data_management/read_exif.py +249 -227
  53. megadetector/data_management/remap_coco_categories.py +90 -28
  54. megadetector/data_management/remove_exif.py +81 -21
  55. megadetector/data_management/rename_images.py +187 -187
  56. megadetector/data_management/resize_coco_dataset.py +588 -120
  57. megadetector/data_management/speciesnet_to_md.py +41 -41
  58. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  59. megadetector/data_management/yolo_output_to_md_output.py +248 -122
  60. megadetector/data_management/yolo_to_coco.py +333 -191
  61. megadetector/detection/change_detection.py +832 -0
  62. megadetector/detection/process_video.py +340 -337
  63. megadetector/detection/pytorch_detector.py +358 -278
  64. megadetector/detection/run_detector.py +399 -186
  65. megadetector/detection/run_detector_batch.py +404 -377
  66. megadetector/detection/run_inference_with_yolov5_val.py +340 -327
  67. megadetector/detection/run_tiled_inference.py +257 -249
  68. megadetector/detection/tf_detector.py +24 -24
  69. megadetector/detection/video_utils.py +332 -295
  70. megadetector/postprocessing/add_max_conf.py +19 -11
  71. megadetector/postprocessing/categorize_detections_by_size.py +45 -45
  72. megadetector/postprocessing/classification_postprocessing.py +468 -433
  73. megadetector/postprocessing/combine_batch_outputs.py +23 -23
  74. megadetector/postprocessing/compare_batch_results.py +590 -525
  75. megadetector/postprocessing/convert_output_format.py +106 -102
  76. megadetector/postprocessing/create_crop_folder.py +347 -147
  77. megadetector/postprocessing/detector_calibration.py +173 -168
  78. megadetector/postprocessing/generate_csv_report.py +508 -499
  79. megadetector/postprocessing/load_api_results.py +48 -27
  80. megadetector/postprocessing/md_to_coco.py +133 -102
  81. megadetector/postprocessing/md_to_labelme.py +107 -90
  82. megadetector/postprocessing/md_to_wi.py +40 -40
  83. megadetector/postprocessing/merge_detections.py +92 -114
  84. megadetector/postprocessing/postprocess_batch_results.py +319 -301
  85. megadetector/postprocessing/remap_detection_categories.py +91 -38
  86. megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
  87. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  88. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  89. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
  90. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  91. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  92. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  93. megadetector/postprocessing/validate_batch_results.py +70 -70
  94. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  95. megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
  96. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
  97. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
  98. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  99. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  100. megadetector/taxonomy_mapping/species_lookup.py +156 -74
  101. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  102. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  103. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  104. megadetector/utils/ct_utils.py +1049 -211
  105. megadetector/utils/directory_listing.py +21 -77
  106. megadetector/utils/gpu_test.py +22 -22
  107. megadetector/utils/md_tests.py +632 -529
  108. megadetector/utils/path_utils.py +1520 -431
  109. megadetector/utils/process_utils.py +41 -41
  110. megadetector/utils/split_locations_into_train_val.py +62 -62
  111. megadetector/utils/string_utils.py +148 -27
  112. megadetector/utils/url_utils.py +489 -176
  113. megadetector/utils/wi_utils.py +2658 -2526
  114. megadetector/utils/write_html_image_list.py +137 -137
  115. megadetector/visualization/plot_utils.py +34 -30
  116. megadetector/visualization/render_images_with_thumbnails.py +39 -74
  117. megadetector/visualization/visualization_utils.py +487 -435
  118. megadetector/visualization/visualize_db.py +232 -198
  119. megadetector/visualization/visualize_detector_output.py +82 -76
  120. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
  121. megadetector-10.0.0.dist-info/RECORD +139 -0
  122. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
  123. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  124. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  125. megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
  126. megadetector/api/batch_processing/api_core/server.py +0 -294
  127. megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
  128. megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
  129. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  130. megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
  131. megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
  132. megadetector/api/batch_processing/api_core/server_utils.py +0 -88
  133. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  134. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  135. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  136. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  137. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  138. megadetector/api/synchronous/__init__.py +0 -0
  139. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  140. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
  141. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
  142. megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
  143. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  144. megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
  145. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  146. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  147. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  148. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  149. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  150. megadetector/data_management/importers/awc_to_json.py +0 -191
  151. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  152. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  153. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  154. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  155. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  156. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  157. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  158. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  159. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  160. megadetector/data_management/importers/ena24_to_json.py +0 -276
  161. megadetector/data_management/importers/filenames_to_json.py +0 -386
  162. megadetector/data_management/importers/helena_to_cct.py +0 -283
  163. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  164. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  165. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  166. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  167. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  168. megadetector/data_management/importers/missouri_to_json.py +0 -490
  169. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  170. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  171. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  172. megadetector/data_management/importers/pc_to_json.py +0 -365
  173. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  174. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  175. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  176. megadetector/data_management/importers/rspb_to_json.py +0 -356
  177. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  178. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  179. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  180. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  181. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  182. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  183. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  184. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  185. megadetector/data_management/importers/ubc_to_json.py +0 -399
  186. megadetector/data_management/importers/umn_to_json.py +0 -507
  187. megadetector/data_management/importers/wellington_to_json.py +0 -263
  188. megadetector/data_management/importers/wi_to_json.py +0 -442
  189. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  190. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  191. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  192. megadetector/utils/azure_utils.py +0 -178
  193. megadetector/utils/sas_blob_utils.py +0 -509
  194. megadetector-5.0.28.dist-info/RECORD +0 -209
  195. /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
  196. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
  197. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- """
1
+ r"""
2
2
 
3
3
  save_mislabeled.py
4
4
 
@@ -10,7 +10,7 @@ List of known mislabeled images is stored in Azure Blob Storage.
10
10
  * blob: megadb_mislabeled/{dataset}.csv, one file per dataset
11
11
 
12
12
  Each file megadb_mislabeled/{dataset}.csv has two columns:
13
-
13
+
14
14
  * 'file': str, blob name
15
15
 
16
16
  * 'correct_class': optional str, correct dataset class
@@ -41,7 +41,7 @@ import pandas as pd
41
41
  #%% Main function
42
42
 
43
43
  def update_mislabeled_images(container_path: str, input_csv_path: str) -> None:
44
-
44
+
45
45
  df = pd.read_csv(input_csv_path, index_col=False)
46
46
 
47
47
  # error checking
@@ -62,7 +62,7 @@ def update_mislabeled_images(container_path: str, input_csv_path: str) -> None:
62
62
  df['file'] = df['blob_dirname'] + '/' + df['File']
63
63
 
64
64
  for ds, ds_df in df.groupby('dataset'):
65
-
65
+
66
66
  sr_path = os.path.join(container_path, 'megadb_mislabeled', f'{ds}.csv')
67
67
  if os.path.exists(sr_path):
68
68
  old_sr = pd.read_csv(sr_path, index_col='file', squeeze=True)
@@ -89,7 +89,7 @@ def update_mislabeled_images(container_path: str, input_csv_path: str) -> None:
89
89
  #%% Command-line driver
90
90
 
91
91
  def _parse_args() -> argparse.Namespace:
92
-
92
+
93
93
  parser = argparse.ArgumentParser(
94
94
  formatter_class=argparse.ArgumentDefaultsHelpFormatter,
95
95
  description='Merges classification results with Batch Detection API '
@@ -104,7 +104,7 @@ def _parse_args() -> argparse.Namespace:
104
104
 
105
105
 
106
106
  if __name__ == '__main__':
107
-
107
+
108
108
  args = _parse_args()
109
109
  update_mislabeled_images(container_path=args.container_path,
110
110
  input_csv_path=args.input_csv)
@@ -44,7 +44,7 @@ from megadetector.visualization import plot_utils
44
44
 
45
45
  #%% Example usage
46
46
 
47
- """
47
+ """
48
48
  python train_classifier.py run_idfg /ssd/crops_sq \
49
49
  -m "efficientnet-b0" --pretrained --finetune --label-weighted \
50
50
  --epochs 50 --batch-size 512 --lr 1e-4 \
@@ -54,7 +54,7 @@ EFFICIENTNET_MODELS: Mapping[str, Mapping[str, Any]] = {
54
54
 
55
55
  #%% Example usage
56
56
 
57
- """
57
+ """
58
58
  python train_classifier_tf.py run_idfg /ssd/crops_sq \
59
59
  -m "efficientnet-b0" --pretrained --finetune --label-weighted \
60
60
  --epochs 50 --batch-size 512 --lr 1e-4 \
@@ -97,7 +97,7 @@ def create_dataset(
97
97
 
98
98
  Returns: tf.data.Dataset
99
99
  """
100
-
100
+
101
101
  # images dataset
102
102
  img_ds = tf.data.Dataset.from_tensor_slices(img_files)
103
103
  img_ds = img_ds.map(lambda p: tf.io.read_file(img_base_dir + os.sep + p),
@@ -162,7 +162,7 @@ def create_dataloaders(
162
162
  datasets: dict, maps split to DataLoader
163
163
  label_names: list of str, label names in order of label id
164
164
  """
165
-
165
+
166
166
  df, label_names, split_to_locs = load_dataset_csv(
167
167
  dataset_csv_path, label_index_json_path, splits_json_path,
168
168
  multilabel=multilabel, label_weighted=label_weighted,
@@ -238,7 +238,7 @@ def build_model(model_name: str, num_classes: int, img_size: int,
238
238
  """
239
239
  Creates a model with an EfficientNet base.
240
240
  """
241
-
241
+
242
242
  class_name = EFFICIENTNET_MODELS[model_name]['cls']
243
243
  dropout = EFFICIENTNET_MODELS[model_name]['dropout']
244
244
 
@@ -279,7 +279,7 @@ def log_images_with_confidence(
279
279
  epoch: int
280
280
  tag: str
281
281
  """
282
-
282
+
283
283
  for label_id, heap in heap_dict.items():
284
284
  label_name = label_names[label_id]
285
285
 
@@ -319,7 +319,7 @@ def track_extreme_examples(tp_heaps: dict[int, list[HeapItem]],
319
319
  img_files: tf.Tensor, shape [batch_size], type tf.string
320
320
  logits: tf.Tensor, shape [batch_size, num_classes]
321
321
  """
322
-
322
+
323
323
  labels = labels.numpy().tolist()
324
324
  inputs = inputs.numpy().astype(np.uint8)
325
325
  img_files = img_files.numpy().astype(str).tolist()
@@ -480,7 +480,7 @@ def log_run(split: str, epoch: int, writer: tf.summary.SummaryWriter,
480
480
  Args:
481
481
  metrics: dict, keys already prefixed with {split}/
482
482
  """
483
-
483
+
484
484
  per_class_recall = recall_from_confusion_matrix(cm, label_names)
485
485
  metrics.update(prefix_all_keys(per_class_recall, f'{split}/label_recall/'))
486
486
 
@@ -518,7 +518,7 @@ def main(dataset_dir: str,
518
518
  seed: Optional[int] = None,
519
519
  logdir: str = '',
520
520
  cache_splits: Sequence[str] = ()) -> None:
521
-
521
+
522
522
  # input validation
523
523
  assert os.path.exists(dataset_dir)
524
524
  assert os.path.exists(cropped_images_dir)
@@ -597,7 +597,7 @@ def main(dataset_dir: str,
597
597
  model.base_model.trainable = True
598
598
 
599
599
  print('- train:')
600
-
600
+
601
601
  train_metrics, train_heaps, train_cm = run_epoch(
602
602
  model, loader=loaders['train'], weighted=label_weighted,
603
603
  loss_fn=loss_fn, weight_decay=weight_decay, optimizer=optimizer,
@@ -35,7 +35,7 @@ class HeapItem:
35
35
  """
36
36
  A wrapper over non-comparable data with a comparable priority value.
37
37
  """
38
-
38
+
39
39
  priority: Any
40
40
  data: Any = dataclasses.field(compare=False, repr=False)
41
41
 
@@ -53,7 +53,7 @@ def add_to_heap(h: list[Any], item: HeapItem, k: Optional[int] = None) -> None:
53
53
  item: HeapItem
54
54
  k: int, desired capacity of the heap, or None for no limit
55
55
  """
56
-
56
+
57
57
  if k is None or len(h) < k:
58
58
  heapq.heappush(h, item)
59
59
  else:
@@ -66,17 +66,17 @@ def prefix_all_keys(d: Mapping[str, Any], prefix: str) -> dict[str, Any]:
66
66
  """
67
67
  Returns a new dict where the keys are prefixed by <prefix>.
68
68
  """
69
-
69
+
70
70
  return {f'{prefix}{k}': v for k, v in d.items()}
71
71
 
72
72
 
73
73
  def fig_to_img(fig: matplotlib.figure.Figure) -> np.ndarray:
74
74
  """
75
75
  Converts a matplotlib figure to an image represented by a numpy array.
76
-
76
+
77
77
  Returns: np.ndarray, type uint8, shape [H, W, 3]
78
78
  """
79
-
79
+
80
80
  with io.BytesIO() as b:
81
81
  fig.savefig(b, transparent=False, bbox_inches='tight', pad_inches=0,
82
82
  format='png')
@@ -103,7 +103,7 @@ def imgs_with_confidences(imgs_list: list[tuple[Any, ...]],
103
103
  fig: matplotlib.figure.Figure
104
104
  img_files: list of str
105
105
  """
106
-
106
+
107
107
  imgs, img_files, tags, titles = [], [], [], []
108
108
  for img, label_id, topk_conf, topk_preds, img_file in imgs_list:
109
109
  imgs.append(img)
@@ -140,7 +140,7 @@ def plot_img_grid(imgs: Sequence[Any], row_h: float, col_w: float,
140
140
 
141
141
  Returns: matplotlib.figure.Figure
142
142
  """
143
-
143
+
144
144
  # input validation
145
145
  num_images = len(imgs)
146
146
  if tags is not None:
@@ -186,7 +186,7 @@ def load_splits(splits_json_path: str) -> dict[str, set[tuple[str, str]]]:
186
186
 
187
187
  Returns: dict, maps split to set of (dataset, location) tuples
188
188
  """
189
-
189
+
190
190
  with open(splits_json_path, 'r') as f:
191
191
  split_to_locs_js = json.load(f)
192
192
  split_to_locs = {
@@ -235,7 +235,7 @@ def load_dataset_csv(dataset_csv_path: str,
235
235
  label_names: list of str, label names in order of label id
236
236
  split_to_locs: dict, maps split to set of (dataset, location) tuples
237
237
  """
238
-
238
+
239
239
  # read in dataset CSV and create merged (dataset, location) col
240
240
  df = pd.read_csv(dataset_csv_path, index_col=False, float_precision='high')
241
241
  df['dataset_location'] = list(zip(df['dataset'], df['location']))
@@ -315,7 +315,7 @@ def recall_from_confusion_matrix(
315
315
 
316
316
  Returns: dict, label_name => recall
317
317
  """
318
-
318
+
319
319
  result = {
320
320
  label_name: confusion_matrix[i, i] / (confusion_matrix[i].sum() + 1e-8)
321
321
  for i, label_name in enumerate(label_names)
@@ -4,7 +4,7 @@ annotation_constants.py
4
4
 
5
5
  Defines default categories for MegaDetector output boxes.
6
6
 
7
- Used throughout the repo; do not change unless you are Dan or Siyu. In fact, do not change unless
7
+ Used throughout the repo; do not change unless you are Dan or Siyu. In fact, do not change unless
8
8
  you are both Dan *and* Siyu.
9
9
 
10
10
  We use integer IDs here; this is different from the MD .json file format,
@@ -31,4 +31,3 @@ detector_bbox_category_name_to_id = {}
31
31
  for cat in detector_bbox_categories:
32
32
  detector_bbox_category_id_to_name[cat['id']] = cat['name']
33
33
  detector_bbox_category_name_to_id[cat['name']] = cat['id']
34
-
@@ -19,6 +19,8 @@ Currently supports only sequence-level labeling.
19
19
 
20
20
  import os
21
21
  import json
22
+ import argparse
23
+
22
24
  import pandas as pd
23
25
 
24
26
  from dateutil import parser as dateparser
@@ -31,22 +33,26 @@ from collections import defaultdict
31
33
  def camtrap_dp_to_coco(camtrap_dp_folder,output_file=None):
32
34
  """
33
35
  Convert the Camtrap DP package in [camtrap_dp_folder] to COCO.
34
-
36
+
35
37
  Does not validate images, just converts. Use integrity_check_json_db to validate
36
- the resulting COCO file.
37
-
38
+ the resulting COCO file.
39
+
38
40
  Optionally writes the results to [output_file]
41
+
42
+ Args:
43
+ camtrap_dp_folder (str): input folder, containing a CamtrapDP package
44
+ output_file (str, optional): COCO-formatted output file
39
45
  """
40
-
46
+
41
47
  required_files = ('datapackage.json','deployments.csv','events.csv','media.csv','observations.csv')
42
-
48
+
43
49
  for fn in required_files:
44
50
  fn_abs = os.path.join(camtrap_dp_folder,fn)
45
51
  assert os.path.isfile(fn_abs), 'Could not find required file {}'.format(fn_abs)
46
-
52
+
47
53
  with open(os.path.join(camtrap_dp_folder,'datapackage.json'),'r') as f:
48
54
  datapackage = json.load(f)
49
-
55
+
50
56
  assert datapackage['profile'] == 'https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0/camtrap-dp-profile.json', \
51
57
  'I only know how to parse Camtrap DP 1.0 packages'
52
58
 
@@ -54,7 +60,7 @@ def camtrap_dp_to_coco(camtrap_dp_folder,output_file=None):
54
60
  events_file = None
55
61
  media_file = None
56
62
  observations_file = None
57
-
63
+
58
64
  resources = datapackage['resources']
59
65
  for r in resources:
60
66
  if r['name'] == 'deployments':
@@ -70,19 +76,19 @@ def camtrap_dp_to_coco(camtrap_dp_folder,output_file=None):
70
76
  assert events_file is not None, 'No events file specified'
71
77
  assert media_file is not None, 'No media file specified'
72
78
  assert observations_file is not None, 'No observation file specified'
73
-
79
+
74
80
  deployments_df = pd.read_csv(os.path.join(camtrap_dp_folder,deployments_file))
75
81
  events_df = pd.read_csv(os.path.join(camtrap_dp_folder,events_file))
76
82
  media_df = pd.read_csv(os.path.join(camtrap_dp_folder,media_file))
77
83
  observations_df = pd.read_csv(os.path.join(camtrap_dp_folder,observations_file))
78
-
84
+
79
85
  print('Read {} deployment lines'.format(len(deployments_df)))
80
86
  print('Read {} events lines'.format(len(events_df)))
81
87
  print('Read {} media lines'.format(len(media_df)))
82
88
  print('Read {} observation lines'.format(len(observations_df)))
83
-
89
+
84
90
  media_id_to_media_info = {}
85
-
91
+
86
92
  # i_row = 0; row = media_df.iloc[i_row]
87
93
  for i_row,row in media_df.iterrows():
88
94
  media_info = {}
@@ -94,23 +100,23 @@ def camtrap_dp_to_coco(camtrap_dp_folder,output_file=None):
94
100
  media_info['frame_num'] = -1
95
101
  media_info['seq_num_frames'] = -1
96
102
  media_id_to_media_info[row['mediaID']] = media_info
97
-
103
+
98
104
  event_id_to_media_ids = defaultdict(list)
99
-
105
+
100
106
  # i_row = 0; row = events_df.iloc[i_row]
101
107
  for i_row,row in events_df.iterrows():
102
108
  media_id = row['mediaID']
103
109
  assert media_id in media_id_to_media_info
104
110
  event_id_to_media_ids[row['eventID']].append(media_id)
105
-
111
+
106
112
  event_id_to_category_names = defaultdict(set)
107
-
113
+
108
114
  # i_row = 0; row = observations_df.iloc[i_row]
109
115
  for i_row,row in observations_df.iterrows():
110
-
116
+
111
117
  if row['observationLevel'] != 'event':
112
118
  raise ValueError("I don't know how to parse image-level events yet")
113
-
119
+
114
120
  if row['observationType'] == 'blank':
115
121
  event_id_to_category_names[row['eventID']].add('empty')
116
122
  elif row['observationType'] == 'unknown':
@@ -122,7 +128,7 @@ def camtrap_dp_to_coco(camtrap_dp_folder,output_file=None):
122
128
  assert row['observationType'] == 'animal'
123
129
  assert isinstance(row['scientificName'],str)
124
130
  event_id_to_category_names[row['eventID']].add(row['scientificName'])
125
-
131
+
126
132
  # Sort images within an event into frame numbers
127
133
  #
128
134
  # event_id = next(iter(event_id_to_media_ids))
@@ -134,7 +140,7 @@ def camtrap_dp_to_coco(camtrap_dp_folder,output_file=None):
134
140
  media_info['frame_num'] = i_media
135
141
  media_info['seq_num_frames'] = len(media_info_this_event)
136
142
  media_info['seq_id'] = event_id
137
-
143
+
138
144
  # Create category names
139
145
  category_name_to_category_id = {'empty':0}
140
146
  for event_id in event_id_to_category_names:
@@ -142,18 +148,18 @@ def camtrap_dp_to_coco(camtrap_dp_folder,output_file=None):
142
148
  for name in category_names_this_event:
143
149
  if name not in category_name_to_category_id:
144
150
  category_name_to_category_id[name] = len(category_name_to_category_id)
145
-
151
+
146
152
  # Move everything into COCO format
147
153
  images = list(media_id_to_media_info.values())
148
-
154
+
149
155
  categories = []
150
156
  for name in category_name_to_category_id:
151
157
  categories.append({'name':name,'id':category_name_to_category_id[name]})
152
158
  info = {'version':1.0,'description':datapackage['name']}
153
-
159
+
154
160
  # Create annotations
155
161
  annotations = []
156
-
162
+
157
163
  for event_id in event_id_to_media_ids.keys():
158
164
  i_ann = 0
159
165
  media_ids_this_event = event_id_to_media_ids[event_id]
@@ -168,23 +174,23 @@ def camtrap_dp_to_coco(camtrap_dp_folder,output_file=None):
168
174
  ann['category_id'] = category_name_to_category_id[category_name]
169
175
  ann['sequence_level_annotation'] = True
170
176
  annotations.append(ann)
171
-
177
+
172
178
  coco_data = {}
173
179
  coco_data['images'] = images
174
180
  coco_data['annotations'] = annotations
175
181
  coco_data['categories'] = categories
176
182
  coco_data['info'] = info
177
-
183
+
178
184
  for im in coco_data['images']:
179
185
  im['datetime'] = str(im['datetime'] )
180
-
186
+
181
187
  if output_file is not None:
182
188
  with open(output_file,'w') as f:
183
189
  json.dump(coco_data,f,indent=1)
184
-
190
+
185
191
  return coco_data
186
-
187
-
192
+
193
+
188
194
  #%% Interactive driver
189
195
 
190
196
  if False:
@@ -192,19 +198,19 @@ if False:
192
198
  pass
193
199
 
194
200
  #%%
195
-
201
+
196
202
  camtrap_dp_folder = r'C:\temp\pilot2\pilot2'
197
203
  coco_file = os.path.join(camtrap_dp_folder,'test-coco.json')
198
204
  coco_data = camtrap_dp_to_coco(camtrap_dp_folder,
199
205
  output_file=coco_file)
200
-
206
+
201
207
  #%% Validate
202
-
208
+
203
209
  from megadetector.data_management.databases.integrity_check_json_db import \
204
210
  integrity_check_json_db, IntegrityCheckOptions
205
-
211
+
206
212
  options = IntegrityCheckOptions()
207
-
213
+
208
214
  options.baseDir = camtrap_dp_folder
209
215
  options.bCheckImageSizes = False
210
216
  options.bCheckImageExistence = True
@@ -213,25 +219,52 @@ if False:
213
219
  options.iMaxNumImages = -1
214
220
  options.nThreads = 1
215
221
  options.verbose = True
216
-
217
- sortedCategories, data, errorInfo = integrity_check_json_db(coco_file,options)
222
+
223
+ sorted_categories, data, error_info = integrity_check_json_db(coco_file,options)
218
224
 
219
225
  #%% Preview
220
-
226
+
221
227
  from megadetector.visualization.visualize_db import DbVizOptions, visualize_db
222
-
228
+
223
229
  options = DbVizOptions()
224
230
  options.parallelize_rendering = True
225
231
  options.parallelize_rendering_with_threads = True
226
232
  options.parallelize_rendering_n_cores = 10
227
-
233
+
228
234
  preview_dir = r'c:\temp\camtrapdp-preview'
229
- htmlOutputFile,image_db = visualize_db(coco_file, preview_dir, camtrap_dp_folder, options=options)
230
-
235
+ html_output_file, image_db = visualize_db(coco_file, preview_dir, camtrap_dp_folder, options=options)
236
+
231
237
  from megadetector.utils.path_utils import open_file
232
- open_file(htmlOutputFile)
233
-
234
-
238
+ open_file(html_output_file)
239
+
240
+
235
241
  #%% Command-line driver
236
242
 
237
- # TODO
243
+ def main():
244
+ """
245
+ Command-line interface to convert Camtrap DP to COCO.
246
+ """
247
+
248
+ parser = argparse.ArgumentParser(description='Convert Camtrap DP to COCO format')
249
+ parser.add_argument('camtrap_dp_folder', type=str,
250
+ help='Input folder, containing a CamtrapDP package')
251
+ parser.add_argument('--output_file', type=str, default=None,
252
+ help='COCO-formatted output file (defaults to [camtrap_dp_folder]_coco.json)')
253
+
254
+ args = parser.parse_args()
255
+
256
+ if args.output_file is None:
257
+ # Default output file name: [camtrap_dp_folder]_coco.json
258
+ #
259
+ # Remove trailing slash if present
260
+ folder_name = args.camtrap_dp_folder.rstrip(os.sep)
261
+ output_file = folder_name + '_coco.json'
262
+ else:
263
+ output_file = args.output_file
264
+
265
+ camtrap_dp_to_coco(camtrap_dp_folder=args.camtrap_dp_folder, output_file=output_file)
266
+ print(f"Successfully converted Camtrap DP package at '{args.camtrap_dp_folder}' to " + \
267
+ f"COCO format at '{output_file}'")
268
+
269
+ if __name__ == '__main__':
270
+ main()