megadetector 5.0.11__py3-none-any.whl → 5.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (201) hide show
  1. megadetector/api/__init__.py +0 -0
  2. megadetector/api/batch_processing/__init__.py +0 -0
  3. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  4. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. megadetector/api/batch_processing/api_core/batch_service/score.py +439 -0
  6. megadetector/api/batch_processing/api_core/server.py +294 -0
  7. megadetector/api/batch_processing/api_core/server_api_config.py +98 -0
  8. megadetector/api/batch_processing/api_core/server_app_config.py +55 -0
  9. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +220 -0
  10. megadetector/api/batch_processing/api_core/server_job_status_table.py +152 -0
  11. megadetector/api/batch_processing/api_core/server_orchestration.py +360 -0
  12. megadetector/api/batch_processing/api_core/server_utils.py +92 -0
  13. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  14. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +46 -0
  15. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  16. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +152 -0
  17. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  18. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  19. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  20. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  21. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +126 -0
  22. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  23. megadetector/api/synchronous/__init__.py +0 -0
  24. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  25. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +152 -0
  26. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -0
  27. megadetector/api/synchronous/api_core/animal_detection_api/config.py +35 -0
  28. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  29. megadetector/api/synchronous/api_core/tests/load_test.py +110 -0
  30. megadetector/classification/__init__.py +0 -0
  31. megadetector/classification/aggregate_classifier_probs.py +108 -0
  32. megadetector/classification/analyze_failed_images.py +227 -0
  33. megadetector/classification/cache_batchapi_outputs.py +198 -0
  34. megadetector/classification/create_classification_dataset.py +627 -0
  35. megadetector/classification/crop_detections.py +516 -0
  36. megadetector/classification/csv_to_json.py +226 -0
  37. megadetector/classification/detect_and_crop.py +855 -0
  38. megadetector/classification/efficientnet/__init__.py +9 -0
  39. megadetector/classification/efficientnet/model.py +415 -0
  40. megadetector/classification/efficientnet/utils.py +610 -0
  41. megadetector/classification/evaluate_model.py +520 -0
  42. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  43. megadetector/classification/json_to_azcopy_list.py +63 -0
  44. megadetector/classification/json_validator.py +699 -0
  45. megadetector/classification/map_classification_categories.py +276 -0
  46. megadetector/classification/merge_classification_detection_output.py +506 -0
  47. megadetector/classification/prepare_classification_script.py +194 -0
  48. megadetector/classification/prepare_classification_script_mc.py +228 -0
  49. megadetector/classification/run_classifier.py +287 -0
  50. megadetector/classification/save_mislabeled.py +110 -0
  51. megadetector/classification/train_classifier.py +827 -0
  52. megadetector/classification/train_classifier_tf.py +725 -0
  53. megadetector/classification/train_utils.py +323 -0
  54. megadetector/data_management/__init__.py +0 -0
  55. megadetector/data_management/annotations/__init__.py +0 -0
  56. megadetector/data_management/annotations/annotation_constants.py +34 -0
  57. megadetector/data_management/camtrap_dp_to_coco.py +239 -0
  58. megadetector/data_management/cct_json_utils.py +395 -0
  59. megadetector/data_management/cct_to_md.py +176 -0
  60. megadetector/data_management/cct_to_wi.py +289 -0
  61. megadetector/data_management/coco_to_labelme.py +272 -0
  62. megadetector/data_management/coco_to_yolo.py +662 -0
  63. megadetector/data_management/databases/__init__.py +0 -0
  64. megadetector/data_management/databases/add_width_and_height_to_db.py +33 -0
  65. megadetector/data_management/databases/combine_coco_camera_traps_files.py +206 -0
  66. megadetector/data_management/databases/integrity_check_json_db.py +477 -0
  67. megadetector/data_management/databases/subset_json_db.py +115 -0
  68. megadetector/data_management/generate_crops_from_cct.py +149 -0
  69. megadetector/data_management/get_image_sizes.py +189 -0
  70. megadetector/data_management/importers/add_nacti_sizes.py +52 -0
  71. megadetector/data_management/importers/add_timestamps_to_icct.py +79 -0
  72. megadetector/data_management/importers/animl_results_to_md_results.py +158 -0
  73. megadetector/data_management/importers/auckland_doc_test_to_json.py +373 -0
  74. megadetector/data_management/importers/auckland_doc_to_json.py +201 -0
  75. megadetector/data_management/importers/awc_to_json.py +191 -0
  76. megadetector/data_management/importers/bellevue_to_json.py +273 -0
  77. megadetector/data_management/importers/cacophony-thermal-importer.py +796 -0
  78. megadetector/data_management/importers/carrizo_shrubfree_2018.py +269 -0
  79. megadetector/data_management/importers/carrizo_trail_cam_2017.py +289 -0
  80. megadetector/data_management/importers/cct_field_adjustments.py +58 -0
  81. megadetector/data_management/importers/channel_islands_to_cct.py +913 -0
  82. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +180 -0
  83. megadetector/data_management/importers/eMammal/eMammal_helpers.py +249 -0
  84. megadetector/data_management/importers/eMammal/make_eMammal_json.py +223 -0
  85. megadetector/data_management/importers/ena24_to_json.py +276 -0
  86. megadetector/data_management/importers/filenames_to_json.py +386 -0
  87. megadetector/data_management/importers/helena_to_cct.py +283 -0
  88. megadetector/data_management/importers/idaho-camera-traps.py +1407 -0
  89. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +294 -0
  90. megadetector/data_management/importers/jb_csv_to_json.py +150 -0
  91. megadetector/data_management/importers/mcgill_to_json.py +250 -0
  92. megadetector/data_management/importers/missouri_to_json.py +490 -0
  93. megadetector/data_management/importers/nacti_fieldname_adjustments.py +79 -0
  94. megadetector/data_management/importers/noaa_seals_2019.py +181 -0
  95. megadetector/data_management/importers/pc_to_json.py +365 -0
  96. megadetector/data_management/importers/plot_wni_giraffes.py +123 -0
  97. megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -0
  98. megadetector/data_management/importers/prepare_zsl_imerit.py +131 -0
  99. megadetector/data_management/importers/rspb_to_json.py +356 -0
  100. megadetector/data_management/importers/save_the_elephants_survey_A.py +320 -0
  101. megadetector/data_management/importers/save_the_elephants_survey_B.py +329 -0
  102. megadetector/data_management/importers/snapshot_safari_importer.py +758 -0
  103. megadetector/data_management/importers/snapshot_safari_importer_reprise.py +665 -0
  104. megadetector/data_management/importers/snapshot_serengeti_lila.py +1067 -0
  105. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +150 -0
  106. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +153 -0
  107. megadetector/data_management/importers/sulross_get_exif.py +65 -0
  108. megadetector/data_management/importers/timelapse_csv_set_to_json.py +490 -0
  109. megadetector/data_management/importers/ubc_to_json.py +399 -0
  110. megadetector/data_management/importers/umn_to_json.py +507 -0
  111. megadetector/data_management/importers/wellington_to_json.py +263 -0
  112. megadetector/data_management/importers/wi_to_json.py +442 -0
  113. megadetector/data_management/importers/zamba_results_to_md_results.py +181 -0
  114. megadetector/data_management/labelme_to_coco.py +547 -0
  115. megadetector/data_management/labelme_to_yolo.py +272 -0
  116. megadetector/data_management/lila/__init__.py +0 -0
  117. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +97 -0
  118. megadetector/data_management/lila/add_locations_to_nacti.py +147 -0
  119. megadetector/data_management/lila/create_lila_blank_set.py +558 -0
  120. megadetector/data_management/lila/create_lila_test_set.py +152 -0
  121. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  122. megadetector/data_management/lila/download_lila_subset.py +178 -0
  123. megadetector/data_management/lila/generate_lila_per_image_labels.py +516 -0
  124. megadetector/data_management/lila/get_lila_annotation_counts.py +170 -0
  125. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  126. megadetector/data_management/lila/lila_common.py +300 -0
  127. megadetector/data_management/lila/test_lila_metadata_urls.py +132 -0
  128. megadetector/data_management/ocr_tools.py +874 -0
  129. megadetector/data_management/read_exif.py +681 -0
  130. megadetector/data_management/remap_coco_categories.py +84 -0
  131. megadetector/data_management/remove_exif.py +66 -0
  132. megadetector/data_management/resize_coco_dataset.py +189 -0
  133. megadetector/data_management/wi_download_csv_to_coco.py +246 -0
  134. megadetector/data_management/yolo_output_to_md_output.py +441 -0
  135. megadetector/data_management/yolo_to_coco.py +676 -0
  136. megadetector/detection/__init__.py +0 -0
  137. megadetector/detection/detector_training/__init__.py +0 -0
  138. megadetector/detection/detector_training/model_main_tf2.py +114 -0
  139. megadetector/detection/process_video.py +702 -0
  140. megadetector/detection/pytorch_detector.py +341 -0
  141. megadetector/detection/run_detector.py +779 -0
  142. megadetector/detection/run_detector_batch.py +1219 -0
  143. megadetector/detection/run_inference_with_yolov5_val.py +917 -0
  144. megadetector/detection/run_tiled_inference.py +934 -0
  145. megadetector/detection/tf_detector.py +189 -0
  146. megadetector/detection/video_utils.py +606 -0
  147. megadetector/postprocessing/__init__.py +0 -0
  148. megadetector/postprocessing/add_max_conf.py +64 -0
  149. megadetector/postprocessing/categorize_detections_by_size.py +163 -0
  150. megadetector/postprocessing/combine_api_outputs.py +249 -0
  151. megadetector/postprocessing/compare_batch_results.py +958 -0
  152. megadetector/postprocessing/convert_output_format.py +396 -0
  153. megadetector/postprocessing/load_api_results.py +195 -0
  154. megadetector/postprocessing/md_to_coco.py +310 -0
  155. megadetector/postprocessing/md_to_labelme.py +330 -0
  156. megadetector/postprocessing/merge_detections.py +401 -0
  157. megadetector/postprocessing/postprocess_batch_results.py +1902 -0
  158. megadetector/postprocessing/remap_detection_categories.py +170 -0
  159. megadetector/postprocessing/render_detection_confusion_matrix.py +660 -0
  160. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +211 -0
  161. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +83 -0
  162. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1631 -0
  163. megadetector/postprocessing/separate_detections_into_folders.py +730 -0
  164. megadetector/postprocessing/subset_json_detector_output.py +696 -0
  165. megadetector/postprocessing/top_folders_to_bottom.py +223 -0
  166. megadetector/taxonomy_mapping/__init__.py +0 -0
  167. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  168. megadetector/taxonomy_mapping/map_new_lila_datasets.py +150 -0
  169. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -0
  170. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +590 -0
  171. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  172. megadetector/taxonomy_mapping/simple_image_download.py +219 -0
  173. megadetector/taxonomy_mapping/species_lookup.py +834 -0
  174. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  175. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  176. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  177. megadetector/utils/__init__.py +0 -0
  178. megadetector/utils/azure_utils.py +178 -0
  179. megadetector/utils/ct_utils.py +612 -0
  180. megadetector/utils/directory_listing.py +246 -0
  181. megadetector/utils/md_tests.py +968 -0
  182. megadetector/utils/path_utils.py +1044 -0
  183. megadetector/utils/process_utils.py +157 -0
  184. megadetector/utils/sas_blob_utils.py +509 -0
  185. megadetector/utils/split_locations_into_train_val.py +228 -0
  186. megadetector/utils/string_utils.py +92 -0
  187. megadetector/utils/url_utils.py +323 -0
  188. megadetector/utils/write_html_image_list.py +225 -0
  189. megadetector/visualization/__init__.py +0 -0
  190. megadetector/visualization/plot_utils.py +293 -0
  191. megadetector/visualization/render_images_with_thumbnails.py +275 -0
  192. megadetector/visualization/visualization_utils.py +1536 -0
  193. megadetector/visualization/visualize_db.py +550 -0
  194. megadetector/visualization/visualize_detector_output.py +405 -0
  195. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/METADATA +1 -1
  196. megadetector-5.0.12.dist-info/RECORD +199 -0
  197. megadetector-5.0.12.dist-info/top_level.txt +1 -0
  198. megadetector-5.0.11.dist-info/RECORD +0 -5
  199. megadetector-5.0.11.dist-info/top_level.txt +0 -1
  200. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/LICENSE +0 -0
  201. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/WHEEL +0 -0
@@ -0,0 +1,323 @@
1
+ """
2
+
3
+ train_utils.py
4
+
5
+ Utility functions useful for training a classifier.
6
+
7
+ This script should NOT depend on any other file within this repo. It should
8
+ especially be agnostic to PyTorch vs. TensorFlow.
9
+
10
+ """
11
+
12
+ #%% Imports
13
+
14
+ from __future__ import annotations
15
+
16
+ import dataclasses
17
+ import heapq
18
+ import io
19
+ import json
20
+
21
+ import matplotlib.figure
22
+ import numpy as np
23
+ import pandas as pd
24
+ import PIL.Image
25
+ import scipy.interpolate
26
+
27
+ from collections.abc import Mapping, Sequence
28
+ from typing import Any, Optional
29
+
30
+
31
+ #%% Classes
32
+
33
@dataclasses.dataclass(order=True)
class HeapItem:
    """
    Pairs an arbitrary payload with a sortable priority.

    Ordering and equality comparisons look only at [priority]; [data] is excluded
    from comparisons and from the repr, so payloads that don't define an ordering
    can still be stored in a heap.
    """

    # The value used for ordering/equality comparisons
    priority: Any

    # The payload; ignored by comparisons and omitted from repr()
    data: Any = dataclasses.field(repr=False, compare=False)
41
+
42
+
43
def add_to_heap(h: list[Any], item: HeapItem, k: Optional[int] = None) -> None:
    """
    Adds [item] to the min-heap [h], keeping at most the [k] largest items.

    While the heap has room (or when no capacity is given), the item is simply
    pushed.  Once the heap is at capacity, the item is pushed and the smallest
    element is popped in a single step, so only the largest k items seen so far
    remain.

    Args:
        h: list, either empty [] or already heapified; modified in place
        item: HeapItem
        k: int, desired capacity of the heap, or None for no limit
    """

    at_capacity = (k is not None) and (len(h) >= k)

    if at_capacity:
        # Push the new item, then discard whichever element is now the smallest
        heapq.heappushpop(h, item)
        return

    heapq.heappush(h, item)
61
+
62
+
63
+ #%% Functions
64
+
65
def prefix_all_keys(d: Mapping[str, Any], prefix: str) -> dict[str, Any]:
    """
    Builds a copy of [d] in which every key has [prefix] prepended.

    The input mapping is not modified; values are carried over unchanged.
    """

    prefixed = {}
    for key, value in d.items():
        prefixed[f'{prefix}{key}'] = value
    return prefixed
71
+
72
+
73
def fig_to_img(fig: matplotlib.figure.Figure) -> np.ndarray:
    """
    Renders a matplotlib figure to an RGB image stored in a numpy array.

    The figure is saved as a PNG into an in-memory buffer (tight bounding box,
    no padding, non-transparent), then read back and converted to RGB.

    Returns: np.ndarray, type uint8, shape [H, W, 3]
    """

    with io.BytesIO() as png_buffer:
        fig.savefig(png_buffer, format='png', transparent=False,
                    bbox_inches='tight', pad_inches=0)

        # Rewind so PIL reads the PNG from the start of the buffer
        png_buffer.seek(0)
        rgb_image = PIL.Image.open(png_buffer).convert('RGB')
        pixels = np.asarray(rgb_image)

    assert pixels.dtype == np.uint8
    return pixels
87
+
88
+
89
def imgs_with_confidences(imgs_list: list[tuple[Any, ...]],
                          label_names: Sequence[str],
                          ) -> tuple[matplotlib.figure.Figure, list[str]]:
    """
    Plots a row of images, each tagged with its label name and titled with its
    top-k predictions and confidences.

    Args:
        imgs_list: list of tuple, each tuple consists of:
            img: array_like, shape [H, W, C], type either float [0, 1] or uint8
            label_id: int, label index
            topk_conf: list of float, confidence scores for topk predictions
            topk_preds: list of int, label indices for topk predictions
            img_file: str, path to image file
        label_names: list of str, label names in order of label id

    Returns:
        fig: matplotlib.figure.Figure
        img_files: list of str
    """

    imgs, img_files, tags, titles = [], [], [], []

    for entry in imgs_list:
        img, label_id, topk_conf, topk_preds, img_file = entry

        imgs.append(img)
        img_files.append(img_file)
        tags.append(label_names[label_id])

        # One "<predicted label>: <confidence>" line per top-k prediction
        title = '\n'.join(
            f'{label_names[pred]}: {conf:.03f}'
            for pred, conf in zip(topk_preds, topk_conf))
        titles.append(title)

    fig = plot_img_grid(imgs=imgs, row_h=3, col_w=2.5, tags=tags, titles=titles)
    return fig, img_files
121
+
122
+
123
def plot_img_grid(imgs: Sequence[Any], row_h: float, col_w: float,
                  ncols: Optional[int] = None,
                  tags: Optional[Sequence[str]] = None,
                  titles: Optional[Sequence[str]] = None
                  ) -> matplotlib.figure.Figure:
    """
    Plots a grid of images.

    An empty [imgs] list yields a figure with a single blank cell, rather than
    failing with a division by zero.

    Args:
        imgs: list of images, each image is either an array or a PIL Image,
            see matplotlib.axes.Axes.imshow() documentation for supported shapes
        row_h: float, row height in inches
        col_w: float, col width in inches
        ncols: optional int, number of columns, defaults to len(imgs); values
            below 1 are treated as 1
        tags: optional list of str, tags are displayed in upper-left corner of
            each image on a white background
        titles: optional list of str, text displayed above each image

    Returns: matplotlib.figure.Figure
    """

    # input validation
    num_images = len(imgs)
    if tags is not None:
        assert len(tags) == num_images
    if titles is not None:
        assert len(titles) == num_images

    if ncols is None:
        ncols = num_images

    # Always allocate at least a 1x1 grid, so that an empty image list (or a
    # non-positive ncols) does not divide by zero or request an empty grid.
    ncols = max(ncols, 1)
    nrows = max(int(np.ceil(num_images / ncols)), 1)

    fig = matplotlib.figure.Figure(figsize=(ncols * col_w, nrows * row_h),
                                   tight_layout=True)

    # squeeze=False keeps axs two-dimensional even for a single row or column
    axs = fig.subplots(nrows, ncols, squeeze=False)

    # plot the images
    for i in range(num_images):
        r, c = i // ncols, i % ncols
        ax = axs[r, c]
        ax.imshow(imgs[i])
        if tags is not None:
            ax.text(-0.2, -0.2, tags[i], ha='left', va='top',
                    bbox=dict(lw=0, facecolor='white'))
        if titles is not None:
            ax.set_title(titles[i])

    # adjust the figure; this includes any unused cells in the last row
    for ax in axs.flat:
        ax.set_axis_off()
        ax.set_aspect('equal')
    fig.subplots_adjust(wspace=0, hspace=0)
    return fig
177
+
178
+
179
def load_splits(splits_json_path: str) -> dict[str, set[tuple[str, str]]]:
    """
    Reads the location splits from a JSON file and verifies that the 'train',
    'val', and 'test' splits are pairwise disjoint.

    Args:
        splits_json_path: str, path to JSON file

    Returns: dict, maps split to set of (dataset, location) tuples
    """

    with open(splits_json_path, 'r') as f:
        raw_splits = json.load(f)

    # JSON has no tuples or sets, so rebuild each split as a set of 2-tuples
    split_to_locs = {}
    for split_name, loc_list in raw_splits.items():
        split_to_locs[split_name] = {(loc[0], loc[1]) for loc in loc_list}

    split_pairs = (('train', 'val'), ('train', 'test'), ('val', 'test'))
    for first, second in split_pairs:
        assert split_to_locs[first].isdisjoint(split_to_locs[second])

    return split_to_locs
200
+
201
+
202
def load_dataset_csv(dataset_csv_path: str,
                     label_index_json_path: str,
                     splits_json_path: str,
                     multilabel: bool,
                     weight_by_detection_conf: bool | str,
                     label_weighted: bool
                     ) -> tuple[pd.DataFrame,
                                list[str],
                                dict[str, set[tuple[str, str]]]
                                ]:
    """
    Loads the classification dataset CSV, maps labels to label indices, and
    optionally computes per-example weights.

    Args:
        dataset_csv_path: str, path to CSV file with columns
            ['dataset', 'location', 'label', 'confidence'], where label is a
            comma-delimited list of labels
        label_index_json_path: str, path to label index JSON file
        splits_json_path: str, path to splits JSON file
        multilabel: bool, whether a single example can have multiple labels
        weight_by_detection_conf: bool or str
            - if True: assumes classification CSV's 'confidence' column
                represents calibrated scores
            - if str: path the .npz file containing x/y values for isotonic
                regression calibration function
        label_weighted: bool, whether to give each label equal weight

    Returns:
        df: pd.DataFrame, with columns
            dataset_location: tuples of (dataset, location)
            label: str if not multilabel, list of str if multilabel
            label_index: int if not multilabel, tuple of int if multilabel
            weights: float, weight for each example
                column exists if and only if label_weighted=True or
                weight_by_detection_conf is not False
        label_names: list of str, label names in order of label id
        split_to_locs: dict, maps split to set of (dataset, location) tuples

    Raises:
        NotImplementedError: if label_weighted and multilabel are both set
    """

    # read in dataset CSV and create merged (dataset, location) col
    df = pd.read_csv(dataset_csv_path, index_col=False, float_precision='high')
    df['dataset_location'] = list(zip(df['dataset'], df['location']))

    # the label index file maps stringified label ids to label names
    with open(label_index_json_path, 'r') as f:
        idx_to_label = json.load(f)
    label_names = [idx_to_label[str(label_id)]
                   for label_id in range(len(idx_to_label))]
    label_to_idx = dict(zip(label_names, range(len(label_names))))

    # map label to label_index
    if not multilabel:
        assert not any(df['label'].str.contains(','))
        df['label_index'] = df['label'].map(lambda name: label_to_idx[name])
    else:
        def _sorted_label_indices(labellist):
            return tuple(sorted(label_to_idx[y] for y in labellist))

        df['label'] = df['label'].map(lambda x: x.split(','))
        df['label_index'] = df['label'].map(_sorted_label_indices)

    # load the splits
    split_to_locs = load_splits(splits_json_path)

    if weight_by_detection_conf:
        df['weights'] = 1.0

        # only weight the training set by detection confidence
        train_mask = df['dataset_location'].isin(split_to_locs['train'])
        df.loc[train_mask, 'weights'] = df.loc[train_mask, 'confidence']

        if isinstance(weight_by_detection_conf, str):
            # isotonic regression calibration of MegaDetector confidence
            with np.load(weight_by_detection_conf) as npz:
                calib = scipy.interpolate.interp1d(
                    x=npz['x'], y=npz['y'], kind='linear')
            calibrated = calib(df.loc[train_mask, 'weights'])
            df.loc[train_mask, 'weights'] = calibrated

    if label_weighted:
        if multilabel:
            raise NotImplementedError

        if 'weights' not in df.columns:
            df['weights'] = 1.0

        # treat each split separately
        # new_weight[i] = confidence[i] * (n / c) / total_confidence(i's label)
        # - n = # examples in split (weighted by confidence); c = # labels
        # - weight allocated to each label is n/c
        # - within each label, weigh each example proportional to confidence
        # - new weights sum to n
        c = len(label_names)
        for split, locs in split_to_locs.items():
            split_mask = df['dataset_location'].isin(locs)
            n = df.loc[split_mask, 'weights'].sum()

            per_label_conf = df[split_mask].groupby('label')['weights'].sum()
            assert len(per_label_conf) == c, (
                f'{split} split only has {len(per_label_conf)}/{c} labels')

            # look up each example's per-label total, in row order
            labels_this_split = df.loc[split_mask, 'label']
            scaling = (n / c) / per_label_conf[labels_this_split]
            df.loc[split_mask, 'weights'] *= scaling.to_numpy()

            w_sum = df.loc[split_mask, 'weights'].sum()
            assert np.isclose(w_sum, n), (
                f'Expected {split} weights to sum to {n}, got {w_sum} instead')

        # error checking
        assert (df['weights'] > 0).all()

    return df, label_names, split_to_locs
304
+
305
+
306
def recall_from_confusion_matrix(
        confusion_matrix: np.ndarray,
        label_names: Sequence[str],
        ) -> dict[str, float]:
    """
    Computes per-label recall from a confusion matrix.

    Args:
        confusion_matrix: np.ndarray, shape [n_classes, n_classes], type int
            C[i, j] = # of samples with true label i, predicted as label j
        label_names: list of str, label names in order by label id

    Returns: dict, label_name => recall
    """

    recalls = {}
    for class_idx, label_name in enumerate(label_names):
        num_correct = confusion_matrix[class_idx, class_idx]
        num_true = confusion_matrix[class_idx].sum()

        # the small epsilon avoids dividing by zero for labels with no samples
        recalls[label_name] = num_correct / (num_true + 1e-8)
    return recalls
File without changes
File without changes
@@ -0,0 +1,34 @@
1
+ """
2
+
3
+ annotation_constants.py
4
+
5
+ Defines default categories for MegaDetector output boxes.
6
+
7
+ Used throughout the repo; do not change unless you are Dan or Siyu. In fact, do not change unless
8
+ you are both Dan *and* Siyu.
9
+
10
+ We use integer IDs here; this is different from the MD .json file format,
11
+ where indices are string integers.
12
+
13
+ """
14
+
15
+ #%% Constants
16
+
17
+ # MegaDetector output categories (the "empty" category is implicit)
18
+ detector_bbox_categories = [
19
+ {'id': 0, 'name': 'empty'},
20
+ {'id': 1, 'name': 'animal'},
21
+ {'id': 2, 'name': 'person'},
22
+ {'id': 3, 'name': 'vehicle'}
23
+ ]
24
+
25
+ # This is used for choosing colors, so it ignores the "empty" class.
26
+ NUM_DETECTOR_CATEGORIES = len(detector_bbox_categories) - 1
27
+
28
+ detector_bbox_category_id_to_name = {}
29
+ detector_bbox_category_name_to_id = {}
30
+
31
+ for cat in detector_bbox_categories:
32
+ detector_bbox_category_id_to_name[cat['id']] = cat['name']
33
+ detector_bbox_category_name_to_id[cat['name']] = cat['id']
34
+
@@ -0,0 +1,239 @@
1
+ """
2
+
3
+ camtrap_dp_to_coco.py
4
+
5
+ Parse a very limited subset of the Camtrap DP data package format:
6
+
7
+ https://camtrap-dp.tdwg.org/
8
+
9
+ ...and convert to COCO format. Assumes that all required metadata files have been
10
+ put in the same directory (which is standard).
11
+
12
+ Does not currently parse bounding boxes, just attaches species labels to images.
13
+
14
+ Currently supports only sequence-level labeling.
15
+
16
+ """
17
+
18
+ #%% Imports and constants
19
+
20
+ import os
21
+ import json
22
+ import pandas as pd
23
+
24
+ from dateutil import parser as dateparser
25
+
26
+ from collections import defaultdict
27
+
28
+
29
+ #%% Functions
30
+
31
def camtrap_dp_to_coco(camtrap_dp_folder,output_file=None):
    """
    Convert the Camtrap DP package in [camtrap_dp_folder] to COCO.

    Does not validate images, just converts.  Use integrity_check_json_db to validate
    the resulting COCO file.

    Only event-level observations are supported: every category observed for an event
    is attached to every image in that event.  Category IDs and annotation IDs are
    assigned in sorted-name order within each event, so that repeated conversions of the
    same package produce the same output.

    Optionally writes the results to [output_file]

    Args:
        camtrap_dp_folder: folder containing datapackage.json and the .csv files
        output_file: optional .json filename to which the COCO data is written

    Returns:
        dict with the fields 'images', 'annotations', 'categories', and 'info'

    Raises:
        AssertionError: if a required file or resource is missing, if the package
            profile is not Camtrap DP 1.0, or if an observation row is inconsistent
        ValueError: if an observation is not event-level
    """

    required_files = ('datapackage.json','deployments.csv','events.csv','media.csv','observations.csv')

    for fn in required_files:
        fn_abs = os.path.join(camtrap_dp_folder,fn)
        assert os.path.isfile(fn_abs), 'Could not find required file {}'.format(fn_abs)

    with open(os.path.join(camtrap_dp_folder,'datapackage.json'),'r') as f:
        datapackage = json.load(f)

    assert datapackage['profile'] == 'https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0/camtrap-dp-profile.json', \
        'I only know how to parse Camtrap DP 1.0 packages'

    # Find the path for each resource we need; other resources are ignored
    resource_name_to_path = {'deployments':None,'events':None,'media':None,'observations':None}
    for r in datapackage['resources']:
        if r['name'] in resource_name_to_path:
            resource_name_to_path[r['name']] = r['path']

    deployments_file = resource_name_to_path['deployments']
    events_file = resource_name_to_path['events']
    media_file = resource_name_to_path['media']
    observations_file = resource_name_to_path['observations']

    assert deployments_file is not None, 'No deployment file specified'
    assert events_file is not None, 'No events file specified'
    assert media_file is not None, 'No media file specified'
    assert observations_file is not None, 'No observation file specified'

    deployments_df = pd.read_csv(os.path.join(camtrap_dp_folder,deployments_file))
    events_df = pd.read_csv(os.path.join(camtrap_dp_folder,events_file))
    media_df = pd.read_csv(os.path.join(camtrap_dp_folder,media_file))
    observations_df = pd.read_csv(os.path.join(camtrap_dp_folder,observations_file))

    print('Read {} deployment lines'.format(len(deployments_df)))
    print('Read {} events lines'.format(len(events_df)))
    print('Read {} media lines'.format(len(media_df)))
    print('Read {} observation lines'.format(len(observations_df)))

    media_id_to_media_info = {}

    # i_row = 0; row = media_df.iloc[i_row]
    for i_row,row in media_df.iterrows():
        media_info = {}
        media_info['file_name'] = os.path.join(row['filePath'],row['fileName']).replace('\\','/')
        media_info['location'] = row['deploymentID']
        media_info['id'] = row['mediaID']

        # Kept as a datetime object for sorting; converted to a string before returning
        media_info['datetime'] = dateparser.parse(row['timestamp'])

        # Placeholders, replaced below for media that belong to an event
        media_info['frame_num'] = -1
        media_info['seq_num_frames'] = -1
        media_id_to_media_info[row['mediaID']] = media_info

    event_id_to_media_ids = defaultdict(list)

    # i_row = 0; row = events_df.iloc[i_row]
    for i_row,row in events_df.iterrows():
        media_id = row['mediaID']
        assert media_id in media_id_to_media_info
        event_id_to_media_ids[row['eventID']].append(media_id)

    event_id_to_category_names = defaultdict(set)

    # i_row = 0; row = observations_df.iloc[i_row]
    for i_row,row in observations_df.iterrows():

        if row['observationLevel'] != 'event':
            raise ValueError("I don't know how to parse image-level events yet")

        if row['observationType'] == 'blank':
            event_id_to_category_names[row['eventID']].add('empty')
        elif row['observationType'] == 'unknown':
            event_id_to_category_names[row['eventID']].add('unknown')
        elif row['observationType'] == 'human':
            assert row['scientificName'] == 'Homo sapiens'
            event_id_to_category_names[row['eventID']].add(row['scientificName'])
        else:
            assert row['observationType'] == 'animal'
            assert isinstance(row['scientificName'],str)
            event_id_to_category_names[row['eventID']].add(row['scientificName'])

    # Sort images within an event into frame numbers
    #
    # event_id = next(iter(event_id_to_media_ids))
    for event_id,media_ids_this_event in event_id_to_media_ids.items():
        media_info_this_event = sorted(
            (media_id_to_media_info[media_id] for media_id in media_ids_this_event),
            key=lambda x: x['datetime'])
        for i_media,media_info in enumerate(media_info_this_event):
            media_info['frame_num'] = i_media
            media_info['seq_num_frames'] = len(media_info_this_event)
            media_info['seq_id'] = event_id

    # Create category names
    #
    # Names within an event are visited in sorted order; iterating the set directly
    # would make category IDs depend on string hash randomization.
    category_name_to_category_id = {'empty':0}
    for event_id in event_id_to_category_names:
        for name in sorted(event_id_to_category_names[event_id]):
            if name not in category_name_to_category_id:
                category_name_to_category_id[name] = len(category_name_to_category_id)

    # Move everything into COCO format
    images = list(media_id_to_media_info.values())

    categories = [{'name':name,'id':category_id}
                  for name,category_id in category_name_to_category_id.items()]
    info = {'version':1.0,'description':datapackage['name']}

    # Create annotations
    annotations = []

    for event_id,media_ids_this_event in event_id_to_media_ids.items():

        # Annotation IDs are numbered within each event
        i_ann = 0
        categories_this_event = sorted(event_id_to_category_names[event_id])

        for media_id in media_ids_this_event:
            im = media_id_to_media_info[media_id]
            for category_name in categories_this_event:
                ann = {}

                # format() rather than '+', because event IDs are not strings when
                # the eventID column parses as numeric
                ann['id'] = '{}_{}'.format(event_id,i_ann)
                i_ann += 1
                ann['image_id'] = im['id']
                ann['category_id'] = category_name_to_category_id[category_name]
                ann['sequence_level_annotation'] = True
                annotations.append(ann)

    coco_data = {}
    coco_data['images'] = images
    coco_data['annotations'] = annotations
    coco_data['categories'] = categories
    coco_data['info'] = info

    # datetime objects are not JSON-serializable
    for im in coco_data['images']:
        im['datetime'] = str(im['datetime'])

    if output_file is not None:
        with open(output_file,'w') as f:
            json.dump(coco_data,f,indent=1)

    return coco_data
186
+
187
+
188
+ #%% Interactive driver
189
+
190
if False:

    pass

    #%% Convert a sample package

    camtrap_dp_folder = r'C:\temp\pilot2\pilot2'
    coco_file = os.path.join(camtrap_dp_folder,'test-coco.json')
    coco_data = camtrap_dp_to_coco(camtrap_dp_folder,output_file=coco_file)

    #%% Validate

    from megadetector.data_management.databases.integrity_check_json_db import (
        integrity_check_json_db,
        IntegrityCheckOptions,
    )

    options = IntegrityCheckOptions()

    options.baseDir = camtrap_dp_folder
    options.bCheckImageSizes = False
    options.bCheckImageExistence = True
    options.bFindUnusedImages = True
    options.bRequireLocation = True
    options.iMaxNumImages = -1
    options.nThreads = 1
    options.verbose = True

    sortedCategories, data, errorInfo = integrity_check_json_db(coco_file,options)

    #%% Preview

    from megadetector.visualization.visualize_db import DbVizOptions, visualize_db
    from megadetector.utils.path_utils import open_file

    options = DbVizOptions()
    options.parallelize_rendering = True
    options.parallelize_rendering_with_threads = True
    options.parallelize_rendering_n_cores = 10

    preview_dir = r'c:\temp\camtrapdp-preview'
    htmlOutputFile,image_db = visualize_db(coco_file, preview_dir, camtrap_dp_folder, options=options)

    # Open the rendered HTML preview
    open_file(htmlOutputFile)
233
+
234
+
235
+ #%% Command-line driver
236
+
237
+ # TODO
238
+
239
+