megadetector 5.0.10__py3-none-any.whl → 5.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (226) hide show
  1. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/LICENSE +0 -0
  2. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/METADATA +12 -11
  3. megadetector-5.0.11.dist-info/RECORD +5 -0
  4. megadetector-5.0.11.dist-info/top_level.txt +1 -0
  5. api/__init__.py +0 -0
  6. api/batch_processing/__init__.py +0 -0
  7. api/batch_processing/api_core/__init__.py +0 -0
  8. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  9. api/batch_processing/api_core/batch_service/score.py +0 -439
  10. api/batch_processing/api_core/server.py +0 -294
  11. api/batch_processing/api_core/server_api_config.py +0 -98
  12. api/batch_processing/api_core/server_app_config.py +0 -55
  13. api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  14. api/batch_processing/api_core/server_job_status_table.py +0 -152
  15. api/batch_processing/api_core/server_orchestration.py +0 -360
  16. api/batch_processing/api_core/server_utils.py +0 -92
  17. api/batch_processing/api_core_support/__init__.py +0 -0
  18. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  19. api/batch_processing/api_support/__init__.py +0 -0
  20. api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  21. api/batch_processing/data_preparation/__init__.py +0 -0
  22. api/batch_processing/data_preparation/manage_local_batch.py +0 -2391
  23. api/batch_processing/data_preparation/manage_video_batch.py +0 -327
  24. api/batch_processing/integration/digiKam/setup.py +0 -6
  25. api/batch_processing/integration/digiKam/xmp_integration.py +0 -465
  26. api/batch_processing/integration/eMammal/test_scripts/config_template.py +0 -5
  27. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -126
  28. api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +0 -55
  29. api/batch_processing/postprocessing/__init__.py +0 -0
  30. api/batch_processing/postprocessing/add_max_conf.py +0 -64
  31. api/batch_processing/postprocessing/categorize_detections_by_size.py +0 -163
  32. api/batch_processing/postprocessing/combine_api_outputs.py +0 -249
  33. api/batch_processing/postprocessing/compare_batch_results.py +0 -958
  34. api/batch_processing/postprocessing/convert_output_format.py +0 -397
  35. api/batch_processing/postprocessing/load_api_results.py +0 -195
  36. api/batch_processing/postprocessing/md_to_coco.py +0 -310
  37. api/batch_processing/postprocessing/md_to_labelme.py +0 -330
  38. api/batch_processing/postprocessing/merge_detections.py +0 -401
  39. api/batch_processing/postprocessing/postprocess_batch_results.py +0 -1904
  40. api/batch_processing/postprocessing/remap_detection_categories.py +0 -170
  41. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +0 -661
  42. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +0 -211
  43. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +0 -82
  44. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +0 -1631
  45. api/batch_processing/postprocessing/separate_detections_into_folders.py +0 -731
  46. api/batch_processing/postprocessing/subset_json_detector_output.py +0 -696
  47. api/batch_processing/postprocessing/top_folders_to_bottom.py +0 -223
  48. api/synchronous/__init__.py +0 -0
  49. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  50. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -152
  51. api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -266
  52. api/synchronous/api_core/animal_detection_api/config.py +0 -35
  53. api/synchronous/api_core/animal_detection_api/data_management/annotations/annotation_constants.py +0 -47
  54. api/synchronous/api_core/animal_detection_api/detection/detector_training/copy_checkpoints.py +0 -43
  55. api/synchronous/api_core/animal_detection_api/detection/detector_training/model_main_tf2.py +0 -114
  56. api/synchronous/api_core/animal_detection_api/detection/process_video.py +0 -543
  57. api/synchronous/api_core/animal_detection_api/detection/pytorch_detector.py +0 -304
  58. api/synchronous/api_core/animal_detection_api/detection/run_detector.py +0 -627
  59. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +0 -1029
  60. api/synchronous/api_core/animal_detection_api/detection/run_inference_with_yolov5_val.py +0 -581
  61. api/synchronous/api_core/animal_detection_api/detection/run_tiled_inference.py +0 -754
  62. api/synchronous/api_core/animal_detection_api/detection/tf_detector.py +0 -165
  63. api/synchronous/api_core/animal_detection_api/detection/video_utils.py +0 -495
  64. api/synchronous/api_core/animal_detection_api/md_utils/azure_utils.py +0 -174
  65. api/synchronous/api_core/animal_detection_api/md_utils/ct_utils.py +0 -262
  66. api/synchronous/api_core/animal_detection_api/md_utils/directory_listing.py +0 -251
  67. api/synchronous/api_core/animal_detection_api/md_utils/matlab_porting_tools.py +0 -97
  68. api/synchronous/api_core/animal_detection_api/md_utils/path_utils.py +0 -416
  69. api/synchronous/api_core/animal_detection_api/md_utils/process_utils.py +0 -110
  70. api/synchronous/api_core/animal_detection_api/md_utils/sas_blob_utils.py +0 -509
  71. api/synchronous/api_core/animal_detection_api/md_utils/string_utils.py +0 -59
  72. api/synchronous/api_core/animal_detection_api/md_utils/url_utils.py +0 -144
  73. api/synchronous/api_core/animal_detection_api/md_utils/write_html_image_list.py +0 -226
  74. api/synchronous/api_core/animal_detection_api/md_visualization/visualization_utils.py +0 -841
  75. api/synchronous/api_core/tests/__init__.py +0 -0
  76. api/synchronous/api_core/tests/load_test.py +0 -110
  77. classification/__init__.py +0 -0
  78. classification/aggregate_classifier_probs.py +0 -108
  79. classification/analyze_failed_images.py +0 -227
  80. classification/cache_batchapi_outputs.py +0 -198
  81. classification/create_classification_dataset.py +0 -627
  82. classification/crop_detections.py +0 -516
  83. classification/csv_to_json.py +0 -226
  84. classification/detect_and_crop.py +0 -855
  85. classification/efficientnet/__init__.py +0 -9
  86. classification/efficientnet/model.py +0 -415
  87. classification/efficientnet/utils.py +0 -610
  88. classification/evaluate_model.py +0 -520
  89. classification/identify_mislabeled_candidates.py +0 -152
  90. classification/json_to_azcopy_list.py +0 -63
  91. classification/json_validator.py +0 -695
  92. classification/map_classification_categories.py +0 -276
  93. classification/merge_classification_detection_output.py +0 -506
  94. classification/prepare_classification_script.py +0 -194
  95. classification/prepare_classification_script_mc.py +0 -228
  96. classification/run_classifier.py +0 -286
  97. classification/save_mislabeled.py +0 -110
  98. classification/train_classifier.py +0 -825
  99. classification/train_classifier_tf.py +0 -724
  100. classification/train_utils.py +0 -322
  101. data_management/__init__.py +0 -0
  102. data_management/annotations/__init__.py +0 -0
  103. data_management/annotations/annotation_constants.py +0 -34
  104. data_management/camtrap_dp_to_coco.py +0 -238
  105. data_management/cct_json_utils.py +0 -395
  106. data_management/cct_to_md.py +0 -176
  107. data_management/cct_to_wi.py +0 -289
  108. data_management/coco_to_labelme.py +0 -272
  109. data_management/coco_to_yolo.py +0 -662
  110. data_management/databases/__init__.py +0 -0
  111. data_management/databases/add_width_and_height_to_db.py +0 -33
  112. data_management/databases/combine_coco_camera_traps_files.py +0 -206
  113. data_management/databases/integrity_check_json_db.py +0 -477
  114. data_management/databases/subset_json_db.py +0 -115
  115. data_management/generate_crops_from_cct.py +0 -149
  116. data_management/get_image_sizes.py +0 -188
  117. data_management/importers/add_nacti_sizes.py +0 -52
  118. data_management/importers/add_timestamps_to_icct.py +0 -79
  119. data_management/importers/animl_results_to_md_results.py +0 -158
  120. data_management/importers/auckland_doc_test_to_json.py +0 -372
  121. data_management/importers/auckland_doc_to_json.py +0 -200
  122. data_management/importers/awc_to_json.py +0 -189
  123. data_management/importers/bellevue_to_json.py +0 -273
  124. data_management/importers/cacophony-thermal-importer.py +0 -796
  125. data_management/importers/carrizo_shrubfree_2018.py +0 -268
  126. data_management/importers/carrizo_trail_cam_2017.py +0 -287
  127. data_management/importers/cct_field_adjustments.py +0 -57
  128. data_management/importers/channel_islands_to_cct.py +0 -913
  129. data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  130. data_management/importers/eMammal/eMammal_helpers.py +0 -249
  131. data_management/importers/eMammal/make_eMammal_json.py +0 -223
  132. data_management/importers/ena24_to_json.py +0 -275
  133. data_management/importers/filenames_to_json.py +0 -385
  134. data_management/importers/helena_to_cct.py +0 -282
  135. data_management/importers/idaho-camera-traps.py +0 -1407
  136. data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  137. data_management/importers/jb_csv_to_json.py +0 -150
  138. data_management/importers/mcgill_to_json.py +0 -250
  139. data_management/importers/missouri_to_json.py +0 -489
  140. data_management/importers/nacti_fieldname_adjustments.py +0 -79
  141. data_management/importers/noaa_seals_2019.py +0 -181
  142. data_management/importers/pc_to_json.py +0 -365
  143. data_management/importers/plot_wni_giraffes.py +0 -123
  144. data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
  145. data_management/importers/prepare_zsl_imerit.py +0 -131
  146. data_management/importers/rspb_to_json.py +0 -356
  147. data_management/importers/save_the_elephants_survey_A.py +0 -320
  148. data_management/importers/save_the_elephants_survey_B.py +0 -332
  149. data_management/importers/snapshot_safari_importer.py +0 -758
  150. data_management/importers/snapshot_safari_importer_reprise.py +0 -665
  151. data_management/importers/snapshot_serengeti_lila.py +0 -1067
  152. data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  153. data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  154. data_management/importers/sulross_get_exif.py +0 -65
  155. data_management/importers/timelapse_csv_set_to_json.py +0 -490
  156. data_management/importers/ubc_to_json.py +0 -399
  157. data_management/importers/umn_to_json.py +0 -507
  158. data_management/importers/wellington_to_json.py +0 -263
  159. data_management/importers/wi_to_json.py +0 -441
  160. data_management/importers/zamba_results_to_md_results.py +0 -181
  161. data_management/labelme_to_coco.py +0 -548
  162. data_management/labelme_to_yolo.py +0 -272
  163. data_management/lila/__init__.py +0 -0
  164. data_management/lila/add_locations_to_island_camera_traps.py +0 -97
  165. data_management/lila/add_locations_to_nacti.py +0 -147
  166. data_management/lila/create_lila_blank_set.py +0 -557
  167. data_management/lila/create_lila_test_set.py +0 -151
  168. data_management/lila/create_links_to_md_results_files.py +0 -106
  169. data_management/lila/download_lila_subset.py +0 -177
  170. data_management/lila/generate_lila_per_image_labels.py +0 -515
  171. data_management/lila/get_lila_annotation_counts.py +0 -170
  172. data_management/lila/get_lila_image_counts.py +0 -111
  173. data_management/lila/lila_common.py +0 -300
  174. data_management/lila/test_lila_metadata_urls.py +0 -132
  175. data_management/ocr_tools.py +0 -874
  176. data_management/read_exif.py +0 -681
  177. data_management/remap_coco_categories.py +0 -84
  178. data_management/remove_exif.py +0 -66
  179. data_management/resize_coco_dataset.py +0 -189
  180. data_management/wi_download_csv_to_coco.py +0 -246
  181. data_management/yolo_output_to_md_output.py +0 -441
  182. data_management/yolo_to_coco.py +0 -676
  183. detection/__init__.py +0 -0
  184. detection/detector_training/__init__.py +0 -0
  185. detection/detector_training/model_main_tf2.py +0 -114
  186. detection/process_video.py +0 -703
  187. detection/pytorch_detector.py +0 -337
  188. detection/run_detector.py +0 -779
  189. detection/run_detector_batch.py +0 -1219
  190. detection/run_inference_with_yolov5_val.py +0 -917
  191. detection/run_tiled_inference.py +0 -935
  192. detection/tf_detector.py +0 -188
  193. detection/video_utils.py +0 -606
  194. docs/source/conf.py +0 -43
  195. md_utils/__init__.py +0 -0
  196. md_utils/azure_utils.py +0 -174
  197. md_utils/ct_utils.py +0 -612
  198. md_utils/directory_listing.py +0 -246
  199. md_utils/md_tests.py +0 -968
  200. md_utils/path_utils.py +0 -1044
  201. md_utils/process_utils.py +0 -157
  202. md_utils/sas_blob_utils.py +0 -509
  203. md_utils/split_locations_into_train_val.py +0 -228
  204. md_utils/string_utils.py +0 -92
  205. md_utils/url_utils.py +0 -323
  206. md_utils/write_html_image_list.py +0 -225
  207. md_visualization/__init__.py +0 -0
  208. md_visualization/plot_utils.py +0 -293
  209. md_visualization/render_images_with_thumbnails.py +0 -275
  210. md_visualization/visualization_utils.py +0 -1537
  211. md_visualization/visualize_db.py +0 -551
  212. md_visualization/visualize_detector_output.py +0 -406
  213. megadetector-5.0.10.dist-info/RECORD +0 -224
  214. megadetector-5.0.10.dist-info/top_level.txt +0 -8
  215. taxonomy_mapping/__init__.py +0 -0
  216. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +0 -491
  217. taxonomy_mapping/map_new_lila_datasets.py +0 -154
  218. taxonomy_mapping/prepare_lila_taxonomy_release.py +0 -142
  219. taxonomy_mapping/preview_lila_taxonomy.py +0 -591
  220. taxonomy_mapping/retrieve_sample_image.py +0 -71
  221. taxonomy_mapping/simple_image_download.py +0 -218
  222. taxonomy_mapping/species_lookup.py +0 -834
  223. taxonomy_mapping/taxonomy_csv_checker.py +0 -159
  224. taxonomy_mapping/taxonomy_graph.py +0 -346
  225. taxonomy_mapping/validate_lila_category_mappings.py +0 -83
  226. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/WHEEL +0 -0
@@ -1,322 +0,0 @@
1
- """
2
-
3
- train_utils.py
4
-
5
- Utility functions useful for training a classifier.
6
-
7
- This script should NOT depend on any other file within this repo. It should
8
- especially be agnostic to PyTorch vs. TensorFlow.
9
-
10
- """
11
-
12
- #%% Imports
13
-
14
- from __future__ import annotations
15
-
16
- from collections.abc import Mapping, Sequence
17
- import dataclasses
18
- import heapq
19
- import io
20
- import json
21
- from typing import Any, Optional
22
-
23
- import matplotlib.figure
24
- import numpy as np
25
- import pandas as pd
26
- import PIL.Image
27
- import scipy.interpolate
28
-
29
-
30
- #%% Classes
31
-
32
@dataclasses.dataclass(order=True)
class HeapItem:
    """
    Pairs an arbitrary payload with a sortable priority.

    Only `priority` takes part in ordering and equality; `data` is excluded
    from comparisons and from the repr, so the payload does not need to be
    comparable itself.
    """

    # value used for ordering and equality
    priority: Any
    # payload; ignored when comparing and when printing
    data: Any = dataclasses.field(repr=False, compare=False)
40
-
41
-
42
def add_to_heap(h: list[Any], item: HeapItem, k: Optional[int] = None) -> None:
    """
    Inserts `item` into the min-heap `h`, keeping at most the k largest items.

    While the heap is below capacity (or has no capacity limit) the item is
    simply pushed. Once the heap holds k items, the new item is pushed and
    the smallest element is popped in one step, so the heap never grows past
    k and always retains the largest values seen so far.

    Args:
        h: list, either empty [] or already heapified; modified in place
        item: HeapItem
        k: int, desired capacity of the heap, or None for no limit
    """

    at_capacity = k is not None and len(h) >= k
    if at_capacity:
        # push the new item, then drop whichever element is now smallest
        heapq.heappushpop(h, item)
        return

    heapq.heappush(h, item)
60
-
61
-
62
- #%% Functions
63
-
64
def prefix_all_keys(d: Mapping[str, Any], prefix: str) -> dict[str, Any]:
    """
    Builds a copy of `d` whose keys are each prepended with `prefix`.

    The input mapping is not modified; values are carried over unchanged.
    """

    prefixed = {}
    for key, value in d.items():
        prefixed[f'{prefix}{key}'] = value
    return prefixed
70
-
71
-
72
def fig_to_img(fig: matplotlib.figure.Figure) -> np.ndarray:
    """
    Renders a matplotlib figure to an RGB image held in a numpy array.

    The figure is saved as a PNG into an in-memory buffer (tight bounding
    box, no padding, opaque background), then decoded back with PIL and
    converted to RGB.

    Returns: np.ndarray, type uint8, shape [H, W, 3]
    """

    png_buffer = io.BytesIO()
    with png_buffer:
        fig.savefig(png_buffer, format='png', transparent=False,
                    bbox_inches='tight', pad_inches=0)
        # rewind so PIL reads the PNG from its first byte
        png_buffer.seek(0)
        rgb_image = PIL.Image.open(png_buffer).convert('RGB')
        fig_img = np.asarray(rgb_image)

    assert fig_img.dtype == np.uint8
    return fig_img
86
-
87
-
88
def imgs_with_confidences(imgs_list: list[tuple[Any, ...]],
                          label_names: Sequence[str],
                          ) -> tuple[matplotlib.figure.Figure, list[str]]:
    """
    Plots a row of images, each tagged with its true label and titled with
    its top-k predictions and their confidences.

    Args:
        imgs_list: list of tuple, each tuple consists of:
            img: array_like, shape [H, W, C], type either float [0, 1] or uint8
            label_id: int, label index
            topk_conf: list of float, confidence scores for topk predictions
            topk_preds: list of int, label indices for topk predictions
            img_file: str, path to image file
        label_names: list of str, label names in order of label id

    Returns:
        fig: matplotlib.figure.Figure
        img_files: list of str
    """

    imgs, tags, titles, img_files = [], [], [], []
    for img, label_id, topk_conf, topk_preds, img_file in imgs_list:
        # one "<predicted label>: <confidence>" line per top-k prediction
        title = '\n'.join(
            f'{label_names[pred]}: {conf:.03f}'
            for pred, conf in zip(topk_preds, topk_conf))

        imgs.append(img)
        img_files.append(img_file)
        tags.append(label_names[label_id])
        titles.append(title)

    fig = plot_img_grid(imgs=imgs, row_h=3, col_w=2.5, tags=tags, titles=titles)
    return fig, img_files
120
-
121
-
122
def plot_img_grid(imgs: Sequence[Any], row_h: float, col_w: float,
                  ncols: Optional[int] = None,
                  tags: Optional[Sequence[str]] = None,
                  titles: Optional[Sequence[str]] = None
                  ) -> matplotlib.figure.Figure:
    """
    Plots a grid of images.

    Images are laid out row by row. Cells beyond the last image (when the
    image count is not a multiple of ncols) are left blank.

    Args:
        imgs: list of images, each image is either an array or a PIL Image,
            see matplotlib.axes.Axes.imshow() documentation for supported shapes
        row_h: float, row height in inches
        col_w: float, col width in inches
        ncols: optional int, number of columns, defaults to len(imgs)
        tags: optional list of str, tags are displayed in upper-left corner of
            each image on a white background
        titles: optional list of str, text displayed above each image

    Returns: matplotlib.figure.Figure; when imgs is empty, a figure the size of
        a single cell with no axes

    Raises:
        ValueError: if ncols is given and is less than 1
    """

    # input validation
    num_images = len(imgs)
    if tags is not None:
        assert len(tags) == num_images
    if titles is not None:
        assert len(titles) == num_images

    if num_images == 0:
        # Nothing to draw. Without this guard ncols would default to 0 and the
        # row computation below would divide by zero.
        return matplotlib.figure.Figure(figsize=(col_w, row_h),
                                        tight_layout=True)

    if ncols is None:
        ncols = num_images
    elif ncols < 1:
        raise ValueError(f'ncols must be a positive integer, got {ncols}')

    nrows = int(np.ceil(num_images / ncols))
    fig = matplotlib.figure.Figure(figsize=(ncols * col_w, nrows * row_h),
                                   tight_layout=True)
    # squeeze=False keeps axs two-dimensional even for a single row or column
    axs = fig.subplots(nrows, ncols, squeeze=False)

    # plot the images, filling the grid in row-major order
    for i, img in enumerate(imgs):
        ax = axs[i // ncols, i % ncols]
        ax.imshow(img)
        if tags is not None:
            ax.text(-0.2, -0.2, tags[i], ha='left', va='top',
                    bbox=dict(lw=0, facecolor='white'))
        if titles is not None:
            ax.set_title(titles[i])

    # adjust the figure; this also hides the axes of any unused trailing cells
    for ax in axs.flat:
        ax.set_axis_off()
        ax.set_aspect('equal')
    fig.subplots_adjust(wspace=0, hspace=0)
    return fig
176
-
177
-
178
def load_splits(splits_json_path: str) -> dict[str, set[tuple[str, str]]]:
    """
    Reads the location splits from a JSON file and verifies that the
    'train', 'val', and 'test' splits share no locations.

    Args:
        splits_json_path: str, path to JSON file

    Returns: dict, maps split to set of (dataset, location) tuples
    """

    with open(splits_json_path, 'r') as f:
        raw_splits = json.load(f)

    # JSON stores each location as a list; convert to hashable 2-tuples
    split_to_locs = {}
    for split_name, loc_list in raw_splits.items():
        split_to_locs[split_name] = {(loc[0], loc[1]) for loc in loc_list}

    # no location may appear in more than one split
    for first, second in (('train', 'val'), ('train', 'test'), ('val', 'test')):
        assert split_to_locs[first].isdisjoint(split_to_locs[second])

    return split_to_locs
199
-
200
-
201
def load_dataset_csv(dataset_csv_path: str,
                     label_index_json_path: str,
                     splits_json_path: str,
                     multilabel: bool,
                     weight_by_detection_conf: bool | str,
                     label_weighted: bool
                     ) -> tuple[pd.DataFrame,
                                list[str],
                                dict[str, set[tuple[str, str]]]
                                ]:
    """
    Loads the classification dataset CSV, attaches label indices, and
    optionally computes a per-example 'weights' column.

    Args:
        dataset_csv_path: str, path to CSV file with columns
            ['dataset', 'location', 'label', 'confidence'], where label is a
            comma-delimited list of labels
        label_index_json_path: str, path to label index JSON file
        splits_json_path: str, path to splits JSON file
        multilabel: bool, whether a single example can have multiple labels
        weight_by_detection_conf: bool or str
            - if True: assumes classification CSV's 'confidence' column
                represents calibrated scores
            - if str: path to the .npz file containing x/y values for isotonic
                regression calibration function
        label_weighted: bool, whether to give each label equal weight

    Returns:
        df: pd.DataFrame, with columns
            dataset_location: tuples of (dataset, location)
            label: str if not multilabel, list of str if multilabel
            label_index: int if not multilabel, sorted tuple of int if
                multilabel
            weights: float, weight for each example
                column exists if and only if label_weighted=True or
                weight_by_detection_conf is not False
        label_names: list of str, label names in order of label id
        split_to_locs: dict, maps split to set of (dataset, location) tuples

    Raises:
        NotImplementedError: if label_weighted and multilabel are both set
    """

    # read in dataset CSV and create merged (dataset, location) col
    df = pd.read_csv(dataset_csv_path, index_col=False, float_precision='high')
    df['dataset_location'] = list(zip(df['dataset'], df['location']))

    # the label index JSON is keyed by the stringified label id ("0", "1", ...)
    with open(label_index_json_path, 'r') as f:
        idx_to_label = json.load(f)
    label_names = [idx_to_label[str(i)] for i in range(len(idx_to_label))]
    label_to_idx = {label: idx for idx, label in enumerate(label_names)}

    # map label to label_index
    if multilabel:
        df['label'] = df['label'].map(lambda x: x.split(','))
        df['label_index'] = df['label'].map(
            lambda labellist: tuple(sorted(label_to_idx[y] for y in labellist)))
    else:
        # single-label mode: no row may carry a comma-delimited label list
        assert not any(df['label'].str.contains(','))
        df['label_index'] = df['label'].map(label_to_idx.__getitem__)

    # load the splits
    split_to_locs = load_splits(splits_json_path)

    if weight_by_detection_conf:
        # every example starts at weight 1.0; only training rows are changed
        df['weights'] = 1.0

        # only weight the training set by detection confidence
        train_mask = df['dataset_location'].isin(split_to_locs['train'])
        df.loc[train_mask, 'weights'] = df.loc[train_mask, 'confidence']

        if isinstance(weight_by_detection_conf, str):
            # isotonic regression calibration of MegaDetector confidence
            # (applied as linear interpolation through the stored x/y points)
            with np.load(weight_by_detection_conf) as npz:
                calib = scipy.interpolate.interp1d(
                    x=npz['x'], y=npz['y'], kind='linear')
            df.loc[train_mask, 'weights'] = calib(df.loc[train_mask, 'weights'])

    if label_weighted:
        if multilabel:
            raise NotImplementedError

        # confidence weighting may already have created the column
        if 'weights' not in df.columns:
            df['weights'] = 1.0

        # treat each split separately
        # new_weight[i] = confidence[i] * (n / c) / total_confidence(i's label)
        # - n = # examples in split (weighted by confidence); c = # labels
        # - weight allocated to each label is n/c
        # - within each label, weigh each example proportional to confidence
        # - new weights sum to n
        c = len(label_names)
        for split, locs in split_to_locs.items():
            split_mask = df['dataset_location'].isin(locs)
            n = df.loc[split_mask, 'weights'].sum()
            per_label_conf = df[split_mask].groupby('label')['weights'].sum()
            # every label must be present in every split
            assert len(per_label_conf) == c, (
                f'{split} split only has {len(per_label_conf)}/{c} labels')
            # look up each row's label total, giving one scale factor per row
            scaling = (n / c) / per_label_conf[df.loc[split_mask, 'label']]
            # to_numpy() drops the label index so the multiply is positional
            df.loc[split_mask, 'weights'] *= scaling.to_numpy()
            w_sum = df.loc[split_mask, 'weights'].sum()
            assert np.isclose(w_sum, n), (
                f'Expected {split} weights to sum to {n}, got {w_sum} instead')

        # error checking
        # NOTE(review): indentation was lost in this listing; the check is
        # placed inside the label_weighted branch because the 'weights' column
        # may not exist otherwise -- confirm against the upstream file
        assert (df['weights'] > 0).all()

    return df, label_names, split_to_locs
303
-
304
-
305
def recall_from_confusion_matrix(
        confusion_matrix: np.ndarray,
        label_names: Sequence[str],
        ) -> dict[str, float]:
    """
    Computes per-label recall from a confusion matrix.

    Recall for label i is the diagonal entry C[i, i] divided by the sum of
    row i. A small epsilon (1e-8) is added to the denominator, so a label
    with no true samples gets recall 0 instead of a division by zero.

    Args:
        confusion_matrix: np.ndarray, shape [n_classes, n_classes], type int
            C[i, j] = # of samples with true label i, predicted as label j
        label_names: list of str, label names in order by label id

    Returns: dict, label_name => recall
    """

    recalls = {}
    for idx, name in enumerate(label_names):
        correct = confusion_matrix[idx, idx]
        num_true = confusion_matrix[idx].sum()
        recalls[name] = correct / (num_true + 1e-8)
    return recalls
File without changes
File without changes
@@ -1,34 +0,0 @@
1
- """
2
-
3
- annotation_constants.py
4
-
5
- Defines default categories for MegaDetector output boxes.
6
-
7
- Used throughout the repo; do not change unless you are Dan or Siyu. In fact, do not change unless
8
- you are both Dan *and* Siyu.
9
-
10
- We use integer IDs here; this is different from the MD .json file format,
11
- where indices are string integers.
12
-
13
- """
14
-
15
- #%% Constants
16
-
17
# Categories MegaDetector can assign to an output box, plus an explicit entry
# for "empty" (which is otherwise implicit in detector output)
detector_bbox_categories = [
    {'id': 0, 'name': 'empty'},
    {'id': 1, 'name': 'animal'},
    {'id': 2, 'name': 'person'},
    {'id': 3, 'name': 'vehicle'}
]

# Number of real detection classes, i.e. excluding "empty"; used when
# choosing colors.
NUM_DETECTOR_CATEGORIES = len(detector_bbox_categories) - 1

# Lookup tables in both directions, keyed by integer ID and by name
detector_bbox_category_name_to_id = {}
detector_bbox_category_id_to_name = {}

for cat in detector_bbox_categories:
    detector_bbox_category_name_to_id[cat['name']] = cat['id']
    detector_bbox_category_id_to_name[cat['id']] = cat['name']
34
-
@@ -1,238 +0,0 @@
1
- """
2
-
3
- camtrap_dp_to_coco.py
4
-
5
- Parse a very limited subset of the Camtrap DP data package format:
6
-
7
- https://camtrap-dp.tdwg.org/
8
-
9
- ...and convert to COCO format. Assumes that all required metadata files have been
10
- put in the same directory (which is standard).
11
-
12
- Does not currently parse bounding boxes, just attaches species labels to images.
13
-
14
- Currently supports only sequence-level labeling.
15
-
16
- """
17
-
18
- #%% Imports and constants
19
-
20
- import os
21
- import json
22
- import pandas as pd
23
-
24
- from dateutil import parser as dateparser
25
-
26
- from collections import defaultdict
27
-
28
-
29
- #%% Functions
30
-
31
def camtrap_dp_to_coco(camtrap_dp_folder,output_file=None):
    """
    Convert the Camtrap DP package in [camtrap_dp_folder] to COCO.

    Does not validate images, just converts.  Use integrity_check_json_db to validate
    the resulting COCO file.

    Only event-level observations are supported; every image in an event receives one
    annotation per category observed in that event.  Category IDs are assigned in a
    reproducible order ('empty' is always 0).

    Optionally writes the results to [output_file].

    Args:
        camtrap_dp_folder: folder containing datapackage.json and the .csv files
            it refers to
        output_file: optional path to which the COCO-formatted dict is written as .json

    Returns:
        dict with the COCO fields 'images', 'annotations', 'categories', and 'info'

    Raises:
        AssertionError: if a required file or resource is missing, or the package
            is not a Camtrap DP 1.0 package
        ValueError: if any observation is not event-level
    """

    required_files = ('datapackage.json','deployments.csv','events.csv','media.csv','observations.csv')

    for fn in required_files:
        fn_abs = os.path.join(camtrap_dp_folder,fn)
        assert os.path.isfile(fn_abs), 'Could not find required file {}'.format(fn_abs)

    with open(os.path.join(camtrap_dp_folder,'datapackage.json'),'r') as f:
        datapackage = json.load(f)

    assert datapackage['profile'] == 'https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0/camtrap-dp-profile.json', \
        'I only know how to parse Camtrap DP 1.0 packages'

    # Find the path of each resource we need; other resources are ignored.  If a
    # resource name appears more than once, the last occurrence wins.
    resource_name_to_path = {}
    for r in datapackage['resources']:
        if r['name'] in ('deployments','media','events','observations'):
            resource_name_to_path[r['name']] = r['path']

    deployments_file = resource_name_to_path.get('deployments')
    events_file = resource_name_to_path.get('events')
    media_file = resource_name_to_path.get('media')
    observations_file = resource_name_to_path.get('observations')

    assert deployments_file is not None, 'No deployment file specified'
    assert events_file is not None, 'No events file specified'
    assert media_file is not None, 'No media file specified'
    assert observations_file is not None, 'No observation file specified'

    deployments_df = pd.read_csv(os.path.join(camtrap_dp_folder,deployments_file))
    events_df = pd.read_csv(os.path.join(camtrap_dp_folder,events_file))
    media_df = pd.read_csv(os.path.join(camtrap_dp_folder,media_file))
    observations_df = pd.read_csv(os.path.join(camtrap_dp_folder,observations_file))

    print('Read {} deployment lines'.format(len(deployments_df)))
    print('Read {} events lines'.format(len(events_df)))
    print('Read {} media lines'.format(len(media_df)))
    print('Read {} observation lines'.format(len(observations_df)))

    # Build one COCO image record per media row
    media_id_to_media_info = {}

    for _,row in media_df.iterrows():
        media_info = {}
        media_info['file_name'] = os.path.join(row['filePath'],row['fileName']).replace('\\','/')
        media_info['location'] = row['deploymentID']
        media_info['id'] = row['mediaID']
        # Kept as a datetime for now so images can be sorted within events;
        # converted to a string just before returning.
        media_info['datetime'] = dateparser.parse(row['timestamp'])
        # Placeholders, overwritten below for images that belong to an event
        media_info['frame_num'] = -1
        media_info['seq_num_frames'] = -1
        media_id_to_media_info[row['mediaID']] = media_info

    # Group media IDs by event
    event_id_to_media_ids = defaultdict(list)

    for _,row in events_df.iterrows():
        media_id = row['mediaID']
        assert media_id in media_id_to_media_info
        event_id_to_media_ids[row['eventID']].append(media_id)

    # Collect the set of category names observed in each event
    event_id_to_category_names = defaultdict(set)

    for _,row in observations_df.iterrows():

        if row['observationLevel'] != 'event':
            raise ValueError("I don't know how to parse image-level events yet")

        if row['observationType'] == 'blank':
            event_id_to_category_names[row['eventID']].add('empty')
        elif row['observationType'] == 'unknown':
            event_id_to_category_names[row['eventID']].add('unknown')
        elif row['observationType'] == 'human':
            assert row['scientificName'] == 'Homo sapiens'
            event_id_to_category_names[row['eventID']].add(row['scientificName'])
        else:
            assert row['observationType'] == 'animal'
            assert isinstance(row['scientificName'],str)
            event_id_to_category_names[row['eventID']].add(row['scientificName'])

    # Sort images within an event into frame numbers
    for event_id,media_ids_this_event in event_id_to_media_ids.items():
        media_info_this_event = [media_id_to_media_info[media_id] for media_id in media_ids_this_event]
        media_info_this_event = sorted(media_info_this_event, key=lambda x: x['datetime'])
        for i_media,media_info in enumerate(media_info_this_event):
            media_info['frame_num'] = i_media
            media_info['seq_num_frames'] = len(media_info_this_event)
            media_info['seq_id'] = event_id

    # Create category IDs.  Names are visited in sorted order within each event:
    # iterating the set directly would make the ID assignment depend on string
    # hash randomization, i.e. vary from run to run.
    category_name_to_category_id = {'empty':0}
    for category_names_this_event in event_id_to_category_names.values():
        for name in sorted(category_names_this_event):
            if name not in category_name_to_category_id:
                category_name_to_category_id[name] = len(category_name_to_category_id)

    # Move everything into COCO format
    images = list(media_id_to_media_info.values())

    categories = [{'name':name,'id':category_id}
                  for name,category_id in category_name_to_category_id.items()]
    info = {'version':1.0,'description':datapackage['name']}

    # Create annotations: one per (image, category) pair within each event
    annotations = []

    for event_id,media_ids_this_event in event_id_to_media_ids.items():
        i_ann = 0
        # Events with no observations get an empty category set, hence no annotations
        categories_this_event = sorted(event_id_to_category_names[event_id])
        for media_id in media_ids_this_event:
            im = media_id_to_media_info[media_id]
            for category_name in categories_this_event:
                ann = {}
                # str() because pandas may have parsed numeric event IDs as integers
                ann['id'] = str(event_id) + '_' + str(i_ann)
                i_ann += 1
                ann['image_id'] = im['id']
                ann['category_id'] = category_name_to_category_id[category_name]
                ann['sequence_level_annotation'] = True
                annotations.append(ann)

    coco_data = {}
    coco_data['images'] = images
    coco_data['annotations'] = annotations
    coco_data['categories'] = categories
    coco_data['info'] = info

    # datetime objects are not JSON-serializable
    for im in coco_data['images']:
        im['datetime'] = str(im['datetime'])

    if output_file is not None:
        with open(output_file,'w') as f:
            json.dump(coco_data,f,indent=1)

    return coco_data
186
-
187
-
188
- #%% Interactive driver
189
-
190
# Interactive driver: never runs on import (guarded by "if False"); the cells
# below are intended to be executed one at a time in an IDE that understands
# "#%%" cell markers.  Paths are machine-specific examples.
if False:

    pass

    #%%

    # Convert a local Camtrap DP package and write the COCO file next to it
    camtrap_dp_folder = r'C:\temp\pilot2\pilot2'
    coco_file = os.path.join(camtrap_dp_folder,'test-coco.json')
    coco_data = camtrap_dp_to_coco(camtrap_dp_folder,
                                   output_file=coco_file)

    #%% Validate

    from data_management.databases.integrity_check_json_db import integrity_check_json_db, IntegrityCheckOptions

    options = IntegrityCheckOptions()

    # Image paths in the COCO file are resolved relative to the package folder
    options.baseDir = camtrap_dp_folder
    options.bCheckImageSizes = False
    options.bCheckImageExistence = True
    options.bFindUnusedImages = True
    options.bRequireLocation = True
    options.iMaxNumImages = -1
    options.nThreads = 1
    options.verbose = True

    sortedCategories, data, errorInfo = integrity_check_json_db(coco_file,options)

    #%% Preview

    from md_visualization.visualize_db import DbVizOptions, visualize_db

    options = DbVizOptions()
    options.parallelize_rendering = True
    options.parallelize_rendering_with_threads = True
    options.parallelize_rendering_n_cores = 10

    preview_dir = r'c:\temp\camtrapdp-preview'
    htmlOutputFile,image_db = visualize_db(coco_file, preview_dir, camtrap_dp_folder, options=options)

    # Open the generated HTML preview
    from md_utils.path_utils import open_file
    open_file(htmlOutputFile)
232
-
233
-
234
- #%% Command-line driver
235
-
236
- # TODO
237
-
238
-