megadetector-10.0.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of megadetector has been flagged as potentially problematic.

Files changed (147)
  1. megadetector/__init__.py +0 -0
  2. megadetector/api/__init__.py +0 -0
  3. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
  7. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  8. megadetector/classification/__init__.py +0 -0
  9. megadetector/classification/aggregate_classifier_probs.py +108 -0
  10. megadetector/classification/analyze_failed_images.py +227 -0
  11. megadetector/classification/cache_batchapi_outputs.py +198 -0
  12. megadetector/classification/create_classification_dataset.py +626 -0
  13. megadetector/classification/crop_detections.py +516 -0
  14. megadetector/classification/csv_to_json.py +226 -0
  15. megadetector/classification/detect_and_crop.py +853 -0
  16. megadetector/classification/efficientnet/__init__.py +9 -0
  17. megadetector/classification/efficientnet/model.py +415 -0
  18. megadetector/classification/efficientnet/utils.py +608 -0
  19. megadetector/classification/evaluate_model.py +520 -0
  20. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  21. megadetector/classification/json_to_azcopy_list.py +63 -0
  22. megadetector/classification/json_validator.py +696 -0
  23. megadetector/classification/map_classification_categories.py +276 -0
  24. megadetector/classification/merge_classification_detection_output.py +509 -0
  25. megadetector/classification/prepare_classification_script.py +194 -0
  26. megadetector/classification/prepare_classification_script_mc.py +228 -0
  27. megadetector/classification/run_classifier.py +287 -0
  28. megadetector/classification/save_mislabeled.py +110 -0
  29. megadetector/classification/train_classifier.py +827 -0
  30. megadetector/classification/train_classifier_tf.py +725 -0
  31. megadetector/classification/train_utils.py +323 -0
  32. megadetector/data_management/__init__.py +0 -0
  33. megadetector/data_management/animl_to_md.py +161 -0
  34. megadetector/data_management/annotations/__init__.py +0 -0
  35. megadetector/data_management/annotations/annotation_constants.py +33 -0
  36. megadetector/data_management/camtrap_dp_to_coco.py +270 -0
  37. megadetector/data_management/cct_json_utils.py +566 -0
  38. megadetector/data_management/cct_to_md.py +184 -0
  39. megadetector/data_management/cct_to_wi.py +293 -0
  40. megadetector/data_management/coco_to_labelme.py +284 -0
  41. megadetector/data_management/coco_to_yolo.py +702 -0
  42. megadetector/data_management/databases/__init__.py +0 -0
  43. megadetector/data_management/databases/add_width_and_height_to_db.py +107 -0
  44. megadetector/data_management/databases/combine_coco_camera_traps_files.py +210 -0
  45. megadetector/data_management/databases/integrity_check_json_db.py +528 -0
  46. megadetector/data_management/databases/subset_json_db.py +195 -0
  47. megadetector/data_management/generate_crops_from_cct.py +200 -0
  48. megadetector/data_management/get_image_sizes.py +164 -0
  49. megadetector/data_management/labelme_to_coco.py +559 -0
  50. megadetector/data_management/labelme_to_yolo.py +349 -0
  51. megadetector/data_management/lila/__init__.py +0 -0
  52. megadetector/data_management/lila/create_lila_blank_set.py +556 -0
  53. megadetector/data_management/lila/create_lila_test_set.py +187 -0
  54. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  55. megadetector/data_management/lila/download_lila_subset.py +182 -0
  56. megadetector/data_management/lila/generate_lila_per_image_labels.py +777 -0
  57. megadetector/data_management/lila/get_lila_annotation_counts.py +174 -0
  58. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  59. megadetector/data_management/lila/lila_common.py +319 -0
  60. megadetector/data_management/lila/test_lila_metadata_urls.py +164 -0
  61. megadetector/data_management/mewc_to_md.py +344 -0
  62. megadetector/data_management/ocr_tools.py +873 -0
  63. megadetector/data_management/read_exif.py +964 -0
  64. megadetector/data_management/remap_coco_categories.py +195 -0
  65. megadetector/data_management/remove_exif.py +156 -0
  66. megadetector/data_management/rename_images.py +194 -0
  67. megadetector/data_management/resize_coco_dataset.py +663 -0
  68. megadetector/data_management/speciesnet_to_md.py +41 -0
  69. megadetector/data_management/wi_download_csv_to_coco.py +247 -0
  70. megadetector/data_management/yolo_output_to_md_output.py +594 -0
  71. megadetector/data_management/yolo_to_coco.py +876 -0
  72. megadetector/data_management/zamba_to_md.py +188 -0
  73. megadetector/detection/__init__.py +0 -0
  74. megadetector/detection/change_detection.py +840 -0
  75. megadetector/detection/process_video.py +479 -0
  76. megadetector/detection/pytorch_detector.py +1451 -0
  77. megadetector/detection/run_detector.py +1267 -0
  78. megadetector/detection/run_detector_batch.py +2159 -0
  79. megadetector/detection/run_inference_with_yolov5_val.py +1314 -0
  80. megadetector/detection/run_md_and_speciesnet.py +1494 -0
  81. megadetector/detection/run_tiled_inference.py +1038 -0
  82. megadetector/detection/tf_detector.py +209 -0
  83. megadetector/detection/video_utils.py +1379 -0
  84. megadetector/postprocessing/__init__.py +0 -0
  85. megadetector/postprocessing/add_max_conf.py +72 -0
  86. megadetector/postprocessing/categorize_detections_by_size.py +166 -0
  87. megadetector/postprocessing/classification_postprocessing.py +1752 -0
  88. megadetector/postprocessing/combine_batch_outputs.py +249 -0
  89. megadetector/postprocessing/compare_batch_results.py +2110 -0
  90. megadetector/postprocessing/convert_output_format.py +403 -0
  91. megadetector/postprocessing/create_crop_folder.py +629 -0
  92. megadetector/postprocessing/detector_calibration.py +570 -0
  93. megadetector/postprocessing/generate_csv_report.py +522 -0
  94. megadetector/postprocessing/load_api_results.py +223 -0
  95. megadetector/postprocessing/md_to_coco.py +428 -0
  96. megadetector/postprocessing/md_to_labelme.py +351 -0
  97. megadetector/postprocessing/md_to_wi.py +41 -0
  98. megadetector/postprocessing/merge_detections.py +392 -0
  99. megadetector/postprocessing/postprocess_batch_results.py +2077 -0
  100. megadetector/postprocessing/remap_detection_categories.py +226 -0
  101. megadetector/postprocessing/render_detection_confusion_matrix.py +677 -0
  102. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +206 -0
  103. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +82 -0
  104. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1665 -0
  105. megadetector/postprocessing/separate_detections_into_folders.py +795 -0
  106. megadetector/postprocessing/subset_json_detector_output.py +964 -0
  107. megadetector/postprocessing/top_folders_to_bottom.py +238 -0
  108. megadetector/postprocessing/validate_batch_results.py +332 -0
  109. megadetector/taxonomy_mapping/__init__.py +0 -0
  110. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  111. megadetector/taxonomy_mapping/map_new_lila_datasets.py +213 -0
  112. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +165 -0
  113. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +543 -0
  114. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  115. megadetector/taxonomy_mapping/simple_image_download.py +224 -0
  116. megadetector/taxonomy_mapping/species_lookup.py +1008 -0
  117. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  118. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  119. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  120. megadetector/tests/__init__.py +0 -0
  121. megadetector/tests/test_nms_synthetic.py +335 -0
  122. megadetector/utils/__init__.py +0 -0
  123. megadetector/utils/ct_utils.py +1857 -0
  124. megadetector/utils/directory_listing.py +199 -0
  125. megadetector/utils/extract_frames_from_video.py +307 -0
  126. megadetector/utils/gpu_test.py +125 -0
  127. megadetector/utils/md_tests.py +2072 -0
  128. megadetector/utils/path_utils.py +2832 -0
  129. megadetector/utils/process_utils.py +172 -0
  130. megadetector/utils/split_locations_into_train_val.py +237 -0
  131. megadetector/utils/string_utils.py +234 -0
  132. megadetector/utils/url_utils.py +825 -0
  133. megadetector/utils/wi_platform_utils.py +968 -0
  134. megadetector/utils/wi_taxonomy_utils.py +1759 -0
  135. megadetector/utils/write_html_image_list.py +239 -0
  136. megadetector/visualization/__init__.py +0 -0
  137. megadetector/visualization/plot_utils.py +309 -0
  138. megadetector/visualization/render_images_with_thumbnails.py +243 -0
  139. megadetector/visualization/visualization_utils.py +1940 -0
  140. megadetector/visualization/visualize_db.py +630 -0
  141. megadetector/visualization/visualize_detector_output.py +479 -0
  142. megadetector/visualization/visualize_video_output.py +705 -0
  143. megadetector-10.0.13.dist-info/METADATA +134 -0
  144. megadetector-10.0.13.dist-info/RECORD +147 -0
  145. megadetector-10.0.13.dist-info/WHEEL +5 -0
  146. megadetector-10.0.13.dist-info/licenses/LICENSE +19 -0
  147. megadetector-10.0.13.dist-info/top_level.txt +1 -0
megadetector/classification/train_utils.py
@@ -0,0 +1,323 @@
+ """
+
+ train_utils.py
+
+ Utility functions useful for training a classifier.
+
+ This script should NOT depend on any other file within this repo. It should
+ especially be agnostic to PyTorch vs. TensorFlow.
+
+ """
+
+ #%% Imports
+
+ from __future__ import annotations
+
+ import dataclasses
+ import heapq
+ import io
+ import json
+
+ import matplotlib.figure
+ import numpy as np
+ import pandas as pd
+ import PIL.Image
+ import scipy.interpolate
+
+ from collections.abc import Mapping, Sequence
+ from typing import Any, Optional
+
+
+ #%% Classes
+
+ @dataclasses.dataclass(order=True)
+ class HeapItem:
+     """
+     A wrapper over non-comparable data with a comparable priority value.
+     """
+
+     priority: Any
+     data: Any = dataclasses.field(compare=False, repr=False)
+
+
+ def add_to_heap(h: list[Any], item: HeapItem, k: Optional[int] = None) -> None:
+     """
+     Tracks the max k elements using a heap.
+
+     We use a min-heap for this task. When a new element comes in, we compare it
+     to the smallest node in the heap, h[0]. If the new value is greater than
+     h[0], we pop h[0] and add the new element in.
+
+     Args:
+         h: list, either empty [] or already heapified
+         item: HeapItem
+         k: int, desired capacity of the heap, or None for no limit
+     """
+
+     if k is None or len(h) < k:
+         heapq.heappush(h, item)
+     else:
+         heapq.heappushpop(h, item)
+
+
+ #%% Functions
+
+ def prefix_all_keys(d: Mapping[str, Any], prefix: str) -> dict[str, Any]:
+     """
+     Returns a new dict where the keys are prefixed by <prefix>.
+     """
+
+     return {f'{prefix}{k}': v for k, v in d.items()}
+
+
+ def fig_to_img(fig: matplotlib.figure.Figure) -> np.ndarray:
+     """
+     Converts a matplotlib figure to an image represented by a numpy array.
+
+     Returns: np.ndarray, type uint8, shape [H, W, 3]
+     """
+
+     with io.BytesIO() as b:
+         fig.savefig(b, transparent=False, bbox_inches='tight', pad_inches=0,
+                     format='png')
+         b.seek(0)
+         fig_img = np.asarray(PIL.Image.open(b).convert('RGB'))
+     assert fig_img.dtype == np.uint8
+     return fig_img
+
+
+ def imgs_with_confidences(imgs_list: list[tuple[Any, ...]],
+                           label_names: Sequence[str],
+                           ) -> tuple[matplotlib.figure.Figure, list[str]]:
+     """
+     Args:
+         imgs_list: list of tuple, each tuple consists of:
+             img: array_like, shape [H, W, C], type either float [0, 1] or uint8
+             label_id: int, label index
+             topk_conf: list of float, confidence scores for topk predictions
+             topk_preds: list of int, label indices for topk predictions
+             img_file: str, path to image file
+         label_names: list of str, label names in order of label id
+
+     Returns:
+         fig: matplotlib.figure.Figure
+         img_files: list of str
+     """
+
+     imgs, img_files, tags, titles = [], [], [], []
+     for img, label_id, topk_conf, topk_preds, img_file in imgs_list:
+         imgs.append(img)
+         img_files.append(img_file)
+         tags.append(label_names[label_id])
+
+         lines = []
+         for pred, conf in zip(topk_preds, topk_conf):
+             pred_name = label_names[pred]
+             lines.append(f'{pred_name}: {conf:.03f}')
+         titles.append('\n'.join(lines))
+
+     fig = plot_img_grid(imgs=imgs, row_h=3, col_w=2.5, tags=tags, titles=titles)
+     return fig, img_files
+
+
+ def plot_img_grid(imgs: Sequence[Any], row_h: float, col_w: float,
+                   ncols: Optional[int] = None,
+                   tags: Optional[Sequence[str]] = None,
+                   titles: Optional[Sequence[str]] = None
+                   ) -> matplotlib.figure.Figure:
+     """
+     Plots a grid of images.
+
+     Args:
+         imgs: list of images, each image is either an array or a PIL Image,
+             see matplotlib.axes.Axes.imshow() documentation for supported shapes
+         row_h: float, row height in inches
+         col_w: float, col width in inches
+         ncols: optional int, number of columns, defaults to len(imgs)
+         tags: optional list of str, tags are displayed in upper-left corner of
+             each image on a white background
+         titles: optional list of str, text displayed above each image
+
+     Returns: matplotlib.figure.Figure
+     """
+
+     # input validation
+     num_images = len(imgs)
+     if tags is not None:
+         assert len(tags) == len(imgs)
+     if titles is not None:
+         assert len(titles) == len(imgs)
+
+     if ncols is None:
+         ncols = num_images
+
+     nrows = int(np.ceil(len(imgs) / ncols))
+     fig = matplotlib.figure.Figure(figsize=(ncols * col_w, nrows * row_h),
+                                    tight_layout=True)
+     axs = fig.subplots(nrows, ncols, squeeze=False)
+
+     # plot the images
+     for i in range(num_images):
+         r, c = i // ncols, i % ncols
+         ax = axs[r, c]
+         ax.imshow(imgs[i])
+         if tags is not None:
+             ax.text(-0.2, -0.2, tags[i], ha='left', va='top',
+                     bbox=dict(lw=0, facecolor='white'))
+         if titles is not None:
+             ax.set_title(titles[i])
+
+     # adjust the figure
+     for r in range(nrows):
+         for c in range(ncols):
+             axs[r, c].set_axis_off()
+             axs[r, c].set_aspect('equal')
+     fig.subplots_adjust(wspace=0, hspace=0)
+     return fig
+
+
+ def load_splits(splits_json_path: str) -> dict[str, set[tuple[str, str]]]:
+     """
+     Loads location splits from a JSON file and asserts that there are no
+     overlaps between splits.
+
+     Args:
+         splits_json_path: str, path to JSON file
+
+     Returns: dict, maps split to set of (dataset, location) tuples
+     """
+
+     with open(splits_json_path, 'r') as f:
+         split_to_locs_js = json.load(f)
+     split_to_locs = {
+         split: set((loc[0], loc[1]) for loc in locs)
+         for split, locs in split_to_locs_js.items()
+     }
+     assert split_to_locs['train'].isdisjoint(split_to_locs['val'])
+     assert split_to_locs['train'].isdisjoint(split_to_locs['test'])
+     assert split_to_locs['val'].isdisjoint(split_to_locs['test'])
+     return split_to_locs
+
+
+ def load_dataset_csv(dataset_csv_path: str,
+                      label_index_json_path: str,
+                      splits_json_path: str,
+                      multilabel: bool,
+                      weight_by_detection_conf: bool | str,
+                      label_weighted: bool
+                      ) -> tuple[pd.DataFrame,
+                                 list[str],
+                                 dict[str, set[tuple[str, str]]]
+                                 ]:
+     """
+     Args:
+         dataset_csv_path: str, path to CSV file with columns
+             ['dataset', 'location', 'label', 'confidence'], where label is a
+             comma-delimited list of labels
+         label_index_json_path: str, path to label index JSON file
+         splits_json_path: str, path to splits JSON file
+         multilabel: bool, whether a single example can have multiple labels
+         weight_by_detection_conf: bool or str
+             - if True: assumes the classification CSV's 'confidence' column
+               represents calibrated scores
+             - if str: path to the .npz file containing x/y values for the
+               isotonic regression calibration function
+         label_weighted: bool, whether to give each label equal weight
+
+     Returns:
+         df: pd.DataFrame, with columns
+             dataset_location: tuples of (dataset, location)
+             label: str if not multilabel, list of str if multilabel
+             label_index: int if not multilabel, list of int if multilabel
+             weights: float, weight for each example; column exists if and
+                 only if label_weighted=True or weight_by_detection_conf is
+                 not False
+         label_names: list of str, label names in order of label id
+         split_to_locs: dict, maps split to set of (dataset, location) tuples
+     """
+
+     # read in dataset CSV and create merged (dataset, location) col
+     df = pd.read_csv(dataset_csv_path, index_col=False, float_precision='high')
+     df['dataset_location'] = list(zip(df['dataset'], df['location']))
+
+     with open(label_index_json_path, 'r') as f:
+         idx_to_label = json.load(f)
+     label_names = [idx_to_label[str(i)] for i in range(len(idx_to_label))]
+     label_to_idx = {label: idx for idx, label in enumerate(label_names)}
+
+     # map label to label_index
+     if multilabel:
+         df['label'] = df['label'].map(lambda x: x.split(','))
+         df['label_index'] = df['label'].map(
+             lambda labellist: tuple(sorted(label_to_idx[y] for y in labellist)))
+     else:
+         assert not any(df['label'].str.contains(','))
+         df['label_index'] = df['label'].map(label_to_idx.__getitem__)
+
+     # load the splits
+     split_to_locs = load_splits(splits_json_path)
+
+     if weight_by_detection_conf:
+         df['weights'] = 1.0
+
+         # only weight the training set by detection confidence
+         train_mask = df['dataset_location'].isin(split_to_locs['train'])
+         df.loc[train_mask, 'weights'] = df.loc[train_mask, 'confidence']
+
+         if isinstance(weight_by_detection_conf, str):
+             # isotonic regression calibration of MegaDetector confidence
+             with np.load(weight_by_detection_conf) as npz:
+                 calib = scipy.interpolate.interp1d(
+                     x=npz['x'], y=npz['y'], kind='linear')
+             df.loc[train_mask, 'weights'] = calib(df.loc[train_mask, 'weights'])
+
+     if label_weighted:
+         if multilabel:
+             raise NotImplementedError
+
+         if 'weights' not in df.columns:
+             df['weights'] = 1.0
+
+         # treat each split separately
+         # new_weight[i] = confidence[i] * (n / c) / total_confidence(i's label)
+         # - n = # examples in split (weighted by confidence); c = # labels
+         # - weight allocated to each label is n/c
+         # - within each label, weigh each example proportional to confidence
+         # - new weights sum to n
+         c = len(label_names)
+         for split, locs in split_to_locs.items():
+             split_mask = df['dataset_location'].isin(locs)
+             n = df.loc[split_mask, 'weights'].sum()
+             per_label_conf = df[split_mask].groupby('label')['weights'].sum()
+             assert len(per_label_conf) == c, (
+                 f'{split} split only has {len(per_label_conf)}/{c} labels')
+             scaling = (n / c) / per_label_conf[df.loc[split_mask, 'label']]
+             df.loc[split_mask, 'weights'] *= scaling.to_numpy()
+             w_sum = df.loc[split_mask, 'weights'].sum()
+             assert np.isclose(w_sum, n), (
+                 f'Expected {split} weights to sum to {n}, got {w_sum} instead')
+
+     # error checking
+     assert (df['weights'] > 0).all()
+
+     return df, label_names, split_to_locs
+
+
+ def recall_from_confusion_matrix(
+         confusion_matrix: np.ndarray,
+         label_names: Sequence[str],
+         ) -> dict[str, float]:
+     """
+     Args:
+         confusion_matrix: np.ndarray, shape [n_classes, n_classes], type int,
+             C[i, j] = # of samples with true label i, predicted as label j
+         label_names: list of str, label names in order by label id
+
+     Returns: dict, label_name => recall
+     """
+
+     result = {
+         label_name: confusion_matrix[i, i] / (confusion_matrix[i].sum() + 1e-8)
+         for i, label_name in enumerate(label_names)
+     }
+     return result
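A minimal usage sketch of the HeapItem/add_to_heap pair above, which implements top-k tracking with a min-heap (h[0] is always the weakest retained element); the confidence/path pairs here are illustrative, not from the package:

import heapq
from megadetector.classification.train_utils import HeapItem, add_to_heap

# Keep the 3 highest-confidence items seen so far; since this is a
# min-heap, h[0] is always the weakest item currently retained.
h = []
for conf, path in [(0.91, 'a.jpg'), (0.15, 'b.jpg'), (0.99, 'c.jpg'),
                   (0.42, 'd.jpg'), (0.97, 'e.jpg')]:
    add_to_heap(h, HeapItem(priority=conf, data=path), k=3)

# Pops in ascending priority order: 0.91, 0.97, 0.99
while h:
    item = heapq.heappop(h)
    print(item.priority, item.data)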
megadetector/data_management/animl_to_md.py
@@ -0,0 +1,161 @@
+ """
+
+ animl_to_md.py
+
+ Convert a .csv file produced by the Animl package:
+
+ https://github.com/conservationtechlab/animl-py
+
+ ...to a MD results file suitable for import into Timelapse.
+
+ Columns are expected to be:
+
+ file
+ category (MD category identifiers: 1==animal, 2==person, 3==vehicle)
+ detection_conf
+ bbox1,bbox2,bbox3,bbox4
+ class
+ classification_conf
+
+ """
+
+ #%% Imports and constants
+
+ import sys
+ import argparse
+
+ import pandas as pd
+
+ from megadetector.utils.ct_utils import write_json
+ from megadetector.detection.run_detector import DEFAULT_DETECTOR_LABEL_MAP
+ detection_category_id_to_name = DEFAULT_DETECTOR_LABEL_MAP
+
+
+ #%% Main function
+
+ def animl_results_to_md_results(input_file,output_file=None):
+     """
+     Converts the .csv file [input_file] to the MD-formatted .json file [output_file].
+
+     If [output_file] is None, '.json' will be appended to the input file.
+     """
+
+     if output_file is None:
+         output_file = input_file + '.json'
+
+     df = pd.read_csv(input_file)
+
+     expected_columns = ('file','category','detection_conf',
+                         'bbox1','bbox2','bbox3','bbox4','class','classification_conf')
+
+     for s in expected_columns:
+         assert s in df.columns,\
+             'Expected column {} not found'.format(s)
+
+     classification_category_name_to_id = {}
+     filename_to_results = {}
+
+     # i_row = 0; row = df.iloc[i_row]
+     for i_row,row in df.iterrows():
+
+         # Is this the first detection we've seen for this file?
+         if row['file'] in filename_to_results:
+             im = filename_to_results[row['file']]
+         else:
+             im = {}
+             im['detections'] = []
+             im['file'] = row['file']
+             filename_to_results[im['file']] = im
+
+         # Pandas often reads integer columns as float64, so check integer-ness
+         # rather than just isinstance(..., int)
+         assert pd.notna(row['category']) and float(row['category']).is_integer(), \
+             'Invalid category identifier in row {} (file: {})'.format(i_row, im['file'])
+         detection_category_id = str(int(row['category']))
+         assert detection_category_id in detection_category_id_to_name,\
+             'Unrecognized detection category ID {}'.format(detection_category_id)
+
+         detection = {}
+         detection['category'] = detection_category_id
+         detection['conf'] = row['detection_conf']
+         bbox = [row['bbox1'],row['bbox2'],row['bbox3'],row['bbox4']]
+         detection['bbox'] = bbox
+         classification_category_name = row['class']
+
+         # Have we seen this classification category before?
+         if classification_category_name in classification_category_name_to_id:
+             classification_category_id = \
+                 classification_category_name_to_id[classification_category_name]
+         else:
+             classification_category_id = str(len(classification_category_name_to_id))
+             classification_category_name_to_id[classification_category_name] = \
+                 classification_category_id
+
+         classifications = [[classification_category_id,row['classification_conf']]]
+         detection['classifications'] = classifications
+
+         im['detections'].append(detection)
+
+     # ...for each row
+
+     info = {}
+     info['format_version'] = '1.3'
+     info['detector'] = 'Animl'
+     info['classifier'] = 'Animl'
+
+     results = {}
+     results['info'] = info
+     results['detection_categories'] = detection_category_id_to_name
+     results['classification_categories'] = \
+         {v: k for k, v in classification_category_name_to_id.items()}
+     results['images'] = list(filename_to_results.values())
+
+     write_json(output_file,results)
+
+ # ...animl_results_to_md_results(...)
+
+
+ #%% Interactive driver
+
+ if False:
+
+     pass
+
+     #%%
+
+     input_file = r"G:\temp\animl-runs\animl-runs\Coati_v2\manifest.csv"
+     output_file = None
+     animl_results_to_md_results(input_file,output_file)
+
+
+ #%% Command-line driver
+
+ def main():
+     """
+     Command-line driver for animl_to_md
+     """
+
+     parser = argparse.ArgumentParser(
+         description='Convert an Animl-formatted .csv results file to MD-formatted .json results file')
+
+     parser.add_argument(
+         'input_file',
+         type=str,
+         help='input .csv file')
+
+     parser.add_argument(
+         '--output_file',
+         type=str,
+         default=None,
+         help='output .json file (defaults to input file appended with ".json")')
+
+     if len(sys.argv[1:]) == 0:
+         parser.print_help()
+         parser.exit()
+
+     args = parser.parse_args()
+
+     animl_results_to_md_results(args.input_file,args.output_file)
+
+ if __name__ == '__main__':
+     main()
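A minimal sketch of how the converter above is meant to be invoked, from Python or (via the module's main() guard) from the command line; the .csv filename is a placeholder:

from megadetector.data_management.animl_to_md import animl_results_to_md_results

# output_file defaults to None, so this writes animl_predictions.csv.json
animl_results_to_md_results('animl_predictions.csv')

...or, equivalently:

python -m megadetector.data_management.animl_to_md animl_predictions.csv --output_file animl_predictions.json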
megadetector/data_management/annotations/annotation_constants.py
@@ -0,0 +1,33 @@
+ """
+
+ annotation_constants.py
+
+ Defines default categories for MegaDetector output boxes.
+
+ Used throughout the repo; do not change unless you are Dan or Siyu. In fact, do not change unless
+ you are both Dan *and* Siyu.
+
+ We use integer IDs here; this is different from the MD .json file format,
+ where indices are string integers.
+
+ """
+
+ #%% Constants
+
+ # MegaDetector output categories (the "empty" category is implicit)
+ detector_bbox_categories = [
+     {'id': 0, 'name': 'empty'},
+     {'id': 1, 'name': 'animal'},
+     {'id': 2, 'name': 'person'},
+     {'id': 3, 'name': 'vehicle'}
+ ]
+
+ # This is used for choosing colors, so it ignores the "empty" class.
+ NUM_DETECTOR_CATEGORIES = len(detector_bbox_categories) - 1
+
+ detector_bbox_category_id_to_name = {}
+ detector_bbox_category_name_to_id = {}
+
+ for cat in detector_bbox_categories:
+     detector_bbox_category_id_to_name[cat['id']] = cat['name']
+     detector_bbox_category_name_to_id[cat['name']] = cat['id']
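A minimal sketch of how these constants are typically consumed; note the int-to-str conversion at the boundary with MD .json output, which (per the docstring above) stores category IDs as string integers:

from megadetector.data_management.annotations.annotation_constants import (
    detector_bbox_category_id_to_name,
    detector_bbox_category_name_to_id)

# The two lookup tables are built from detector_bbox_categories at import time
assert detector_bbox_category_id_to_name[2] == 'person'
assert detector_bbox_category_name_to_id['animal'] == 1

# MD .json files key detection categories by string ID, so convert
json_category_id = str(detector_bbox_category_name_to_id['vehicle'])  # '3'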