megadetector 5.0.27__py3-none-any.whl → 5.0.29__py3-none-any.whl
This diff shows the content of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Potentially problematic release: this version of megadetector might be problematic.
- megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +23 -23
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -1
- megadetector/data_management/camtrap_dp_to_coco.py +45 -45
- megadetector/data_management/cct_json_utils.py +101 -101
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +189 -189
- megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
- megadetector/data_management/databases/integrity_check_json_db.py +202 -188
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +38 -38
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +130 -124
- megadetector/data_management/labelme_to_yolo.py +78 -72
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
- megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +70 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
- megadetector/data_management/mewc_to_md.py +339 -340
- megadetector/data_management/ocr_tools.py +258 -252
- megadetector/data_management/read_exif.py +232 -223
- megadetector/data_management/remap_coco_categories.py +26 -26
- megadetector/data_management/remove_exif.py +31 -20
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +41 -41
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +117 -120
- megadetector/data_management/yolo_to_coco.py +195 -188
- megadetector/detection/change_detection.py +831 -0
- megadetector/detection/process_video.py +341 -338
- megadetector/detection/pytorch_detector.py +308 -266
- megadetector/detection/run_detector.py +186 -166
- megadetector/detection/run_detector_batch.py +366 -364
- megadetector/detection/run_inference_with_yolov5_val.py +328 -325
- megadetector/detection/run_tiled_inference.py +312 -253
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +291 -283
- megadetector/postprocessing/add_max_conf.py +15 -11
- megadetector/postprocessing/categorize_detections_by_size.py +44 -44
- megadetector/postprocessing/classification_postprocessing.py +808 -311
- megadetector/postprocessing/combine_batch_outputs.py +20 -21
- megadetector/postprocessing/compare_batch_results.py +528 -517
- megadetector/postprocessing/convert_output_format.py +97 -97
- megadetector/postprocessing/create_crop_folder.py +220 -147
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -0
- megadetector/postprocessing/load_api_results.py +25 -22
- megadetector/postprocessing/md_to_coco.py +129 -98
- megadetector/postprocessing/md_to_labelme.py +89 -83
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +87 -114
- megadetector/postprocessing/postprocess_batch_results.py +319 -302
- megadetector/postprocessing/remap_detection_categories.py +36 -36
- megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -69
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +33 -33
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +11 -11
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/azure_utils.py +22 -22
- megadetector/utils/ct_utils.py +1019 -200
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +541 -518
- megadetector/utils/path_utils.py +1511 -406
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/sas_blob_utils.py +53 -49
- megadetector/utils/split_locations_into_train_val.py +73 -60
- megadetector/utils/string_utils.py +147 -26
- megadetector/utils/url_utils.py +463 -173
- megadetector/utils/wi_utils.py +2629 -2868
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +21 -21
- megadetector/visualization/render_images_with_thumbnails.py +37 -73
- megadetector/visualization/visualization_utils.py +424 -404
- megadetector/visualization/visualize_db.py +197 -190
- megadetector/visualization/visualize_detector_output.py +126 -98
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/METADATA +6 -3
- megadetector-5.0.29.dist-info/RECORD +163 -0
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector-5.0.27.dist-info/RECORD +0 -208
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
(In the hunks below, a paired bare "-"/"+" with no visible content is a whitespace-only change, apparently trailing-whitespace cleanup.)

megadetector/classification/save_mislabeled.py

@@ -1,4 +1,4 @@
-"""
+r"""
 
 save_mislabeled.py
 
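The only visible content change in this hunk is the docstring prefix """ → r""". A plausible motivation (my inference, not stated in the diff) is Python 3.12's SyntaxWarning for invalid escape sequences, which bites docstrings containing Windows paths; a minimal sketch:

# A minimal sketch of why a module might switch to a raw docstring. In an
# ordinary string, "\t" is interpreted as a tab, and unrecognized sequences
# such as "\p" emit a SyntaxWarning on Python 3.12+; a raw string keeps
# every backslash literal, which is what you want for Windows paths.

ordinary = "Reads images from c:\temp\photos"   # "\t" becomes a tab here
raw = r"Reads images from c:\temp\photos"       # backslashes kept as-is

print(ordinary)  # Reads images from c:     emp\photos
print(raw)       # Reads images from c:\temp\photos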
@@ -10,7 +10,7 @@ List of known mislabeled images is stored in Azure Blob Storage.
 * blob: megadb_mislabeled/{dataset}.csv, one file per dataset
 
 Each file megadb_mislabeled/{dataset}.csv has two columns:
-
+
 * 'file': str, blob name
 
 * 'correct_class': optional str, correct dataset class
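For concreteness, a hypothetical per-dataset CSV matching the two-column layout described above, and one way it might be read (the file name and row values are invented for illustration):

# Hypothetical contents of megadb_mislabeled/idfg.csv (values invented):
#
#   file,correct_class
#   loc01/2019/IMG_0042.JPG,elk
#   loc01/2019/IMG_0057.JPG,
#
# A row with an empty 'correct_class' marks an image as mislabeled without
# supplying a correction.
import pandas as pd

df = pd.read_csv('megadb_mislabeled/idfg.csv', index_col=False)
needs_relabel = df[df['correct_class'].isna()]  # flagged, no correction given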
@@ -41,7 +41,7 @@ import pandas as pd
 #%% Main function
 
 def update_mislabeled_images(container_path: str, input_csv_path: str) -> None:
-
+
     df = pd.read_csv(input_csv_path, index_col=False)
 
     # error checking
@@ -62,7 +62,7 @@ def update_mislabeled_images(container_path: str, input_csv_path: str) -> None:
     df['file'] = df['blob_dirname'] + '/' + df['File']
 
     for ds, ds_df in df.groupby('dataset'):
-
+
         sr_path = os.path.join(container_path, 'megadb_mislabeled', f'{ds}.csv')
         if os.path.exists(sr_path):
             old_sr = pd.read_csv(sr_path, index_col='file', squeeze=True)
@@ -89,7 +89,7 @@ def update_mislabeled_images(container_path: str, input_csv_path: str) -> None:
 #%% Command-line driver
 
 def _parse_args() -> argparse.Namespace:
-
+
     parser = argparse.ArgumentParser(
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
         description='Merges classification results with Batch Detection API '
@@ -104,7 +104,7 @@ def _parse_args() -> argparse.Namespace:
 
 
 if __name__ == '__main__':
-
+
     args = _parse_args()
     update_mislabeled_images(container_path=args.container_path,
                              input_csv_path=args.input_csv)
megadetector/classification/train_classifier_tf.py

@@ -54,7 +54,7 @@ EFFICIENTNET_MODELS: Mapping[str, Mapping[str, Any]] = {
 
 #%% Example usage
 
-"""
+"""
 python train_classifier_tf.py run_idfg /ssd/crops_sq \
     -m "efficientnet-b0" --pretrained --finetune --label-weighted \
     --epochs 50 --batch-size 512 --lr 1e-4 \
@@ -97,7 +97,7 @@ def create_dataset(
 
     Returns: tf.data.Dataset
     """
-
+
     # images dataset
     img_ds = tf.data.Dataset.from_tensor_slices(img_files)
     img_ds = img_ds.map(lambda p: tf.io.read_file(img_base_dir + os.sep + p),
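The hunk above shows the start of the image pipeline; a self-contained sketch of the same tf.data pattern follows (the decode step and paths are my assumptions, not shown in the hunk; the pipeline is lazy, so building it does not read any files):

import os
import tensorflow as tf

img_base_dir = '/ssd/crops_sq'       # assumed base directory
img_files = ['a.jpg', 'b.jpg']       # relative image paths

# Dataset of file paths, mapped through a reader and then a decoder
img_ds = tf.data.Dataset.from_tensor_slices(img_files)
img_ds = img_ds.map(lambda p: tf.io.read_file(img_base_dir + os.sep + p),
                    num_parallel_calls=tf.data.AUTOTUNE)
img_ds = img_ds.map(lambda b: tf.io.decode_jpeg(b, channels=3),
                    num_parallel_calls=tf.data.AUTOTUNE)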
@@ -162,7 +162,7 @@ def create_dataloaders(
         datasets: dict, maps split to DataLoader
         label_names: list of str, label names in order of label id
     """
-
+
     df, label_names, split_to_locs = load_dataset_csv(
         dataset_csv_path, label_index_json_path, splits_json_path,
         multilabel=multilabel, label_weighted=label_weighted,
@@ -238,7 +238,7 @@ def build_model(model_name: str, num_classes: int, img_size: int,
     """
     Creates a model with an EfficientNet base.
     """
-
+
     class_name = EFFICIENTNET_MODELS[model_name]['cls']
     dropout = EFFICIENTNET_MODELS[model_name]['dropout']
 
@@ -279,7 +279,7 @@ def log_images_with_confidence(
         epoch: int
         tag: str
     """
-
+
     for label_id, heap in heap_dict.items():
         label_name = label_names[label_id]
 
@@ -319,7 +319,7 @@ def track_extreme_examples(tp_heaps: dict[int, list[HeapItem]],
         img_files: tf.Tensor, shape [batch_size], type tf.string
         logits: tf.Tensor, shape [batch_size, num_classes]
     """
-
+
     labels = labels.numpy().tolist()
     inputs = inputs.numpy().astype(np.uint8)
     img_files = img_files.numpy().astype(str).tolist()
@@ -480,7 +480,7 @@ def log_run(split: str, epoch: int, writer: tf.summary.SummaryWriter,
     Args:
         metrics: dict, keys already prefixed with {split}/
     """
-
+
     per_class_recall = recall_from_confusion_matrix(cm, label_names)
     metrics.update(prefix_all_keys(per_class_recall, f'{split}/label_recall/'))
 
@@ -518,7 +518,7 @@ def main(dataset_dir: str,
          seed: Optional[int] = None,
          logdir: str = '',
          cache_splits: Sequence[str] = ()) -> None:
-
+
     # input validation
     assert os.path.exists(dataset_dir)
     assert os.path.exists(cropped_images_dir)
@@ -597,7 +597,7 @@ def main(dataset_dir: str,
         model.base_model.trainable = True
 
         print('- train:')
-
+
         train_metrics, train_heaps, train_cm = run_epoch(
             model, loader=loaders['train'], weighted=label_weighted,
             loss_fn=loss_fn, weight_decay=weight_decay, optimizer=optimizer,
megadetector/classification/train_utils.py

@@ -35,7 +35,7 @@ class HeapItem:
     """
    A wrapper over non-comparable data with a comparable priority value.
     """
-
+
     priority: Any
     data: Any = dataclasses.field(compare=False, repr=False)
 
@@ -53,7 +53,7 @@ def add_to_heap(h: list[Any], item: HeapItem, k: Optional[int] = None) -> None:
         item: HeapItem
         k: int, desired capacity of the heap, or None for no limit
     """
-
+
     if k is None or len(h) < k:
         heapq.heappush(h, item)
     else:
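add_to_heap's else branch is cut off by the hunk boundary; heapq.heappushpop is the usual completion of this bounded-heap pattern, sketched here standalone (my reconstruction, not the file's verbatim code):

import heapq
from typing import Any, Optional

def add_to_heap(h: list[Any], item: Any, k: Optional[int] = None) -> None:
    if k is None or len(h) < k:
        heapq.heappush(h, item)
    else:
        # push item, then pop and discard the smallest element, so the
        # heap never grows beyond k entries (keeps the k largest items)
        heapq.heappushpop(h, item)

h: list[int] = []
for x in [5, 1, 9, 3, 7]:
    add_to_heap(h, x, k=3)
print(sorted(h))  # [5, 7, 9]: the three largest values seen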
@@ -66,17 +66,17 @@ def prefix_all_keys(d: Mapping[str, Any], prefix: str) -> dict[str, Any]:
     """
     Returns a new dict where the keys are prefixed by <prefix>.
     """
-
+
     return {f'{prefix}{k}': v for k, v in d.items()}
 
 
 def fig_to_img(fig: matplotlib.figure.Figure) -> np.ndarray:
     """
     Converts a matplotlib figure to an image represented by a numpy array.
-
+
     Returns: np.ndarray, type uint8, shape [H, W, 3]
     """
-
+
     with io.BytesIO() as b:
         fig.savefig(b, transparent=False, bbox_inches='tight', pad_inches=0,
                     format='png')
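The body of fig_to_img is likewise truncated by the hunk boundary; one way to complete the save-to-PNG-and-decode pattern it starts, using PIL (an assumption on my part; the original may decode differently):

import io

import matplotlib.figure
import numpy as np
from PIL import Image

def fig_to_img(fig: matplotlib.figure.Figure) -> np.ndarray:
    # Render the figure to an in-memory PNG, rewind the buffer, decode it,
    # and drop any alpha channel to get a uint8 [H, W, 3] array
    with io.BytesIO() as b:
        fig.savefig(b, transparent=False, bbox_inches='tight', pad_inches=0,
                    format='png')
        b.seek(0)
        img = np.asarray(Image.open(b).convert('RGB'), dtype=np.uint8)
    return img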
@@ -103,7 +103,7 @@ def imgs_with_confidences(imgs_list: list[tuple[Any, ...]],
         fig: matplotlib.figure.Figure
         img_files: list of str
     """
-
+
     imgs, img_files, tags, titles = [], [], [], []
     for img, label_id, topk_conf, topk_preds, img_file in imgs_list:
         imgs.append(img)
@@ -140,7 +140,7 @@ def plot_img_grid(imgs: Sequence[Any], row_h: float, col_w: float,
 
     Returns: matplotlib.figure.Figure
     """
-
+
     # input validation
     num_images = len(imgs)
     if tags is not None:
@@ -186,7 +186,7 @@ def load_splits(splits_json_path: str) -> dict[str, set[tuple[str, str]]]:
 
     Returns: dict, maps split to set of (dataset, location) tuples
     """
-
+
     with open(splits_json_path, 'r') as f:
         split_to_locs_js = json.load(f)
     split_to_locs = {
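The comprehension at the end of this hunk is truncated; based on the documented return type (split maps to a set of (dataset, location) tuples, while JSON can only hold lists), a plausible completion:

import json

def load_splits(splits_json_path: str) -> dict[str, set[tuple[str, str]]]:
    with open(splits_json_path, 'r') as f:
        split_to_locs_js = json.load(f)
    # JSON stores each location pair as a 2-element list; convert to
    # hashable tuples so they can live in a set
    split_to_locs = {
        split: set(tuple(loc) for loc in locs)
        for split, locs in split_to_locs_js.items()
    }
    return split_to_locs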
@@ -235,7 +235,7 @@ def load_dataset_csv(dataset_csv_path: str,
         label_names: list of str, label names in order of label id
         split_to_locs: dict, maps split to set of (dataset, location) tuples
     """
-
+
     # read in dataset CSV and create merged (dataset, location) col
     df = pd.read_csv(dataset_csv_path, index_col=False, float_precision='high')
     df['dataset_location'] = list(zip(df['dataset'], df['location']))
@@ -315,7 +315,7 @@ def recall_from_confusion_matrix(
 
     Returns: dict, label_name => recall
     """
-
+
     result = {
         label_name: confusion_matrix[i, i] / (confusion_matrix[i].sum() + 1e-8)
         for i, label_name in enumerate(label_names)
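A quick worked example of the per-class recall computed above: the diagonal entry divided by the row sum (the number of true instances of that class), with the 1e-8 term guarding against division by zero for empty rows.

import numpy as np

cm = np.array([[8, 2],    # 8 of 10 true 'cat' images predicted correctly
               [1, 9]])   # 9 of 10 true 'dog' images predicted correctly
label_names = ['cat', 'dog']

recall = {
    name: cm[i, i] / (cm[i].sum() + 1e-8)
    for i, name in enumerate(label_names)
}
print(recall)  # {'cat': ~0.8, 'dog': ~0.9}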
megadetector/data_management/annotations/annotation_constants.py

@@ -4,7 +4,7 @@ annotation_constants.py
 
 Defines default categories for MegaDetector output boxes.
 
-Used throughout the repo; do not change unless you are Dan or Siyu. In fact, do not change unless
+Used throughout the repo; do not change unless you are Dan or Siyu. In fact, do not change unless
 you are both Dan *and* Siyu.
 
 We use integer IDs here; this is different from the MD .json file format,
megadetector/data_management/camtrap_dp_to_coco.py

@@ -31,22 +31,22 @@ from collections import defaultdict
 def camtrap_dp_to_coco(camtrap_dp_folder,output_file=None):
     """
     Convert the Camtrap DP package in [camtrap_dp_folder] to COCO.
-
+
     Does not validate images, just converts. Use integrity_check_json_db to validate
-    the resulting COCO file.
-
+    the resulting COCO file.
+
     Optionally writes the results to [output_file]
     """
-
+
     required_files = ('datapackage.json','deployments.csv','events.csv','media.csv','observations.csv')
-
+
     for fn in required_files:
         fn_abs = os.path.join(camtrap_dp_folder,fn)
         assert os.path.isfile(fn_abs), 'Could not find required file {}'.format(fn_abs)
-
+
     with open(os.path.join(camtrap_dp_folder,'datapackage.json'),'r') as f:
         datapackage = json.load(f)
-
+
     assert datapackage['profile'] == 'https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0/camtrap-dp-profile.json', \
         'I only know how to parse Camtrap DP 1.0 packages'
 
@@ -54,7 +54,7 @@ def camtrap_dp_to_coco(camtrap_dp_folder,output_file=None):
     events_file = None
     media_file = None
     observations_file = None
-
+
     resources = datapackage['resources']
     for r in resources:
         if r['name'] == 'deployments':
@@ -70,19 +70,19 @@ def camtrap_dp_to_coco(camtrap_dp_folder,output_file=None):
     assert events_file is not None, 'No events file specified'
     assert media_file is not None, 'No media file specified'
     assert observations_file is not None, 'No observation file specified'
-
+
     deployments_df = pd.read_csv(os.path.join(camtrap_dp_folder,deployments_file))
     events_df = pd.read_csv(os.path.join(camtrap_dp_folder,events_file))
     media_df = pd.read_csv(os.path.join(camtrap_dp_folder,media_file))
     observations_df = pd.read_csv(os.path.join(camtrap_dp_folder,observations_file))
-
+
     print('Read {} deployment lines'.format(len(deployments_df)))
     print('Read {} events lines'.format(len(events_df)))
     print('Read {} media lines'.format(len(media_df)))
     print('Read {} observation lines'.format(len(observations_df)))
-
+
     media_id_to_media_info = {}
-
+
     # i_row = 0; row = media_df.iloc[i_row]
     for i_row,row in media_df.iterrows():
         media_info = {}
@@ -94,23 +94,23 @@ def camtrap_dp_to_coco(camtrap_dp_folder,output_file=None):
         media_info['frame_num'] = -1
         media_info['seq_num_frames'] = -1
         media_id_to_media_info[row['mediaID']] = media_info
-
+
     event_id_to_media_ids = defaultdict(list)
-
+
     # i_row = 0; row = events_df.iloc[i_row]
     for i_row,row in events_df.iterrows():
         media_id = row['mediaID']
         assert media_id in media_id_to_media_info
         event_id_to_media_ids[row['eventID']].append(media_id)
-
+
     event_id_to_category_names = defaultdict(set)
-
+
     # i_row = 0; row = observations_df.iloc[i_row]
     for i_row,row in observations_df.iterrows():
-
+
         if row['observationLevel'] != 'event':
             raise ValueError("I don't know how to parse image-level events yet")
-
+
         if row['observationType'] == 'blank':
             event_id_to_category_names[row['eventID']].add('empty')
         elif row['observationType'] == 'unknown':
@@ -122,7 +122,7 @@ def camtrap_dp_to_coco(camtrap_dp_folder,output_file=None):
         assert row['observationType'] == 'animal'
         assert isinstance(row['scientificName'],str)
         event_id_to_category_names[row['eventID']].add(row['scientificName'])
-
+
     # Sort images within an event into frame numbers
     #
     # event_id = next(iter(event_id_to_media_ids))
@@ -134,7 +134,7 @@ def camtrap_dp_to_coco(camtrap_dp_folder,output_file=None):
         media_info['frame_num'] = i_media
         media_info['seq_num_frames'] = len(media_info_this_event)
         media_info['seq_id'] = event_id
-
+
     # Create category names
     category_name_to_category_id = {'empty':0}
     for event_id in event_id_to_category_names:
@@ -142,18 +142,18 @@ def camtrap_dp_to_coco(camtrap_dp_folder,output_file=None):
         for name in category_names_this_event:
             if name not in category_name_to_category_id:
                 category_name_to_category_id[name] = len(category_name_to_category_id)
-
+
     # Move everything into COCO format
     images = list(media_id_to_media_info.values())
-
+
     categories = []
     for name in category_name_to_category_id:
         categories.append({'name':name,'id':category_name_to_category_id[name]})
     info = {'version':1.0,'description':datapackage['name']}
-
+
     # Create annotations
     annotations = []
-
+
     for event_id in event_id_to_media_ids.keys():
         i_ann = 0
         media_ids_this_event = event_id_to_media_ids[event_id]
@@ -168,23 +168,23 @@ def camtrap_dp_to_coco(camtrap_dp_folder,output_file=None):
             ann['category_id'] = category_name_to_category_id[category_name]
             ann['sequence_level_annotation'] = True
             annotations.append(ann)
-
+
     coco_data = {}
     coco_data['images'] = images
     coco_data['annotations'] = annotations
     coco_data['categories'] = categories
     coco_data['info'] = info
-
+
     for im in coco_data['images']:
         im['datetime'] = str(im['datetime'] )
-
+
     if output_file is not None:
         with open(output_file,'w') as f:
             json.dump(coco_data,f,indent=1)
-
+
     return coco_data
-
-
+
+
 #%% Interactive driver
 
 if False:
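To make the target format concrete, an illustrative sketch of the dict this function assembles; the 'id', 'image_id', and 'file_name' fields are assumptions on my part, since the hunks only show 'datetime', 'frame_num', 'seq_num_frames', 'seq_id', 'category_id', and 'sequence_level_annotation':

# Illustrative shape of the COCO-style dict returned by camtrap_dp_to_coco();
# IDs and filenames here are invented for the example.
coco_data = {
    'images': [
        {'id': 'm1', 'file_name': 'deployment1/IMG_0001.JPG',
         'datetime': '2023-06-01 10:00:00', 'seq_id': 'e1',
         'frame_num': 0, 'seq_num_frames': 2}
    ],
    'annotations': [
        {'id': 'e1_ann_0', 'image_id': 'm1', 'category_id': 1,
         'sequence_level_annotation': True}
    ],
    'categories': [
        {'name': 'empty', 'id': 0},
        {'name': 'Canis lupus', 'id': 1}
    ],
    'info': {'version': 1.0, 'description': 'my-camtrap-dp-package'}
}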
@@ -192,19 +192,19 @@ if False:
     pass
 
     #%%
-
+
     camtrap_dp_folder = r'C:\temp\pilot2\pilot2'
     coco_file = os.path.join(camtrap_dp_folder,'test-coco.json')
     coco_data = camtrap_dp_to_coco(camtrap_dp_folder,
                                    output_file=coco_file)
-
+
     #%% Validate
-
+
     from megadetector.data_management.databases.integrity_check_json_db import \
         integrity_check_json_db, IntegrityCheckOptions
-
+
     options = IntegrityCheckOptions()
-
+
     options.baseDir = camtrap_dp_folder
     options.bCheckImageSizes = False
     options.bCheckImageExistence = True
@@ -213,25 +213,25 @@ if False:
     options.iMaxNumImages = -1
     options.nThreads = 1
     options.verbose = True
-
-
+
+    sorted_categories, data, error_info = integrity_check_json_db(coco_file,options)
 
     #%% Preview
-
+
     from megadetector.visualization.visualize_db import DbVizOptions, visualize_db
-
+
     options = DbVizOptions()
     options.parallelize_rendering = True
     options.parallelize_rendering_with_threads = True
     options.parallelize_rendering_n_cores = 10
-
+
     preview_dir = r'c:\temp\camtrapdp-preview'
-
-
+    html_output_file, image_db = visualize_db(coco_file, preview_dir, camtrap_dp_folder, options=options)
+
     from megadetector.utils.path_utils import open_file
-    open_file(
-
-
+    open_file(html_output_file)
+
+
 #%% Command-line driver
 
 # TODO