megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/efficientnet/model.py +8 -8
- megadetector/classification/efficientnet/utils.py +6 -5
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +26 -26
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -2
- megadetector/data_management/camtrap_dp_to_coco.py +79 -46
- megadetector/data_management/cct_json_utils.py +103 -103
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +210 -193
- megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
- megadetector/data_management/databases/integrity_check_json_db.py +228 -200
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +88 -39
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +133 -125
- megadetector/data_management/labelme_to_yolo.py +159 -73
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
- megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +73 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
- megadetector/data_management/mewc_to_md.py +344 -340
- megadetector/data_management/ocr_tools.py +262 -255
- megadetector/data_management/read_exif.py +249 -227
- megadetector/data_management/remap_coco_categories.py +90 -28
- megadetector/data_management/remove_exif.py +81 -21
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +588 -120
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +248 -122
- megadetector/data_management/yolo_to_coco.py +333 -191
- megadetector/detection/change_detection.py +832 -0
- megadetector/detection/process_video.py +340 -337
- megadetector/detection/pytorch_detector.py +358 -278
- megadetector/detection/run_detector.py +399 -186
- megadetector/detection/run_detector_batch.py +404 -377
- megadetector/detection/run_inference_with_yolov5_val.py +340 -327
- megadetector/detection/run_tiled_inference.py +257 -249
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +332 -295
- megadetector/postprocessing/add_max_conf.py +19 -11
- megadetector/postprocessing/categorize_detections_by_size.py +45 -45
- megadetector/postprocessing/classification_postprocessing.py +468 -433
- megadetector/postprocessing/combine_batch_outputs.py +23 -23
- megadetector/postprocessing/compare_batch_results.py +590 -525
- megadetector/postprocessing/convert_output_format.py +106 -102
- megadetector/postprocessing/create_crop_folder.py +347 -147
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -499
- megadetector/postprocessing/load_api_results.py +48 -27
- megadetector/postprocessing/md_to_coco.py +133 -102
- megadetector/postprocessing/md_to_labelme.py +107 -90
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +92 -114
- megadetector/postprocessing/postprocess_batch_results.py +319 -301
- megadetector/postprocessing/remap_detection_categories.py +91 -38
- megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +156 -74
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/ct_utils.py +1049 -211
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +632 -529
- megadetector/utils/path_utils.py +1520 -431
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/split_locations_into_train_val.py +62 -62
- megadetector/utils/string_utils.py +148 -27
- megadetector/utils/url_utils.py +489 -176
- megadetector/utils/wi_utils.py +2658 -2526
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +34 -30
- megadetector/visualization/render_images_with_thumbnails.py +39 -74
- megadetector/visualization/visualization_utils.py +487 -435
- megadetector/visualization/visualize_db.py +232 -198
- megadetector/visualization/visualize_detector_output.py +82 -76
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
- megadetector-10.0.0.dist-info/RECORD +139 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
- megadetector/api/batch_processing/api_core/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
- megadetector/api/batch_processing/api_core/server.py +0 -294
- megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
- megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
- megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
- megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
- megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
- megadetector/api/batch_processing/api_core/server_utils.py +0 -88
- megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
- megadetector/api/batch_processing/api_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
- megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
- megadetector/api/synchronous/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
- megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
- megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
- megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
- megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector/utils/azure_utils.py +0 -178
- megadetector/utils/sas_blob_utils.py +0 -509
- megadetector-5.0.28.dist-info/RECORD +0 -209
- /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""
|
|
1
|
+
r"""
|
|
2
2
|
|
|
3
3
|
save_mislabeled.py
|
|
4
4
|
|
|
@@ -10,7 +10,7 @@ List of known mislabeled images is stored in Azure Blob Storage.
|
|
|
10
10
|
* blob: megadb_mislabeled/{dataset}.csv, one file per dataset
|
|
11
11
|
|
|
12
12
|
Each file megadb_mislabeled/{dataset}.csv has two columns:
|
|
13
|
-
|
|
13
|
+
|
|
14
14
|
* 'file': str, blob name
|
|
15
15
|
|
|
16
16
|
* 'correct_class': optional str, correct dataset class
|
|
@@ -41,7 +41,7 @@ import pandas as pd
|
|
|
41
41
|
#%% Main function
|
|
42
42
|
|
|
43
43
|
def update_mislabeled_images(container_path: str, input_csv_path: str) -> None:
|
|
44
|
-
|
|
44
|
+
|
|
45
45
|
df = pd.read_csv(input_csv_path, index_col=False)
|
|
46
46
|
|
|
47
47
|
# error checking
|
|
@@ -62,7 +62,7 @@ def update_mislabeled_images(container_path: str, input_csv_path: str) -> None:
|
|
|
62
62
|
df['file'] = df['blob_dirname'] + '/' + df['File']
|
|
63
63
|
|
|
64
64
|
for ds, ds_df in df.groupby('dataset'):
|
|
65
|
-
|
|
65
|
+
|
|
66
66
|
sr_path = os.path.join(container_path, 'megadb_mislabeled', f'{ds}.csv')
|
|
67
67
|
if os.path.exists(sr_path):
|
|
68
68
|
old_sr = pd.read_csv(sr_path, index_col='file', squeeze=True)
|
|
@@ -89,7 +89,7 @@ def update_mislabeled_images(container_path: str, input_csv_path: str) -> None:
|
|
|
89
89
|
#%% Command-line driver
|
|
90
90
|
|
|
91
91
|
def _parse_args() -> argparse.Namespace:
|
|
92
|
-
|
|
92
|
+
|
|
93
93
|
parser = argparse.ArgumentParser(
|
|
94
94
|
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
|
95
95
|
description='Merges classification results with Batch Detection API '
|
|
@@ -104,7 +104,7 @@ def _parse_args() -> argparse.Namespace:
|
|
|
104
104
|
|
|
105
105
|
|
|
106
106
|
if __name__ == '__main__':
|
|
107
|
-
|
|
107
|
+
|
|
108
108
|
args = _parse_args()
|
|
109
109
|
update_mislabeled_images(container_path=args.container_path,
|
|
110
110
|
input_csv_path=args.input_csv)
|
|
@@ -54,7 +54,7 @@ EFFICIENTNET_MODELS: Mapping[str, Mapping[str, Any]] = {
|
|
|
54
54
|
|
|
55
55
|
#%% Example usage
|
|
56
56
|
|
|
57
|
-
"""
|
|
57
|
+
"""
|
|
58
58
|
python train_classifier_tf.py run_idfg /ssd/crops_sq \
|
|
59
59
|
-m "efficientnet-b0" --pretrained --finetune --label-weighted \
|
|
60
60
|
--epochs 50 --batch-size 512 --lr 1e-4 \
|
|
@@ -97,7 +97,7 @@ def create_dataset(
|
|
|
97
97
|
|
|
98
98
|
Returns: tf.data.Dataset
|
|
99
99
|
"""
|
|
100
|
-
|
|
100
|
+
|
|
101
101
|
# images dataset
|
|
102
102
|
img_ds = tf.data.Dataset.from_tensor_slices(img_files)
|
|
103
103
|
img_ds = img_ds.map(lambda p: tf.io.read_file(img_base_dir + os.sep + p),
|
|
@@ -162,7 +162,7 @@ def create_dataloaders(
|
|
|
162
162
|
datasets: dict, maps split to DataLoader
|
|
163
163
|
label_names: list of str, label names in order of label id
|
|
164
164
|
"""
|
|
165
|
-
|
|
165
|
+
|
|
166
166
|
df, label_names, split_to_locs = load_dataset_csv(
|
|
167
167
|
dataset_csv_path, label_index_json_path, splits_json_path,
|
|
168
168
|
multilabel=multilabel, label_weighted=label_weighted,
|
|
@@ -238,7 +238,7 @@ def build_model(model_name: str, num_classes: int, img_size: int,
|
|
|
238
238
|
"""
|
|
239
239
|
Creates a model with an EfficientNet base.
|
|
240
240
|
"""
|
|
241
|
-
|
|
241
|
+
|
|
242
242
|
class_name = EFFICIENTNET_MODELS[model_name]['cls']
|
|
243
243
|
dropout = EFFICIENTNET_MODELS[model_name]['dropout']
|
|
244
244
|
|
|
@@ -279,7 +279,7 @@ def log_images_with_confidence(
|
|
|
279
279
|
epoch: int
|
|
280
280
|
tag: str
|
|
281
281
|
"""
|
|
282
|
-
|
|
282
|
+
|
|
283
283
|
for label_id, heap in heap_dict.items():
|
|
284
284
|
label_name = label_names[label_id]
|
|
285
285
|
|
|
@@ -319,7 +319,7 @@ def track_extreme_examples(tp_heaps: dict[int, list[HeapItem]],
|
|
|
319
319
|
img_files: tf.Tensor, shape [batch_size], type tf.string
|
|
320
320
|
logits: tf.Tensor, shape [batch_size, num_classes]
|
|
321
321
|
"""
|
|
322
|
-
|
|
322
|
+
|
|
323
323
|
labels = labels.numpy().tolist()
|
|
324
324
|
inputs = inputs.numpy().astype(np.uint8)
|
|
325
325
|
img_files = img_files.numpy().astype(str).tolist()
|
|
@@ -480,7 +480,7 @@ def log_run(split: str, epoch: int, writer: tf.summary.SummaryWriter,
|
|
|
480
480
|
Args:
|
|
481
481
|
metrics: dict, keys already prefixed with {split}/
|
|
482
482
|
"""
|
|
483
|
-
|
|
483
|
+
|
|
484
484
|
per_class_recall = recall_from_confusion_matrix(cm, label_names)
|
|
485
485
|
metrics.update(prefix_all_keys(per_class_recall, f'{split}/label_recall/'))
|
|
486
486
|
|
|
@@ -518,7 +518,7 @@ def main(dataset_dir: str,
|
|
|
518
518
|
seed: Optional[int] = None,
|
|
519
519
|
logdir: str = '',
|
|
520
520
|
cache_splits: Sequence[str] = ()) -> None:
|
|
521
|
-
|
|
521
|
+
|
|
522
522
|
# input validation
|
|
523
523
|
assert os.path.exists(dataset_dir)
|
|
524
524
|
assert os.path.exists(cropped_images_dir)
|
|
@@ -597,7 +597,7 @@ def main(dataset_dir: str,
|
|
|
597
597
|
model.base_model.trainable = True
|
|
598
598
|
|
|
599
599
|
print('- train:')
|
|
600
|
-
|
|
600
|
+
|
|
601
601
|
train_metrics, train_heaps, train_cm = run_epoch(
|
|
602
602
|
model, loader=loaders['train'], weighted=label_weighted,
|
|
603
603
|
loss_fn=loss_fn, weight_decay=weight_decay, optimizer=optimizer,
|
|
@@ -35,7 +35,7 @@ class HeapItem:
|
|
|
35
35
|
"""
|
|
36
36
|
A wrapper over non-comparable data with a comparable priority value.
|
|
37
37
|
"""
|
|
38
|
-
|
|
38
|
+
|
|
39
39
|
priority: Any
|
|
40
40
|
data: Any = dataclasses.field(compare=False, repr=False)
|
|
41
41
|
|
|
@@ -53,7 +53,7 @@ def add_to_heap(h: list[Any], item: HeapItem, k: Optional[int] = None) -> None:
|
|
|
53
53
|
item: HeapItem
|
|
54
54
|
k: int, desired capacity of the heap, or None for no limit
|
|
55
55
|
"""
|
|
56
|
-
|
|
56
|
+
|
|
57
57
|
if k is None or len(h) < k:
|
|
58
58
|
heapq.heappush(h, item)
|
|
59
59
|
else:
|
|
@@ -66,17 +66,17 @@ def prefix_all_keys(d: Mapping[str, Any], prefix: str) -> dict[str, Any]:
|
|
|
66
66
|
"""
|
|
67
67
|
Returns a new dict where the keys are prefixed by <prefix>.
|
|
68
68
|
"""
|
|
69
|
-
|
|
69
|
+
|
|
70
70
|
return {f'{prefix}{k}': v for k, v in d.items()}
|
|
71
71
|
|
|
72
72
|
|
|
73
73
|
def fig_to_img(fig: matplotlib.figure.Figure) -> np.ndarray:
|
|
74
74
|
"""
|
|
75
75
|
Converts a matplotlib figure to an image represented by a numpy array.
|
|
76
|
-
|
|
76
|
+
|
|
77
77
|
Returns: np.ndarray, type uint8, shape [H, W, 3]
|
|
78
78
|
"""
|
|
79
|
-
|
|
79
|
+
|
|
80
80
|
with io.BytesIO() as b:
|
|
81
81
|
fig.savefig(b, transparent=False, bbox_inches='tight', pad_inches=0,
|
|
82
82
|
format='png')
|
|
@@ -103,7 +103,7 @@ def imgs_with_confidences(imgs_list: list[tuple[Any, ...]],
|
|
|
103
103
|
fig: matplotlib.figure.Figure
|
|
104
104
|
img_files: list of str
|
|
105
105
|
"""
|
|
106
|
-
|
|
106
|
+
|
|
107
107
|
imgs, img_files, tags, titles = [], [], [], []
|
|
108
108
|
for img, label_id, topk_conf, topk_preds, img_file in imgs_list:
|
|
109
109
|
imgs.append(img)
|
|
@@ -140,7 +140,7 @@ def plot_img_grid(imgs: Sequence[Any], row_h: float, col_w: float,
|
|
|
140
140
|
|
|
141
141
|
Returns: matplotlib.figure.Figure
|
|
142
142
|
"""
|
|
143
|
-
|
|
143
|
+
|
|
144
144
|
# input validation
|
|
145
145
|
num_images = len(imgs)
|
|
146
146
|
if tags is not None:
|
|
@@ -186,7 +186,7 @@ def load_splits(splits_json_path: str) -> dict[str, set[tuple[str, str]]]:
|
|
|
186
186
|
|
|
187
187
|
Returns: dict, maps split to set of (dataset, location) tuples
|
|
188
188
|
"""
|
|
189
|
-
|
|
189
|
+
|
|
190
190
|
with open(splits_json_path, 'r') as f:
|
|
191
191
|
split_to_locs_js = json.load(f)
|
|
192
192
|
split_to_locs = {
|
|
@@ -235,7 +235,7 @@ def load_dataset_csv(dataset_csv_path: str,
|
|
|
235
235
|
label_names: list of str, label names in order of label id
|
|
236
236
|
split_to_locs: dict, maps split to set of (dataset, location) tuples
|
|
237
237
|
"""
|
|
238
|
-
|
|
238
|
+
|
|
239
239
|
# read in dataset CSV and create merged (dataset, location) col
|
|
240
240
|
df = pd.read_csv(dataset_csv_path, index_col=False, float_precision='high')
|
|
241
241
|
df['dataset_location'] = list(zip(df['dataset'], df['location']))
|
|
@@ -315,7 +315,7 @@ def recall_from_confusion_matrix(
|
|
|
315
315
|
|
|
316
316
|
Returns: dict, label_name => recall
|
|
317
317
|
"""
|
|
318
|
-
|
|
318
|
+
|
|
319
319
|
result = {
|
|
320
320
|
label_name: confusion_matrix[i, i] / (confusion_matrix[i].sum() + 1e-8)
|
|
321
321
|
for i, label_name in enumerate(label_names)
|
|
@@ -4,7 +4,7 @@ annotation_constants.py
|
|
|
4
4
|
|
|
5
5
|
Defines default categories for MegaDetector output boxes.
|
|
6
6
|
|
|
7
|
-
Used throughout the repo; do not change unless you are Dan or Siyu. In fact, do not change unless
|
|
7
|
+
Used throughout the repo; do not change unless you are Dan or Siyu. In fact, do not change unless
|
|
8
8
|
you are both Dan *and* Siyu.
|
|
9
9
|
|
|
10
10
|
We use integer IDs here; this is different from the MD .json file format,
|
|
@@ -31,4 +31,3 @@ detector_bbox_category_name_to_id = {}
|
|
|
31
31
|
for cat in detector_bbox_categories:
|
|
32
32
|
detector_bbox_category_id_to_name[cat['id']] = cat['name']
|
|
33
33
|
detector_bbox_category_name_to_id[cat['name']] = cat['id']
|
|
34
|
-
|
|
@@ -19,6 +19,8 @@ Currently supports only sequence-level labeling.
|
|
|
19
19
|
|
|
20
20
|
import os
|
|
21
21
|
import json
|
|
22
|
+
import argparse
|
|
23
|
+
|
|
22
24
|
import pandas as pd
|
|
23
25
|
|
|
24
26
|
from dateutil import parser as dateparser
|
|
@@ -31,22 +33,26 @@ from collections import defaultdict
|
|
|
31
33
|
def camtrap_dp_to_coco(camtrap_dp_folder,output_file=None):
|
|
32
34
|
"""
|
|
33
35
|
Convert the Camtrap DP package in [camtrap_dp_folder] to COCO.
|
|
34
|
-
|
|
36
|
+
|
|
35
37
|
Does not validate images, just converts. Use integrity_check_json_db to validate
|
|
36
|
-
the resulting COCO file.
|
|
37
|
-
|
|
38
|
+
the resulting COCO file.
|
|
39
|
+
|
|
38
40
|
Optionally writes the results to [output_file]
|
|
41
|
+
|
|
42
|
+
Args:
|
|
43
|
+
camtrap_dp_folder (str): input folder, containing a CamtrapDP package
|
|
44
|
+
output_file (str, optional): COCO-formatted output file
|
|
39
45
|
"""
|
|
40
|
-
|
|
46
|
+
|
|
41
47
|
required_files = ('datapackage.json','deployments.csv','events.csv','media.csv','observations.csv')
|
|
42
|
-
|
|
48
|
+
|
|
43
49
|
for fn in required_files:
|
|
44
50
|
fn_abs = os.path.join(camtrap_dp_folder,fn)
|
|
45
51
|
assert os.path.isfile(fn_abs), 'Could not find required file {}'.format(fn_abs)
|
|
46
|
-
|
|
52
|
+
|
|
47
53
|
with open(os.path.join(camtrap_dp_folder,'datapackage.json'),'r') as f:
|
|
48
54
|
datapackage = json.load(f)
|
|
49
|
-
|
|
55
|
+
|
|
50
56
|
assert datapackage['profile'] == 'https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0/camtrap-dp-profile.json', \
|
|
51
57
|
'I only know how to parse Camtrap DP 1.0 packages'
|
|
52
58
|
|
|
@@ -54,7 +60,7 @@ def camtrap_dp_to_coco(camtrap_dp_folder,output_file=None):
|
|
|
54
60
|
events_file = None
|
|
55
61
|
media_file = None
|
|
56
62
|
observations_file = None
|
|
57
|
-
|
|
63
|
+
|
|
58
64
|
resources = datapackage['resources']
|
|
59
65
|
for r in resources:
|
|
60
66
|
if r['name'] == 'deployments':
|
|
@@ -70,19 +76,19 @@ def camtrap_dp_to_coco(camtrap_dp_folder,output_file=None):
|
|
|
70
76
|
assert events_file is not None, 'No events file specified'
|
|
71
77
|
assert media_file is not None, 'No media file specified'
|
|
72
78
|
assert observations_file is not None, 'No observation file specified'
|
|
73
|
-
|
|
79
|
+
|
|
74
80
|
deployments_df = pd.read_csv(os.path.join(camtrap_dp_folder,deployments_file))
|
|
75
81
|
events_df = pd.read_csv(os.path.join(camtrap_dp_folder,events_file))
|
|
76
82
|
media_df = pd.read_csv(os.path.join(camtrap_dp_folder,media_file))
|
|
77
83
|
observations_df = pd.read_csv(os.path.join(camtrap_dp_folder,observations_file))
|
|
78
|
-
|
|
84
|
+
|
|
79
85
|
print('Read {} deployment lines'.format(len(deployments_df)))
|
|
80
86
|
print('Read {} events lines'.format(len(events_df)))
|
|
81
87
|
print('Read {} media lines'.format(len(media_df)))
|
|
82
88
|
print('Read {} observation lines'.format(len(observations_df)))
|
|
83
|
-
|
|
89
|
+
|
|
84
90
|
media_id_to_media_info = {}
|
|
85
|
-
|
|
91
|
+
|
|
86
92
|
# i_row = 0; row = media_df.iloc[i_row]
|
|
87
93
|
for i_row,row in media_df.iterrows():
|
|
88
94
|
media_info = {}
|
|
@@ -94,23 +100,23 @@ def camtrap_dp_to_coco(camtrap_dp_folder,output_file=None):
|
|
|
94
100
|
media_info['frame_num'] = -1
|
|
95
101
|
media_info['seq_num_frames'] = -1
|
|
96
102
|
media_id_to_media_info[row['mediaID']] = media_info
|
|
97
|
-
|
|
103
|
+
|
|
98
104
|
event_id_to_media_ids = defaultdict(list)
|
|
99
|
-
|
|
105
|
+
|
|
100
106
|
# i_row = 0; row = events_df.iloc[i_row]
|
|
101
107
|
for i_row,row in events_df.iterrows():
|
|
102
108
|
media_id = row['mediaID']
|
|
103
109
|
assert media_id in media_id_to_media_info
|
|
104
110
|
event_id_to_media_ids[row['eventID']].append(media_id)
|
|
105
|
-
|
|
111
|
+
|
|
106
112
|
event_id_to_category_names = defaultdict(set)
|
|
107
|
-
|
|
113
|
+
|
|
108
114
|
# i_row = 0; row = observations_df.iloc[i_row]
|
|
109
115
|
for i_row,row in observations_df.iterrows():
|
|
110
|
-
|
|
116
|
+
|
|
111
117
|
if row['observationLevel'] != 'event':
|
|
112
118
|
raise ValueError("I don't know how to parse image-level events yet")
|
|
113
|
-
|
|
119
|
+
|
|
114
120
|
if row['observationType'] == 'blank':
|
|
115
121
|
event_id_to_category_names[row['eventID']].add('empty')
|
|
116
122
|
elif row['observationType'] == 'unknown':
|
|
@@ -122,7 +128,7 @@ def camtrap_dp_to_coco(camtrap_dp_folder,output_file=None):
|
|
|
122
128
|
assert row['observationType'] == 'animal'
|
|
123
129
|
assert isinstance(row['scientificName'],str)
|
|
124
130
|
event_id_to_category_names[row['eventID']].add(row['scientificName'])
|
|
125
|
-
|
|
131
|
+
|
|
126
132
|
# Sort images within an event into frame numbers
|
|
127
133
|
#
|
|
128
134
|
# event_id = next(iter(event_id_to_media_ids))
|
|
@@ -134,7 +140,7 @@ def camtrap_dp_to_coco(camtrap_dp_folder,output_file=None):
|
|
|
134
140
|
media_info['frame_num'] = i_media
|
|
135
141
|
media_info['seq_num_frames'] = len(media_info_this_event)
|
|
136
142
|
media_info['seq_id'] = event_id
|
|
137
|
-
|
|
143
|
+
|
|
138
144
|
# Create category names
|
|
139
145
|
category_name_to_category_id = {'empty':0}
|
|
140
146
|
for event_id in event_id_to_category_names:
|
|
@@ -142,18 +148,18 @@ def camtrap_dp_to_coco(camtrap_dp_folder,output_file=None):
|
|
|
142
148
|
for name in category_names_this_event:
|
|
143
149
|
if name not in category_name_to_category_id:
|
|
144
150
|
category_name_to_category_id[name] = len(category_name_to_category_id)
|
|
145
|
-
|
|
151
|
+
|
|
146
152
|
# Move everything into COCO format
|
|
147
153
|
images = list(media_id_to_media_info.values())
|
|
148
|
-
|
|
154
|
+
|
|
149
155
|
categories = []
|
|
150
156
|
for name in category_name_to_category_id:
|
|
151
157
|
categories.append({'name':name,'id':category_name_to_category_id[name]})
|
|
152
158
|
info = {'version':1.0,'description':datapackage['name']}
|
|
153
|
-
|
|
159
|
+
|
|
154
160
|
# Create annotations
|
|
155
161
|
annotations = []
|
|
156
|
-
|
|
162
|
+
|
|
157
163
|
for event_id in event_id_to_media_ids.keys():
|
|
158
164
|
i_ann = 0
|
|
159
165
|
media_ids_this_event = event_id_to_media_ids[event_id]
|
|
@@ -168,23 +174,23 @@ def camtrap_dp_to_coco(camtrap_dp_folder,output_file=None):
|
|
|
168
174
|
ann['category_id'] = category_name_to_category_id[category_name]
|
|
169
175
|
ann['sequence_level_annotation'] = True
|
|
170
176
|
annotations.append(ann)
|
|
171
|
-
|
|
177
|
+
|
|
172
178
|
coco_data = {}
|
|
173
179
|
coco_data['images'] = images
|
|
174
180
|
coco_data['annotations'] = annotations
|
|
175
181
|
coco_data['categories'] = categories
|
|
176
182
|
coco_data['info'] = info
|
|
177
|
-
|
|
183
|
+
|
|
178
184
|
for im in coco_data['images']:
|
|
179
185
|
im['datetime'] = str(im['datetime'] )
|
|
180
|
-
|
|
186
|
+
|
|
181
187
|
if output_file is not None:
|
|
182
188
|
with open(output_file,'w') as f:
|
|
183
189
|
json.dump(coco_data,f,indent=1)
|
|
184
|
-
|
|
190
|
+
|
|
185
191
|
return coco_data
|
|
186
|
-
|
|
187
|
-
|
|
192
|
+
|
|
193
|
+
|
|
188
194
|
#%% Interactive driver
|
|
189
195
|
|
|
190
196
|
if False:
|
|
@@ -192,19 +198,19 @@ if False:
|
|
|
192
198
|
pass
|
|
193
199
|
|
|
194
200
|
#%%
|
|
195
|
-
|
|
201
|
+
|
|
196
202
|
camtrap_dp_folder = r'C:\temp\pilot2\pilot2'
|
|
197
203
|
coco_file = os.path.join(camtrap_dp_folder,'test-coco.json')
|
|
198
204
|
coco_data = camtrap_dp_to_coco(camtrap_dp_folder,
|
|
199
205
|
output_file=coco_file)
|
|
200
|
-
|
|
206
|
+
|
|
201
207
|
#%% Validate
|
|
202
|
-
|
|
208
|
+
|
|
203
209
|
from megadetector.data_management.databases.integrity_check_json_db import \
|
|
204
210
|
integrity_check_json_db, IntegrityCheckOptions
|
|
205
|
-
|
|
211
|
+
|
|
206
212
|
options = IntegrityCheckOptions()
|
|
207
|
-
|
|
213
|
+
|
|
208
214
|
options.baseDir = camtrap_dp_folder
|
|
209
215
|
options.bCheckImageSizes = False
|
|
210
216
|
options.bCheckImageExistence = True
|
|
@@ -213,25 +219,52 @@ if False:
|
|
|
213
219
|
options.iMaxNumImages = -1
|
|
214
220
|
options.nThreads = 1
|
|
215
221
|
options.verbose = True
|
|
216
|
-
|
|
217
|
-
|
|
222
|
+
|
|
223
|
+
sorted_categories, data, error_info = integrity_check_json_db(coco_file,options)
|
|
218
224
|
|
|
219
225
|
#%% Preview
|
|
220
|
-
|
|
226
|
+
|
|
221
227
|
from megadetector.visualization.visualize_db import DbVizOptions, visualize_db
|
|
222
|
-
|
|
228
|
+
|
|
223
229
|
options = DbVizOptions()
|
|
224
230
|
options.parallelize_rendering = True
|
|
225
231
|
options.parallelize_rendering_with_threads = True
|
|
226
232
|
options.parallelize_rendering_n_cores = 10
|
|
227
|
-
|
|
233
|
+
|
|
228
234
|
preview_dir = r'c:\temp\camtrapdp-preview'
|
|
229
|
-
|
|
230
|
-
|
|
235
|
+
html_output_file, image_db = visualize_db(coco_file, preview_dir, camtrap_dp_folder, options=options)
|
|
236
|
+
|
|
231
237
|
from megadetector.utils.path_utils import open_file
|
|
232
|
-
open_file(
|
|
233
|
-
|
|
234
|
-
|
|
238
|
+
open_file(html_output_file)
|
|
239
|
+
|
|
240
|
+
|
|
235
241
|
#%% Command-line driver
|
|
236
242
|
|
|
237
|
-
|
|
243
|
+
def main():
|
|
244
|
+
"""
|
|
245
|
+
Command-line interface to convert Camtrap DP to COCO.
|
|
246
|
+
"""
|
|
247
|
+
|
|
248
|
+
parser = argparse.ArgumentParser(description='Convert Camtrap DP to COCO format')
|
|
249
|
+
parser.add_argument('camtrap_dp_folder', type=str,
|
|
250
|
+
help='Input folder, containing a CamtrapDP package')
|
|
251
|
+
parser.add_argument('--output_file', type=str, default=None,
|
|
252
|
+
help='COCO-formatted output file (defaults to [camtrap_dp_folder]_coco.json)')
|
|
253
|
+
|
|
254
|
+
args = parser.parse_args()
|
|
255
|
+
|
|
256
|
+
if args.output_file is None:
|
|
257
|
+
# Default output file name: [camtrap_dp_folder]_coco.json
|
|
258
|
+
#
|
|
259
|
+
# Remove trailing slash if present
|
|
260
|
+
folder_name = args.camtrap_dp_folder.rstrip(os.sep)
|
|
261
|
+
output_file = folder_name + '_coco.json'
|
|
262
|
+
else:
|
|
263
|
+
output_file = args.output_file
|
|
264
|
+
|
|
265
|
+
camtrap_dp_to_coco(camtrap_dp_folder=args.camtrap_dp_folder, output_file=output_file)
|
|
266
|
+
print(f"Successfully converted Camtrap DP package at '{args.camtrap_dp_folder}' to " + \
|
|
267
|
+
f"COCO format at '{output_file}'")
|
|
268
|
+
|
|
269
|
+
if __name__ == '__main__':
|
|
270
|
+
main()
|