megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- api/__init__.py +0 -0
- api/batch_processing/__init__.py +0 -0
- api/batch_processing/api_core/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/score.py +0 -1
- api/batch_processing/api_core/server_job_status_table.py +0 -1
- api/batch_processing/api_core_support/__init__.py +0 -0
- api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
- api/batch_processing/api_support/__init__.py +0 -0
- api/batch_processing/api_support/summarize_daily_activity.py +0 -1
- api/batch_processing/data_preparation/__init__.py +0 -0
- api/batch_processing/data_preparation/manage_local_batch.py +93 -79
- api/batch_processing/data_preparation/manage_video_batch.py +8 -8
- api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
- api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
- api/batch_processing/postprocessing/__init__.py +0 -0
- api/batch_processing/postprocessing/add_max_conf.py +12 -12
- api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
- api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
- api/batch_processing/postprocessing/compare_batch_results.py +114 -44
- api/batch_processing/postprocessing/convert_output_format.py +62 -19
- api/batch_processing/postprocessing/load_api_results.py +17 -20
- api/batch_processing/postprocessing/md_to_coco.py +31 -21
- api/batch_processing/postprocessing/md_to_labelme.py +165 -68
- api/batch_processing/postprocessing/merge_detections.py +40 -15
- api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
- api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
- api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
- api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
- api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
- api/synchronous/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
- api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
- api/synchronous/api_core/animal_detection_api/config.py +35 -35
- api/synchronous/api_core/tests/__init__.py +0 -0
- api/synchronous/api_core/tests/load_test.py +109 -109
- classification/__init__.py +0 -0
- classification/aggregate_classifier_probs.py +21 -24
- classification/analyze_failed_images.py +11 -13
- classification/cache_batchapi_outputs.py +51 -51
- classification/create_classification_dataset.py +69 -68
- classification/crop_detections.py +54 -53
- classification/csv_to_json.py +97 -100
- classification/detect_and_crop.py +105 -105
- classification/evaluate_model.py +43 -42
- classification/identify_mislabeled_candidates.py +47 -46
- classification/json_to_azcopy_list.py +10 -10
- classification/json_validator.py +72 -71
- classification/map_classification_categories.py +44 -43
- classification/merge_classification_detection_output.py +68 -68
- classification/prepare_classification_script.py +157 -154
- classification/prepare_classification_script_mc.py +228 -228
- classification/run_classifier.py +27 -26
- classification/save_mislabeled.py +30 -30
- classification/train_classifier.py +20 -20
- classification/train_classifier_tf.py +21 -22
- classification/train_utils.py +10 -10
- data_management/__init__.py +0 -0
- data_management/annotations/__init__.py +0 -0
- data_management/annotations/annotation_constants.py +18 -31
- data_management/camtrap_dp_to_coco.py +238 -0
- data_management/cct_json_utils.py +107 -59
- data_management/cct_to_md.py +176 -158
- data_management/cct_to_wi.py +247 -219
- data_management/coco_to_labelme.py +272 -0
- data_management/coco_to_yolo.py +86 -62
- data_management/databases/__init__.py +0 -0
- data_management/databases/add_width_and_height_to_db.py +20 -16
- data_management/databases/combine_coco_camera_traps_files.py +35 -31
- data_management/databases/integrity_check_json_db.py +130 -83
- data_management/databases/subset_json_db.py +25 -16
- data_management/generate_crops_from_cct.py +27 -45
- data_management/get_image_sizes.py +188 -144
- data_management/importers/add_nacti_sizes.py +8 -8
- data_management/importers/add_timestamps_to_icct.py +78 -78
- data_management/importers/animl_results_to_md_results.py +158 -160
- data_management/importers/auckland_doc_test_to_json.py +9 -9
- data_management/importers/auckland_doc_to_json.py +8 -8
- data_management/importers/awc_to_json.py +7 -7
- data_management/importers/bellevue_to_json.py +15 -15
- data_management/importers/cacophony-thermal-importer.py +13 -13
- data_management/importers/carrizo_shrubfree_2018.py +8 -8
- data_management/importers/carrizo_trail_cam_2017.py +8 -8
- data_management/importers/cct_field_adjustments.py +9 -9
- data_management/importers/channel_islands_to_cct.py +10 -10
- data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
- data_management/importers/ena24_to_json.py +7 -7
- data_management/importers/filenames_to_json.py +8 -8
- data_management/importers/helena_to_cct.py +7 -7
- data_management/importers/idaho-camera-traps.py +7 -7
- data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
- data_management/importers/jb_csv_to_json.py +9 -9
- data_management/importers/mcgill_to_json.py +8 -8
- data_management/importers/missouri_to_json.py +18 -18
- data_management/importers/nacti_fieldname_adjustments.py +10 -10
- data_management/importers/noaa_seals_2019.py +8 -8
- data_management/importers/pc_to_json.py +7 -7
- data_management/importers/plot_wni_giraffes.py +7 -7
- data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
- data_management/importers/prepare_zsl_imerit.py +7 -7
- data_management/importers/rspb_to_json.py +8 -8
- data_management/importers/save_the_elephants_survey_A.py +8 -8
- data_management/importers/save_the_elephants_survey_B.py +9 -9
- data_management/importers/snapshot_safari_importer.py +26 -26
- data_management/importers/snapshot_safari_importer_reprise.py +665 -665
- data_management/importers/snapshot_serengeti_lila.py +14 -14
- data_management/importers/sulross_get_exif.py +8 -9
- data_management/importers/timelapse_csv_set_to_json.py +11 -11
- data_management/importers/ubc_to_json.py +13 -13
- data_management/importers/umn_to_json.py +7 -7
- data_management/importers/wellington_to_json.py +8 -8
- data_management/importers/wi_to_json.py +9 -9
- data_management/importers/zamba_results_to_md_results.py +181 -181
- data_management/labelme_to_coco.py +309 -159
- data_management/labelme_to_yolo.py +103 -60
- data_management/lila/__init__.py +0 -0
- data_management/lila/add_locations_to_island_camera_traps.py +9 -9
- data_management/lila/add_locations_to_nacti.py +147 -147
- data_management/lila/create_lila_blank_set.py +114 -31
- data_management/lila/create_lila_test_set.py +8 -8
- data_management/lila/create_links_to_md_results_files.py +106 -106
- data_management/lila/download_lila_subset.py +92 -90
- data_management/lila/generate_lila_per_image_labels.py +56 -43
- data_management/lila/get_lila_annotation_counts.py +18 -15
- data_management/lila/get_lila_image_counts.py +11 -11
- data_management/lila/lila_common.py +103 -70
- data_management/lila/test_lila_metadata_urls.py +132 -116
- data_management/ocr_tools.py +173 -128
- data_management/read_exif.py +161 -99
- data_management/remap_coco_categories.py +84 -0
- data_management/remove_exif.py +58 -62
- data_management/resize_coco_dataset.py +32 -44
- data_management/wi_download_csv_to_coco.py +246 -0
- data_management/yolo_output_to_md_output.py +86 -73
- data_management/yolo_to_coco.py +535 -95
- detection/__init__.py +0 -0
- detection/detector_training/__init__.py +0 -0
- detection/process_video.py +85 -33
- detection/pytorch_detector.py +43 -25
- detection/run_detector.py +157 -72
- detection/run_detector_batch.py +189 -114
- detection/run_inference_with_yolov5_val.py +118 -51
- detection/run_tiled_inference.py +113 -42
- detection/tf_detector.py +51 -28
- detection/video_utils.py +606 -521
- docs/source/conf.py +43 -0
- md_utils/__init__.py +0 -0
- md_utils/azure_utils.py +9 -9
- md_utils/ct_utils.py +249 -70
- md_utils/directory_listing.py +59 -64
- md_utils/md_tests.py +968 -862
- md_utils/path_utils.py +655 -155
- md_utils/process_utils.py +157 -133
- md_utils/sas_blob_utils.py +20 -20
- md_utils/split_locations_into_train_val.py +45 -32
- md_utils/string_utils.py +33 -10
- md_utils/url_utils.py +208 -27
- md_utils/write_html_image_list.py +51 -35
- md_visualization/__init__.py +0 -0
- md_visualization/plot_utils.py +102 -109
- md_visualization/render_images_with_thumbnails.py +34 -34
- md_visualization/visualization_utils.py +908 -311
- md_visualization/visualize_db.py +109 -58
- md_visualization/visualize_detector_output.py +61 -42
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
- megadetector-5.0.9.dist-info/RECORD +224 -0
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
- taxonomy_mapping/__init__.py +0 -0
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
- taxonomy_mapping/map_new_lila_datasets.py +154 -154
- taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
- taxonomy_mapping/preview_lila_taxonomy.py +591 -591
- taxonomy_mapping/retrieve_sample_image.py +12 -12
- taxonomy_mapping/simple_image_download.py +11 -11
- taxonomy_mapping/species_lookup.py +10 -10
- taxonomy_mapping/taxonomy_csv_checker.py +18 -18
- taxonomy_mapping/taxonomy_graph.py +47 -47
- taxonomy_mapping/validate_lila_category_mappings.py +83 -76
- data_management/cct_json_to_filename_json.py +0 -89
- data_management/cct_to_csv.py +0 -140
- data_management/databases/remove_corrupted_images_from_db.py +0 -191
- detection/detector_training/copy_checkpoints.py +0 -43
- md_visualization/visualize_megadb.py +0 -183
- megadetector-5.0.7.dist-info/RECORD +0 -202
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
|
@@ -1,27 +1,16 @@
|
|
|
1
|
-
|
|
2
|
-
#
|
|
3
|
-
# train_classifier_tf.py
|
|
4
|
-
#
|
|
5
|
-
# Train an EfficientNet classifier.
|
|
6
|
-
#
|
|
7
|
-
# Currently the implementation of multi-label multi-class classification is
|
|
8
|
-
# non-functional.
|
|
9
|
-
#
|
|
10
|
-
# During training, start tensorboard from within the classification/ directory:
|
|
11
|
-
# tensorboard --logdir run --bind_all --samples_per_plugin scalars=0,images=0
|
|
12
|
-
#
|
|
13
|
-
########
|
|
1
|
+
"""
|
|
14
2
|
|
|
15
|
-
|
|
3
|
+
train_classifier_tf.py
|
|
16
4
|
|
|
17
|
-
|
|
18
|
-
python train_classifier_tf.py run_idfg /ssd/crops_sq \
|
|
19
|
-
-m "efficientnet-b0" --pretrained --finetune --label-weighted \
|
|
20
|
-
--epochs 50 --batch-size 512 --lr 1e-4 \
|
|
21
|
-
--seed 123 \
|
|
22
|
-
--logdir run_idfg
|
|
23
|
-
"""
|
|
5
|
+
Train an EfficientNet classifier.
|
|
24
6
|
|
|
7
|
+
Currently the implementation of multi-label multi-class classification is
|
|
8
|
+
non-functional.
|
|
9
|
+
|
|
10
|
+
During training, start tensorboard from within the classification/ directory:
|
|
11
|
+
tensorboard --logdir run --bind_all --samples_per_plugin scalars=0,images=0
|
|
12
|
+
|
|
13
|
+
"""
|
|
25
14
|
|
|
26
15
|
#%% Imports and constants
|
|
27
16
|
|
|
@@ -47,7 +36,6 @@ from classification.train_utils import (
|
|
|
47
36
|
imgs_with_confidences, load_dataset_csv, prefix_all_keys)
|
|
48
37
|
from md_visualization import plot_utils
|
|
49
38
|
|
|
50
|
-
|
|
51
39
|
AUTOTUNE = tf.data.experimental.AUTOTUNE
|
|
52
40
|
|
|
53
41
|
# match pytorch EfficientNet model names
|
|
@@ -63,6 +51,17 @@ EFFICIENTNET_MODELS: Mapping[str, Mapping[str, Any]] = {
|
|
|
63
51
|
}
|
|
64
52
|
|
|
65
53
|
|
|
54
|
+
#%% Example usage
|
|
55
|
+
|
|
56
|
+
"""
|
|
57
|
+
python train_classifier_tf.py run_idfg /ssd/crops_sq \
|
|
58
|
+
-m "efficientnet-b0" --pretrained --finetune --label-weighted \
|
|
59
|
+
--epochs 50 --batch-size 512 --lr 1e-4 \
|
|
60
|
+
--seed 123 \
|
|
61
|
+
--logdir run_idfg
|
|
62
|
+
"""
|
|
63
|
+
|
|
64
|
+
|
|
66
65
|
#%% Support functions
|
|
67
66
|
|
|
68
67
|
def create_dataset(
|
classification/train_utils.py
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
train_utils.py
|
|
4
|
+
|
|
5
|
+
Utility functions useful for training a classifier.
|
|
6
|
+
|
|
7
|
+
This script should NOT depend on any other file within this repo. It should
|
|
8
|
+
especially be agnostic to PyTorch vs. TensorFlow.
|
|
9
|
+
|
|
10
|
+
"""
|
|
11
11
|
|
|
12
12
|
#%% Imports
|
|
13
13
|
|
|
File without changes
|
|
File without changes
|
|
@@ -1,37 +1,20 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
#
|
|
5
|
-
# Shared constants used to interpret annotation output
|
|
6
|
-
#
|
|
7
|
-
# Categories assigned to bounding boxes. Used throughout our repo; do not change unless
|
|
8
|
-
# you are Dan or Siyu. In fact, do not change unless you are both Dan *and* Siyu.
|
|
9
|
-
#
|
|
10
|
-
# We use integer indices here; this is different than the API output .json file,
|
|
11
|
-
# where indices are string integers.
|
|
12
|
-
#
|
|
13
|
-
########
|
|
14
|
-
|
|
15
|
-
NUM_DETECTOR_CATEGORIES = 3 # this is for choosing colors, so ignoring the "empty" class
|
|
16
|
-
|
|
17
|
-
# This is the label mapping used for our incoming iMerit annotations
|
|
18
|
-
# Only used to parse the incoming annotations. In our database, the string name is used to avoid confusion
|
|
19
|
-
annotation_bbox_categories = [
|
|
20
|
-
{'id': 0, 'name': 'empty'},
|
|
21
|
-
{'id': 1, 'name': 'animal'},
|
|
22
|
-
{'id': 2, 'name': 'person'},
|
|
23
|
-
{'id': 3, 'name': 'group'}, # group of animals
|
|
24
|
-
{'id': 4, 'name': 'vehicle'}
|
|
25
|
-
]
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
annotation_constants.py
|
|
26
4
|
|
|
27
|
-
|
|
28
|
-
annotation_bbox_category_name_to_id = {}
|
|
5
|
+
Defines default categories for MegaDetector output boxes.
|
|
29
6
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
annotation_bbox_category_name_to_id[cat['name']] = cat['id']
|
|
7
|
+
Used throughout the repo; do not change unless you are Dan or Siyu. In fact, do not change unless
|
|
8
|
+
you are both Dan *and* Siyu.
|
|
33
9
|
|
|
34
|
-
|
|
10
|
+
We use integer IDs here; this is different from the MD .json file format,
|
|
11
|
+
where indices are string integers.
|
|
12
|
+
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
#%% Constants
|
|
16
|
+
|
|
17
|
+
# MegaDetector output categories (the "empty" category is implicit)
|
|
35
18
|
detector_bbox_categories = [
|
|
36
19
|
{'id': 0, 'name': 'empty'},
|
|
37
20
|
{'id': 1, 'name': 'animal'},
|
|
@@ -39,9 +22,13 @@ detector_bbox_categories = [
|
|
|
39
22
|
{'id': 3, 'name': 'vehicle'}
|
|
40
23
|
]
|
|
41
24
|
|
|
25
|
+
# This is used for choosing colors, so it ignores the "empty" class.
|
|
26
|
+
NUM_DETECTOR_CATEGORIES = len(detector_bbox_categories) - 1
|
|
27
|
+
|
|
42
28
|
detector_bbox_category_id_to_name = {}
|
|
43
29
|
detector_bbox_category_name_to_id = {}
|
|
44
30
|
|
|
45
31
|
for cat in detector_bbox_categories:
|
|
46
32
|
detector_bbox_category_id_to_name[cat['id']] = cat['name']
|
|
47
33
|
detector_bbox_category_name_to_id[cat['name']] = cat['id']
|
|
34
|
+
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
camtrap_dp_to_coco.py
|
|
4
|
+
|
|
5
|
+
Parse a very limited subset of the Camtrap DP data package format:
|
|
6
|
+
|
|
7
|
+
https://camtrap-dp.tdwg.org/
|
|
8
|
+
|
|
9
|
+
...and convert to COCO format. Assumes that all required metadata files have been
|
|
10
|
+
put in the same directory (which is standard).
|
|
11
|
+
|
|
12
|
+
Does not currently parse bounding boxes, just attaches species labels to images.
|
|
13
|
+
|
|
14
|
+
Currently supports only sequence-level labeling.
|
|
15
|
+
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
#%% Imports and constants
|
|
19
|
+
|
|
20
|
+
import os
|
|
21
|
+
import json
|
|
22
|
+
import pandas as pd
|
|
23
|
+
|
|
24
|
+
from dateutil import parser as dateparser
|
|
25
|
+
|
|
26
|
+
from collections import defaultdict
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
#%% Functions
|
|
30
|
+
|
|
31
|
+
def camtrap_dp_to_coco(camtrap_dp_folder,output_file=None):
    """
    Convert the Camtrap DP package in [camtrap_dp_folder] to a COCO-style dict.

    Only event-level (i.e., sequence-level) observations are supported, and bounding
    boxes are not parsed: every image in an event receives one annotation for each
    category observed in that event.

    Does not validate images, just converts.  Use integrity_check_json_db to validate
    the resulting COCO file.

    Args:
        camtrap_dp_folder (str): folder containing datapackage.json, deployments.csv,
            events.csv, media.csv, and observations.csv
        output_file (str, optional): if not None, the COCO-formatted dict is also
            written to this file as .json

    Returns:
        dict: a dict with the fields 'images', 'annotations', 'categories', and 'info'

    Raises:
        AssertionError: if a required file or resource is missing, if the package
            profile is not Camtrap DP 1.0, if an event refers to an unknown media ID,
            or if an observation has an unexpected type or scientific name
        ValueError: if an observation is not event-level
    """

    required_files = ('datapackage.json','deployments.csv','events.csv','media.csv','observations.csv')

    for fn in required_files:
        fn_abs = os.path.join(camtrap_dp_folder,fn)
        assert os.path.isfile(fn_abs), 'Could not find required file {}'.format(fn_abs)

    # JSON is UTF-8 by specification, so don't depend on the platform's default encoding
    with open(os.path.join(camtrap_dp_folder,'datapackage.json'),'r',encoding='utf-8') as f:
        datapackage = json.load(f)

    assert datapackage['profile'] == 'https://raw.githubusercontent.com/tdwg/camtrap-dp/1.0/camtrap-dp-profile.json', \
        'I only know how to parse Camtrap DP 1.0 packages'

    # Find the path of each table we need; if a resource name appears more than once,
    # the last one wins.
    table_names = ('deployments','events','media','observations')
    resource_name_to_path = {}
    for r in datapackage['resources']:
        if r['name'] in table_names:
            resource_name_to_path[r['name']] = r['path']

    deployments_file = resource_name_to_path.get('deployments',None)
    events_file = resource_name_to_path.get('events',None)
    media_file = resource_name_to_path.get('media',None)
    observations_file = resource_name_to_path.get('observations',None)

    assert deployments_file is not None, 'No deployment file specified'
    assert events_file is not None, 'No events file specified'
    assert media_file is not None, 'No media file specified'
    assert observations_file is not None, 'No observation file specified'

    deployments_df = pd.read_csv(os.path.join(camtrap_dp_folder,deployments_file))
    events_df = pd.read_csv(os.path.join(camtrap_dp_folder,events_file))
    media_df = pd.read_csv(os.path.join(camtrap_dp_folder,media_file))
    observations_df = pd.read_csv(os.path.join(camtrap_dp_folder,observations_file))

    print('Read {} deployment lines'.format(len(deployments_df)))
    print('Read {} events lines'.format(len(events_df)))
    print('Read {} media lines'.format(len(media_df)))
    print('Read {} observation lines'.format(len(observations_df)))

    # Build one COCO image record per media row
    media_id_to_media_info = {}

    # i_row = 0; row = media_df.iloc[i_row]
    for i_row,row in media_df.iterrows():
        media_info = {}
        media_info['file_name'] = os.path.join(row['filePath'],row['fileName']).replace('\\','/')
        media_info['location'] = row['deploymentID']
        media_info['id'] = row['mediaID']
        media_info['datetime'] = dateparser.parse(row['timestamp'])
        # Sequence information is filled in below for images that belong to an event
        media_info['frame_num'] = -1
        media_info['seq_num_frames'] = -1
        media_id_to_media_info[row['mediaID']] = media_info

    # Map each event to the media it contains
    event_id_to_media_ids = defaultdict(list)

    # i_row = 0; row = events_df.iloc[i_row]
    for i_row,row in events_df.iterrows():
        media_id = row['mediaID']
        assert media_id in media_id_to_media_info, \
            'Event {} refers to unknown media ID {}'.format(row['eventID'],media_id)
        event_id_to_media_ids[row['eventID']].append(media_id)

    # Map each event to the set of category names observed in it
    event_id_to_category_names = defaultdict(set)

    # i_row = 0; row = observations_df.iloc[i_row]
    for i_row,row in observations_df.iterrows():

        if row['observationLevel'] != 'event':
            raise ValueError("I don't know how to parse image-level events yet")

        observation_type = row['observationType']
        category_names_this_event = event_id_to_category_names[row['eventID']]

        if observation_type == 'blank':
            category_names_this_event.add('empty')
        elif observation_type == 'unknown':
            category_names_this_event.add('unknown')
        elif observation_type == 'human':
            assert row['scientificName'] == 'Homo sapiens', \
                'Unexpected scientific name {} for a human observation'.format(
                    row['scientificName'])
            category_names_this_event.add(row['scientificName'])
        else:
            assert observation_type == 'animal', \
                'Unrecognized observation type {}'.format(observation_type)
            assert isinstance(row['scientificName'],str), \
                'Animal observation in event {} has no scientific name'.format(row['eventID'])
            category_names_this_event.add(row['scientificName'])

    # Sort images within an event into frame numbers
    #
    # event_id = next(iter(event_id_to_media_ids))
    for event_id,media_ids_this_event in event_id_to_media_ids.items():
        media_info_this_event = [media_id_to_media_info[media_id] for media_id in media_ids_this_event]
        media_info_this_event = sorted(media_info_this_event, key=lambda x: x['datetime'])
        for i_media,media_info in enumerate(media_info_this_event):
            media_info['frame_num'] = i_media
            media_info['seq_num_frames'] = len(media_info_this_event)
            media_info['seq_id'] = event_id

    # Create category names
    #
    # Names within an event are stored in a set; iterate in sorted order so that
    # category IDs don't depend on string hash randomization.
    category_name_to_category_id = {'empty':0}
    for category_names_this_event in event_id_to_category_names.values():
        for name in sorted(category_names_this_event,key=str):
            if name not in category_name_to_category_id:
                category_name_to_category_id[name] = len(category_name_to_category_id)

    # Move everything into COCO format
    images = list(media_id_to_media_info.values())

    categories = []
    for name,category_id in category_name_to_category_id.items():
        categories.append({'name':name,'id':category_id})
    info = {'version':1.0,'description':datapackage['name']}

    # Create annotations
    annotations = []

    for event_id,media_ids_this_event in event_id_to_media_ids.items():

        # Annotation IDs are [event_id]_[index], where the index restarts for each event
        i_ann = 0

        # Events with no observations get no annotations; sort for reproducible output
        categories_this_event = sorted(event_id_to_category_names.get(event_id,()),key=str)

        for media_id in media_ids_this_event:
            im = media_id_to_media_info[media_id]
            for category_name in categories_this_event:
                ann = {}
                # pandas parses all-numeric ID columns as numbers, so convert explicitly
                # rather than assuming that event_id is a string
                ann['id'] = str(event_id) + '_' + str(i_ann)
                i_ann += 1
                ann['image_id'] = im['id']
                ann['category_id'] = category_name_to_category_id[category_name]
                ann['sequence_level_annotation'] = True
                annotations.append(ann)

    coco_data = {}
    coco_data['images'] = images
    coco_data['annotations'] = annotations
    coco_data['categories'] = categories
    coco_data['info'] = info

    # datetime objects are not JSON-serializable
    for im in coco_data['images']:
        im['datetime'] = str(im['datetime'])

    if output_file is not None:
        with open(output_file,'w') as f:
            json.dump(coco_data,f,indent=1)

    return coco_data
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
#%% Interactive driver
|
|
189
|
+
|
|
190
|
+
if False:

    pass

    #%%

    # Convert a local Camtrap DP package, writing the COCO file next to the package files
    camtrap_dp_folder = r'C:\temp\pilot2\pilot2'
    coco_file = os.path.join(camtrap_dp_folder,'test-coco.json')
    coco_data = camtrap_dp_to_coco(camtrap_dp_folder,output_file=coco_file)

    #%% Validate

    from data_management.databases.integrity_check_json_db import integrity_check_json_db, IntegrityCheckOptions

    # Check the converted file against the images in the package folder
    options = IntegrityCheckOptions()

    options.baseDir = camtrap_dp_folder
    options.bCheckImageExistence = True
    options.bCheckImageSizes = False
    options.bFindUnusedImages = True
    options.bRequireLocation = True
    options.iMaxNumImages = -1
    options.nThreads = 1
    options.verbose = True

    sortedCategories, data, errorInfo = integrity_check_json_db(coco_file,options)

    #%% Preview

    from md_visualization.visualize_db import DbVizOptions, visualize_db
    from md_utils.path_utils import open_file

    # Render an HTML preview of the converted file, then open it
    preview_dir = r'c:\temp\camtrapdp-preview'

    options = DbVizOptions()
    options.parallelize_rendering = True
    options.parallelize_rendering_with_threads = True
    options.parallelize_rendering_n_cores = 10

    htmlOutputFile,image_db = visualize_db(coco_file, preview_dir, camtrap_dp_folder, options=options)

    open_file(htmlOutputFile)
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
#%% Command-line driver
|
|
235
|
+
|
|
236
|
+
# TODO
|
|
237
|
+
|
|
238
|
+
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
cct_json_utils.py
|
|
4
|
+
|
|
5
|
+
Utilities for working with COCO Camera Traps .json databases
|
|
6
|
+
|
|
7
|
+
https://github.com/agentmorris/MegaDetector/blob/master/data_management/README.md#coco-cameratraps-format
|
|
8
|
+
|
|
9
|
+
"""
|
|
10
10
|
|
|
11
11
|
#%% Constants and imports
|
|
12
12
|
|
|
@@ -15,9 +15,6 @@ import os
|
|
|
15
15
|
|
|
16
16
|
from tqdm import tqdm
|
|
17
17
|
from collections import defaultdict, OrderedDict
|
|
18
|
-
from typing import Any, Dict, Iterable, List, Mapping, Optional, Union
|
|
19
|
-
|
|
20
|
-
JSONObject = Mapping[str, Any]
|
|
21
18
|
|
|
22
19
|
|
|
23
20
|
#%% Classes
|
|
@@ -31,18 +28,32 @@ class CameraTrapJsonUtils:
|
|
|
31
28
|
def annotations_to_string(annotations, cat_id_to_name):
|
|
32
29
|
"""
|
|
33
30
|
Given a list of annotations and a mapping from class IDs to names, produces
|
|
34
|
-
a
|
|
31
|
+
a comma-delimited string containing a list of class names, sorted alphabetically.
|
|
32
|
+
|
|
33
|
+
Args:
|
|
34
|
+
annotations (list): a list of annotation dicts
|
|
35
|
+
cat_id_to_name (dict): a dict mapping category IDs to category names
|
|
36
|
+
|
|
37
|
+
Returns:
|
|
38
|
+
str: a comma-delimited list of class names
|
|
35
39
|
"""
|
|
36
40
|
|
|
37
|
-
class_names = CameraTrapJsonUtils.
|
|
41
|
+
class_names = CameraTrapJsonUtils.annotations_to_class_names(annotations, cat_id_to_name)
|
|
38
42
|
return ','.join(class_names)
|
|
39
43
|
|
|
40
44
|
|
|
41
45
|
@staticmethod
|
|
42
|
-
def
|
|
46
|
+
def annotations_to_class_names(annotations, cat_id_to_name):
|
|
43
47
|
"""
|
|
44
48
|
Given a list of annotations and a mapping from class IDs to names, produces
|
|
45
|
-
a list of class names,
|
|
49
|
+
a list of class names, sorted alphabetically.
|
|
50
|
+
|
|
51
|
+
Args:
|
|
52
|
+
annotations (list): a list of annotation dicts
|
|
53
|
+
cat_id_to_name (dict): a dict mapping category IDs to category names
|
|
54
|
+
|
|
55
|
+
Returns:
|
|
56
|
+
list: a list of class names present in [annotations]
|
|
46
57
|
"""
|
|
47
58
|
|
|
48
59
|
# Collect all names
|
|
@@ -53,18 +64,19 @@ class CameraTrapJsonUtils:
|
|
|
53
64
|
|
|
54
65
|
|
|
55
66
|
@staticmethod
|
|
56
|
-
def order_db_keys(db
|
|
67
|
+
def order_db_keys(db):
|
|
57
68
|
"""
|
|
58
69
|
Given a dict representing a JSON database in the COCO Camera Trap
|
|
59
|
-
format,
|
|
70
|
+
format, returns an OrderedDict with keys in the order of 'info',
|
|
60
71
|
'categories', 'annotations' and 'images'. When this OrderedDict is
|
|
61
72
|
serialized with json.dump(), the order of the keys is preserved.
|
|
62
73
|
|
|
63
74
|
Args:
|
|
64
|
-
db
|
|
75
|
+
db (dict): a JSON database in the COCO Camera Trap format
|
|
65
76
|
|
|
66
77
|
Returns:
|
|
67
|
-
the same db but as an OrderedDict with keys ordered for
|
|
78
|
+
dict: the same content as [db] but as an OrderedDict with keys ordered for
|
|
79
|
+
readability
|
|
68
80
|
"""
|
|
69
81
|
|
|
70
82
|
ordered = OrderedDict([
|
|
@@ -76,10 +88,20 @@ class CameraTrapJsonUtils:
|
|
|
76
88
|
|
|
77
89
|
|
|
78
90
|
@staticmethod
|
|
79
|
-
def
|
|
91
|
+
def group_annotations_by_image_field(db_indexed, image_field='seq_id'):
|
|
80
92
|
"""
|
|
81
93
|
Given an instance of IndexedJsonDb, group annotation entries by a field in the
|
|
82
|
-
image entry.
|
|
94
|
+
image entry. Typically used to find all the annotations associated with a sequence.
|
|
95
|
+
|
|
96
|
+
Args:
|
|
97
|
+
db_indexed (IndexedJsonDb): an initialized IndexedJsonDb, typically loaded from a
|
|
98
|
+
COCO Camera Traps .json file
|
|
99
|
+
image_field (str, optional): a field by which to group annotations (defaults
|
|
100
|
+
to 'seq_id')
|
|
101
|
+
|
|
102
|
+
Returns:
|
|
103
|
+
dict: a dict mapping objects (typically strings, in fact typically sequence IDs) to
|
|
104
|
+
lists of annotations
|
|
83
105
|
"""
|
|
84
106
|
|
|
85
107
|
image_id_to_image_field = {}
|
|
@@ -95,27 +117,24 @@ class CameraTrapJsonUtils:
|
|
|
95
117
|
|
|
96
118
|
|
|
97
119
|
@staticmethod
|
|
98
|
-
def
|
|
99
|
-
) -> Dict[str, Any]:
|
|
120
|
+
def get_entries_for_locations(db, locations):
|
|
100
121
|
"""
|
|
101
|
-
Given a dict representing a JSON database in the COCO Camera Trap format,
|
|
122
|
+
Given a dict representing a JSON database in the COCO Camera Trap format, returns a dict
|
|
102
123
|
with the 'images' and 'annotations' fields in the CCT format, each is an array that only
|
|
103
|
-
includes entries in the original
|
|
124
|
+
includes entries in the original [db] that are in the [locations] set.
|
|
125
|
+
|
|
104
126
|
Args:
|
|
105
|
-
db: a dict representing a JSON database in the COCO Camera Trap format
|
|
106
|
-
locations: a set or list of locations to include; each item is a string
|
|
127
|
+
db (dict): a dict representing a JSON database in the COCO Camera Trap format
|
|
128
|
+
locations (set): a set or list of locations to include; each item is a string
|
|
107
129
|
|
|
108
130
|
Returns:
|
|
109
|
-
a dict with the 'images' and 'annotations' fields in the CCT format
|
|
131
|
+
dict: a dict with the 'images' and 'annotations' fields in the CCT format
|
|
110
132
|
"""
|
|
111
133
|
|
|
112
134
|
locations = set(locations)
|
|
113
135
|
print('Original DB has {} image and {} annotation entries.'.format(
|
|
114
136
|
len(db['images']), len(db['annotations'])))
|
|
115
|
-
new_db:
|
|
116
|
-
'images': [],
|
|
117
|
-
'annotations': []
|
|
118
|
-
}
|
|
137
|
+
new_db = { 'images': [], 'annotations': [] }
|
|
119
138
|
new_images = set()
|
|
120
139
|
for i in db['images']:
|
|
121
140
|
# cast location to string as the entries in locations are strings
|
|
@@ -139,12 +158,26 @@ class IndexedJsonDb:
|
|
|
139
158
|
a .json database.
|
|
140
159
|
"""
|
|
141
160
|
|
|
142
|
-
def __init__(self,
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
161
|
+
def __init__(self,
|
|
162
|
+
json_filename,
|
|
163
|
+
b_normalize_paths=False,
|
|
164
|
+
filename_replacements=None,
|
|
165
|
+
b_convert_classes_to_lower=True,
|
|
166
|
+
b_force_forward_slashes=True):
|
|
146
167
|
"""
|
|
147
|
-
|
|
168
|
+
Constructor for IndexedJsonDb that loads from a .json file or CCT-formatted dict.
|
|
169
|
+
|
|
170
|
+
Args:
|
|
171
|
+
json_filename (str or dict): filename to load, or an already-loaded dict
|
|
172
|
+
b_normalize_paths (bool, optional): whether to invoke os.path.normpath on
|
|
173
|
+
all filenames. Not relevant if b_force_forward_slashes is True.
|
|
174
|
+
filename_replacements (dict, optional): a set of string --> string mappings
|
|
175
|
+
that will trigger replacements in all filenames, typically used to remove
|
|
176
|
+
leading folders
|
|
177
|
+
b_convert_classes_to_lower (bool, optional): whether to convert all class
|
|
178
|
+
names to lowercase
|
|
179
|
+
b_force_forward_slashes (bool, optional): whether to convert backslashes to
|
|
180
|
+
forward slashes in all path names
|
|
148
181
|
"""
|
|
149
182
|
|
|
150
183
|
if isinstance(json_filename, str):
|
|
@@ -162,11 +195,15 @@ class IndexedJsonDb:
|
|
|
162
195
|
for c in self.db['categories']:
|
|
163
196
|
c['name'] = c['name'].lower()
|
|
164
197
|
|
|
198
|
+
# Normalize paths to simplify comparisons later
|
|
165
199
|
if b_normalize_paths:
|
|
166
|
-
# Normalize paths to simplify comparisons later
|
|
167
200
|
for im in self.db['images']:
|
|
168
201
|
im['file_name'] = os.path.normpath(im['file_name'])
|
|
169
202
|
|
|
203
|
+
if b_force_forward_slashes:
|
|
204
|
+
for im in self.db['images']:
|
|
205
|
+
im['file_name'] = im['file_name'].replace('\\','/')
|
|
206
|
+
|
|
170
207
|
if filename_replacements is not None:
|
|
171
208
|
for s in filename_replacements:
|
|
172
209
|
# Make custom replacements in filenames, typically used to
|
|
@@ -192,7 +229,7 @@ class IndexedJsonDb:
|
|
|
192
229
|
|
|
193
230
|
# Image ID --> annotations
|
|
194
231
|
# Each image can potentially have multiple annotations, hence using lists
|
|
195
|
-
self.image_id_to_annotations
|
|
232
|
+
self.image_id_to_annotations = {}
|
|
196
233
|
self.image_id_to_annotations = defaultdict(list)
|
|
197
234
|
for ann in self.db['annotations']:
|
|
198
235
|
self.image_id_to_annotations[ann['image_id']].append(ann)
|
|
@@ -200,12 +237,17 @@ class IndexedJsonDb:
|
|
|
200
237
|
# ...__init__
|
|
201
238
|
|
|
202
239
|
|
|
203
|
-
def get_annotations_for_image(self, image:
|
|
204
|
-
) -> Optional[List[Dict[str, Any]]]:
|
|
240
|
+
def get_annotations_for_image(self, image):
|
|
205
241
|
"""
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
242
|
+
Finds all the annotations associated with the image dict [image].
|
|
243
|
+
|
|
244
|
+
Args:
|
|
245
|
+
image (dict): an image dict loaded from a CCT .json file. Only the 'id' field
|
|
246
|
+
is used.
|
|
247
|
+
|
|
248
|
+
Returns:
|
|
249
|
+
list: list of annotations associated with this image. Returns None if the db
|
|
250
|
+
has not been loaded, or [] if no annotations are available for this image.
|
|
209
251
|
"""
|
|
210
252
|
|
|
211
253
|
if self.db is None:
|
|
@@ -218,11 +260,17 @@ class IndexedJsonDb:
|
|
|
218
260
|
return image_annotations
|
|
219
261
|
|
|
220
262
|
|
|
221
|
-
def get_classes_for_image(self, image
|
|
263
|
+
def get_classes_for_image(self, image):
|
|
222
264
|
"""
|
|
223
|
-
Returns a list of class names associated with [image]
|
|
265
|
+
Returns a list of class names associated with [image].
|
|
224
266
|
|
|
225
|
-
|
|
267
|
+
Args:
|
|
268
|
+
image (dict): an image dict loaded from a CCT .json file. Only the 'id' field
|
|
269
|
+
is used.
|
|
270
|
+
|
|
271
|
+
Returns:
|
|
272
|
+
list: list of class names associated with this image. Returns None if the db
|
|
273
|
+
has not been loaded, or [] if no annotations are available for this image.
|
|
226
274
|
"""
|
|
227
275
|
|
|
228
276
|
if self.db is None:
|
|
@@ -242,27 +290,27 @@ class IndexedJsonDb:
|
|
|
242
290
|
|
|
243
291
|
# ...class IndexedJsonDb
|
|
244
292
|
|
|
245
|
-
|
|
246
|
-
#%% Functions
|
|
247
|
-
|
|
248
293
|
class SequenceOptions:
|
|
294
|
+
"""
|
|
295
|
+
Options parameterizing the grouping of images into sequences by time.
|
|
296
|
+
"""
|
|
249
297
|
|
|
298
|
+
#: Images separated by <= this duration will be grouped into the same sequence.
|
|
250
299
|
episode_interval_seconds = 60.0
|
|
251
300
|
|
|
252
301
|
|
|
302
|
+
#%% Functions
|
|
303
|
+
|
|
253
304
|
def create_sequences(image_info,options=None):
|
|
254
305
|
"""
|
|
255
|
-
|
|
256
|
-
should be a list of dicts in CCT format, i.e. with fields 'file_name','datetime','location'.
|
|
257
|
-
|
|
258
|
-
'filename' should be a string.
|
|
259
|
-
|
|
260
|
-
'datetime' should be a Python datetime object
|
|
261
|
-
|
|
262
|
-
'location' should be a string.
|
|
306
|
+
Synthesizes episodes/sequences/bursts for the images in [image_info].
|
|
263
307
|
|
|
264
308
|
Modifies [image_info], populating the 'seq_id', 'seq_num_frames', and 'frame_num' fields
|
|
265
309
|
for each image.
|
|
310
|
+
|
|
311
|
+
Args:
|
|
312
|
+
image_info (list): a list of dicts in CCT format, i.e. with fields 'file_name' (str),
|
|
313
|
+
'datetime' (datetime), and 'location' (str).
|
|
266
314
|
"""
|
|
267
315
|
|
|
268
316
|
if options is None:
|