megadetector-5.0.7-py3-none-any.whl → megadetector-5.0.9-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of megadetector has been flagged as possibly problematic.
- api/__init__.py +0 -0
- api/batch_processing/__init__.py +0 -0
- api/batch_processing/api_core/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/score.py +0 -1
- api/batch_processing/api_core/server_job_status_table.py +0 -1
- api/batch_processing/api_core_support/__init__.py +0 -0
- api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
- api/batch_processing/api_support/__init__.py +0 -0
- api/batch_processing/api_support/summarize_daily_activity.py +0 -1
- api/batch_processing/data_preparation/__init__.py +0 -0
- api/batch_processing/data_preparation/manage_local_batch.py +93 -79
- api/batch_processing/data_preparation/manage_video_batch.py +8 -8
- api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
- api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
- api/batch_processing/postprocessing/__init__.py +0 -0
- api/batch_processing/postprocessing/add_max_conf.py +12 -12
- api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
- api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
- api/batch_processing/postprocessing/compare_batch_results.py +114 -44
- api/batch_processing/postprocessing/convert_output_format.py +62 -19
- api/batch_processing/postprocessing/load_api_results.py +17 -20
- api/batch_processing/postprocessing/md_to_coco.py +31 -21
- api/batch_processing/postprocessing/md_to_labelme.py +165 -68
- api/batch_processing/postprocessing/merge_detections.py +40 -15
- api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
- api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
- api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
- api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
- api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
- api/synchronous/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
- api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
- api/synchronous/api_core/animal_detection_api/config.py +35 -35
- api/synchronous/api_core/tests/__init__.py +0 -0
- api/synchronous/api_core/tests/load_test.py +109 -109
- classification/__init__.py +0 -0
- classification/aggregate_classifier_probs.py +21 -24
- classification/analyze_failed_images.py +11 -13
- classification/cache_batchapi_outputs.py +51 -51
- classification/create_classification_dataset.py +69 -68
- classification/crop_detections.py +54 -53
- classification/csv_to_json.py +97 -100
- classification/detect_and_crop.py +105 -105
- classification/evaluate_model.py +43 -42
- classification/identify_mislabeled_candidates.py +47 -46
- classification/json_to_azcopy_list.py +10 -10
- classification/json_validator.py +72 -71
- classification/map_classification_categories.py +44 -43
- classification/merge_classification_detection_output.py +68 -68
- classification/prepare_classification_script.py +157 -154
- classification/prepare_classification_script_mc.py +228 -228
- classification/run_classifier.py +27 -26
- classification/save_mislabeled.py +30 -30
- classification/train_classifier.py +20 -20
- classification/train_classifier_tf.py +21 -22
- classification/train_utils.py +10 -10
- data_management/__init__.py +0 -0
- data_management/annotations/__init__.py +0 -0
- data_management/annotations/annotation_constants.py +18 -31
- data_management/camtrap_dp_to_coco.py +238 -0
- data_management/cct_json_utils.py +107 -59
- data_management/cct_to_md.py +176 -158
- data_management/cct_to_wi.py +247 -219
- data_management/coco_to_labelme.py +272 -0
- data_management/coco_to_yolo.py +86 -62
- data_management/databases/__init__.py +0 -0
- data_management/databases/add_width_and_height_to_db.py +20 -16
- data_management/databases/combine_coco_camera_traps_files.py +35 -31
- data_management/databases/integrity_check_json_db.py +130 -83
- data_management/databases/subset_json_db.py +25 -16
- data_management/generate_crops_from_cct.py +27 -45
- data_management/get_image_sizes.py +188 -144
- data_management/importers/add_nacti_sizes.py +8 -8
- data_management/importers/add_timestamps_to_icct.py +78 -78
- data_management/importers/animl_results_to_md_results.py +158 -160
- data_management/importers/auckland_doc_test_to_json.py +9 -9
- data_management/importers/auckland_doc_to_json.py +8 -8
- data_management/importers/awc_to_json.py +7 -7
- data_management/importers/bellevue_to_json.py +15 -15
- data_management/importers/cacophony-thermal-importer.py +13 -13
- data_management/importers/carrizo_shrubfree_2018.py +8 -8
- data_management/importers/carrizo_trail_cam_2017.py +8 -8
- data_management/importers/cct_field_adjustments.py +9 -9
- data_management/importers/channel_islands_to_cct.py +10 -10
- data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
- data_management/importers/ena24_to_json.py +7 -7
- data_management/importers/filenames_to_json.py +8 -8
- data_management/importers/helena_to_cct.py +7 -7
- data_management/importers/idaho-camera-traps.py +7 -7
- data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
- data_management/importers/jb_csv_to_json.py +9 -9
- data_management/importers/mcgill_to_json.py +8 -8
- data_management/importers/missouri_to_json.py +18 -18
- data_management/importers/nacti_fieldname_adjustments.py +10 -10
- data_management/importers/noaa_seals_2019.py +8 -8
- data_management/importers/pc_to_json.py +7 -7
- data_management/importers/plot_wni_giraffes.py +7 -7
- data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
- data_management/importers/prepare_zsl_imerit.py +7 -7
- data_management/importers/rspb_to_json.py +8 -8
- data_management/importers/save_the_elephants_survey_A.py +8 -8
- data_management/importers/save_the_elephants_survey_B.py +9 -9
- data_management/importers/snapshot_safari_importer.py +26 -26
- data_management/importers/snapshot_safari_importer_reprise.py +665 -665
- data_management/importers/snapshot_serengeti_lila.py +14 -14
- data_management/importers/sulross_get_exif.py +8 -9
- data_management/importers/timelapse_csv_set_to_json.py +11 -11
- data_management/importers/ubc_to_json.py +13 -13
- data_management/importers/umn_to_json.py +7 -7
- data_management/importers/wellington_to_json.py +8 -8
- data_management/importers/wi_to_json.py +9 -9
- data_management/importers/zamba_results_to_md_results.py +181 -181
- data_management/labelme_to_coco.py +309 -159
- data_management/labelme_to_yolo.py +103 -60
- data_management/lila/__init__.py +0 -0
- data_management/lila/add_locations_to_island_camera_traps.py +9 -9
- data_management/lila/add_locations_to_nacti.py +147 -147
- data_management/lila/create_lila_blank_set.py +114 -31
- data_management/lila/create_lila_test_set.py +8 -8
- data_management/lila/create_links_to_md_results_files.py +106 -106
- data_management/lila/download_lila_subset.py +92 -90
- data_management/lila/generate_lila_per_image_labels.py +56 -43
- data_management/lila/get_lila_annotation_counts.py +18 -15
- data_management/lila/get_lila_image_counts.py +11 -11
- data_management/lila/lila_common.py +103 -70
- data_management/lila/test_lila_metadata_urls.py +132 -116
- data_management/ocr_tools.py +173 -128
- data_management/read_exif.py +161 -99
- data_management/remap_coco_categories.py +84 -0
- data_management/remove_exif.py +58 -62
- data_management/resize_coco_dataset.py +32 -44
- data_management/wi_download_csv_to_coco.py +246 -0
- data_management/yolo_output_to_md_output.py +86 -73
- data_management/yolo_to_coco.py +535 -95
- detection/__init__.py +0 -0
- detection/detector_training/__init__.py +0 -0
- detection/process_video.py +85 -33
- detection/pytorch_detector.py +43 -25
- detection/run_detector.py +157 -72
- detection/run_detector_batch.py +189 -114
- detection/run_inference_with_yolov5_val.py +118 -51
- detection/run_tiled_inference.py +113 -42
- detection/tf_detector.py +51 -28
- detection/video_utils.py +606 -521
- docs/source/conf.py +43 -0
- md_utils/__init__.py +0 -0
- md_utils/azure_utils.py +9 -9
- md_utils/ct_utils.py +249 -70
- md_utils/directory_listing.py +59 -64
- md_utils/md_tests.py +968 -862
- md_utils/path_utils.py +655 -155
- md_utils/process_utils.py +157 -133
- md_utils/sas_blob_utils.py +20 -20
- md_utils/split_locations_into_train_val.py +45 -32
- md_utils/string_utils.py +33 -10
- md_utils/url_utils.py +208 -27
- md_utils/write_html_image_list.py +51 -35
- md_visualization/__init__.py +0 -0
- md_visualization/plot_utils.py +102 -109
- md_visualization/render_images_with_thumbnails.py +34 -34
- md_visualization/visualization_utils.py +908 -311
- md_visualization/visualize_db.py +109 -58
- md_visualization/visualize_detector_output.py +61 -42
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
- megadetector-5.0.9.dist-info/RECORD +224 -0
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
- taxonomy_mapping/__init__.py +0 -0
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
- taxonomy_mapping/map_new_lila_datasets.py +154 -154
- taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
- taxonomy_mapping/preview_lila_taxonomy.py +591 -591
- taxonomy_mapping/retrieve_sample_image.py +12 -12
- taxonomy_mapping/simple_image_download.py +11 -11
- taxonomy_mapping/species_lookup.py +10 -10
- taxonomy_mapping/taxonomy_csv_checker.py +18 -18
- taxonomy_mapping/taxonomy_graph.py +47 -47
- taxonomy_mapping/validate_lila_category_mappings.py +83 -76
- data_management/cct_json_to_filename_json.py +0 -89
- data_management/cct_to_csv.py +0 -140
- data_management/databases/remove_corrupted_images_from_db.py +0 -191
- detection/detector_training/copy_checkpoints.py +0 -43
- md_visualization/visualize_megadb.py +0 -183
- megadetector-5.0.7.dist-info/RECORD +0 -202
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
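
For anyone who wants to audit these changes independently rather than trust this rendering, both wheels can be pulled from PyPI and compared locally. A minimal sketch using standard pip/zipfile/difflib; the wheels/ directory layout is just an example:

# Download both wheels, then print per-file unified diffs between them.
import difflib
import subprocess
import zipfile
from pathlib import Path

for v in ('5.0.7', '5.0.9'):
    subprocess.run(['pip', 'download', 'megadetector==' + v,
                    '--no-deps', '--dest', 'wheels/' + v], check=True)

old = zipfile.ZipFile(next(Path('wheels/5.0.7').glob('*.whl')))
new = zipfile.ZipFile(next(Path('wheels/5.0.9').glob('*.whl')))

old_names, new_names = set(old.namelist()), set(new.namelist())
print('Added files:', sorted(new_names - old_names))
print('Removed files:', sorted(old_names - new_names))

for name in sorted(old_names & new_names):
    a = old.read(name).decode('utf-8', errors='replace').splitlines()
    b = new.read(name).decode('utf-8', errors='replace').splitlines()
    for line in difflib.unified_diff(a, b, fromfile='5.0.7/' + name,
                                     tofile='5.0.9/' + name, lineterm=''):
        print(line)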

taxonomy_mapping/retrieve_sample_image.py

@@ -1,15 +1,15 @@
-[old comment-style header, lines 1-12: content not captured in this view]
+"""
+
+retrieve_sample_image.py
+
+Downloader that retrieves images from Google images, used for verifying taxonomy
+lookups and looking for egregious mismappings (e.g., "snake" being mapped to a fish called
+"snake").
+
+Simple wrapper around simple_image_download, but I've had to swap in and out the underlying
+downloader a few times.
+
+"""
 
 #%% Imports and environment
 

taxonomy_mapping/simple_image_download.py

@@ -1,14 +1,14 @@
-[old comment-style header, lines 1-11: content not captured in this view]
+"""
+
+simple_image_download.py
+
+Web image downloader, used in preview_lila_taxonomy.py
+
+Slightly modified from:
+
+https://github.com/RiddlerQ/simple_image_download
+
+"""
 
 #%% Imports
 

taxonomy_mapping/species_lookup.py

@@ -1,13 +1,13 @@
-[old comment-style header, lines 1-10: content not captured in this view]
+"""
+
+species_lookup.py
+
+Look up species names (common or scientific) in the GBIF and iNaturalist
+taxonomies.
+
+Run initialize_taxonomy_lookup() before calling any other function.
+
+"""
 
 #%% Constants and imports
 
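The one usage constraint called out in the new docstring is the initialization step. A hypothetical usage sketch; only initialize_taxonomy_lookup() is named in the docstring, the rest is placeholder:

# Hypothetical usage sketch; per the docstring, initialize_taxonomy_lookup()
# must run before any other function in this module.
from taxonomy_mapping import species_lookup

species_lookup.initialize_taxonomy_lookup()

# ...lookups against the GBIF/iNaturalist taxonomies can follow here...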

taxonomy_mapping/taxonomy_csv_checker.py

@@ -1,21 +1,21 @@
-[old comment-style header, lines 1-18: content not captured in this view]
+"""
+
+taxonomy_csv_checker.py
+
+Checks the taxonomy CSV file to make sure that for each row:
+
+1) The 'taxonomy_level' column matches the lowest-level taxon level in the
+'taxonomy_string' column.
+
+2) The 'scientific_name' column matches the scientific name from the
+lowest-level taxon level in the 'taxonomy_string' column.
+
+Prints out any mismatches.
+
+Also prints out nodes that have 2 ambiguous parents. See "CASE 2" from the
+module docstring of taxonomy_graph.py.
+
+"""
 
 #%% Imports
 
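The two row-level checks are mechanical once the 'taxonomy_string' column is parsed. A rough sketch of the idea; the column serialization below is invented, since the real format of 'taxonomy_string' is not visible in this diff, and only the column names come from the docstring:

# Illustrative only: assumes taxonomy_string is serialized as
# "level:name;level:name;..." ordered from highest to lowest taxon level.
import pandas as pd

df = pd.read_csv('taxonomy.csv')  # hypothetical path

for _, row in df.iterrows():
    pairs = [p.split(':', 1) for p in row['taxonomy_string'].split(';')]
    lowest_level, lowest_name = pairs[-1]
    if row['taxonomy_level'] != lowest_level:
        print('level mismatch: {} != {}'.format(row['taxonomy_level'], lowest_level))
    if row['scientific_name'] != lowest_name:
        print('name mismatch: {} != {}'.format(row['scientific_name'], lowest_name))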

taxonomy_mapping/taxonomy_graph.py

@@ -1,50 +1,50 @@
-[old comment-style header, lines 1-47: content not captured in this view]
+"""
+
+taxonomy_graph.py
+
+Methods for transforming taxonomy CSV into a graph structure backed by
+NetworkX.
+
+We treat each taxon in the taxonomy as a node in a graph, represented by the
+TaxonNode class. We use a NetworkX directed graph (nx.DiGraph) to keep track of
+the edges (parent-child relationships) between the nodes.
+
+In theory, the true biological taxonomy graph should be a tree, where every
+taxon node has exactly 1 parent. However, because we use both GBIF and INAT
+taxonomies, there are 2 situations where a taxon node ends up with two parents.
+Thus, the graph is actually a "directed acyclic graph" (DAG) instead of a tree.
+
+The two situations are explained in detail below. This module includes a
+function dag_to_tree() which converts a DAG to a tree by heuristically removing
+edges from the DAG so that each node only has 1 parent.
+
+CASE 1: INAT and GBIF have different granularity in their taxonomy levels
+======
+An example is shown below. In dag_to_tree(), the lower parent is kept, while
+the higher-up parent is discarded. In this example, the "sciurini -> sciurus"
+edge would be kept, while "sciuridae -> sciurus" would be removed.
+
+    "eastern gray squirrel" (inat)    "squirrel" (gbif)
+    ------------------------------    -----------------
+    family:    sciuridae
+               /       \
+    subfamily: sciurinae    |  # skips subfamily
+               |            |
+    tribe:     sciurini     |  # skips tribe
+               \       /
+    genus:     sciurus
+
+
+CASE 2: INAT and GBIF have different taxonomies
+======
+An example is shown below. In dag_to_tree(), the resolution to these
+discrepancies are hard-coded.
+
+    order:  cathartiformes (inat)    accipitriformes (gbif)
+                  \                      /
+    family:       cathartidae
+
+"""
 
 #%% Imports and constants
 
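The CASE 1 rule above ("keep the lower parent, discard the higher-up parent") is straightforward to express against a NetworkX DiGraph. Below is a minimal sketch of that heuristic using the squirrel example from the docstring; it illustrates the idea only, it is not the module's actual dag_to_tree() implementation, and the depth-based tie-break is an assumption:

# Sketch of the CASE 1 heuristic: when a node has multiple parents, keep the
# edge from the deeper (more specific) parent and remove the rest.
import networkx as nx

dag = nx.DiGraph()
dag.add_edges_from([
    ('sciuridae', 'sciurinae'),  # family -> subfamily (inat)
    ('sciurinae', 'sciurini'),   # subfamily -> tribe (inat)
    ('sciurini', 'sciurus'),     # tribe -> genus (inat)
    ('sciuridae', 'sciurus'),    # family -> genus (gbif, skips two levels)
])

def dag_to_tree_sketch(dag):
    tree = dag.copy()
    roots = [n for n in tree if tree.in_degree(n) == 0]
    # Distance from a root approximates how "low" (specific) each taxon is
    depth = {}
    for root in roots:
        for node, d in nx.shortest_path_length(tree, source=root).items():
            depth[node] = max(depth.get(node, 0), d)
    for node in list(tree.nodes):
        parents = list(tree.predecessors(node))
        if len(parents) > 1:
            keep = max(parents, key=lambda p: depth.get(p, 0))
            for p in parents:
                if p != keep:
                    tree.remove_edge(p, node)
    return tree

tree = dag_to_tree_sketch(dag)
assert list(tree.predecessors('sciurus')) == ['sciurini']  # gbif edge removed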

taxonomy_mapping/validate_lila_category_mappings.py

@@ -1,76 +1,83 @@
-[old comment-style header, lines 1-8: content not captured in this view]
-#%% Constants and imports
-
-import json
-import os
-
-from data_management.lila.lila_common import read_lila_taxonomy_mapping
-[old lines 15-76: content not captured in this view]
+"""
+
+validate_lila_category_mappings.py
+
+Confirm that all category names on LILA have mappings in the taxonomy file.
+
+"""
+
+#%% Constants and imports
+
+import json
+import os
+
+from data_management.lila.lila_common import read_lila_taxonomy_mapping
+
+
+#%% Prevent execution during infrastructural imports
+
+if False:
+
+    #%% Constants
+
+    lila_local_base = os.path.expanduser('~/lila')
+
+    metadata_dir = os.path.join(lila_local_base,'metadata')
+    os.makedirs(metadata_dir,exist_ok=True)
+
+    # Created by get_lila_category_list.py... contains counts for each category
+    category_list_dir = os.path.join(lila_local_base,'lila_categories_list')
+    lila_dataset_to_categories_file = os.path.join(category_list_dir,'lila_dataset_to_categories.json')
+
+    assert os.path.isfile(lila_dataset_to_categories_file)
+
+
+    #%% Load category and taxonomy files
+
+    with open(lila_dataset_to_categories_file,'r') as f:
+        lila_dataset_to_categories = json.load(f)
+
+    taxonomy_df = read_lila_taxonomy_mapping(metadata_dir)
+
+
+    #%% Map dataset names and category names to scientific names
+
+    ds_query_to_scientific_name = {}
+
+    unmapped_queries = set()
+
+    # i_row = 1; row = taxonomy_df.iloc[i_row]; row
+    for i_row,row in taxonomy_df.iterrows():
+
+        ds_query = row['dataset_name'] + ':' + row['query']
+        ds_query = ds_query.lower()
+
+        if not isinstance(row['scientific_name'],str):
+            unmapped_queries.add(ds_query)
+            ds_query_to_scientific_name[ds_query] = 'unmapped'
+            continue
+
+        ds_query_to_scientific_name[ds_query] = row['scientific_name']
+
+
+    #%% For each dataset, make sure we can map every category to the taxonomy
+
+    # dataset_name = list(lila_dataset_to_categories.keys())[0]
+    for _dataset_name in lila_dataset_to_categories.keys():
+
+        if '_bbox' in _dataset_name:
+            dataset_name = _dataset_name.replace('_bbox','')
+        else:
+            dataset_name = _dataset_name
+
+        categories = lila_dataset_to_categories[dataset_name]
+
+        # c = categories[0]
+        for c in categories:
+            ds_query = dataset_name + ':' + c['name']
+            ds_query = ds_query.lower()
+
+            if ds_query not in ds_query_to_scientific_name:
+                print('Could not find mapping for {}'.format(ds_query))
+            else:
+                scientific_name = ds_query_to_scientific_name[ds_query]

data_management/cct_json_to_filename_json.py
DELETED

@@ -1,89 +0,0 @@
-########
-#
-# cct_json_to_filename_json.py
-#
-# Given a .json file in COCO Camera Traps format, outputs a .json-formatted list of
-# relative file names present in the CCT file.
-#
-########
-
-#%% Constants and environment
-
-import json
-import sys
-import os
-from itertools import compress
-
-
-#%% Main function
-
-def convertJsonToStringList(inputFilename,outputFilename=None,prepend='',bConfirmExists=False,
-                            bForceForwardSlash=True,imageBase=''):
-
-    assert os.path.isfile(inputFilename), '.json file {} does not exist'.format(inputFilename)
-    if outputFilename is None:
-        outputFilename = inputFilename + '_images.json'
-
-    with open(inputFilename,'r') as f:
-        data = json.load(f)
-
-    images = data['images']
-
-    filenames = [im['file_name'] for im in images]
-
-    if bConfirmExists:
-        bValid = [False] * len(filenames)
-        for iFile,f in enumerate(filenames):
-            fullPath = os.path.join(imageBase,f)
-            if os.path.isfile(fullPath):
-                bValid[iFile] = True
-        nFilesTotal = len(filenames)
-        filenames = list(compress(filenames, bValid))
-        nFilesValid = len(filenames)
-        print('Marking {} of {} as valid'.format(nFilesValid,nFilesTotal))
-
-    filenames = [prepend + s for s in filenames]
-    if bForceForwardSlash:
-        filenames = [s.replace('\\','/') for s in filenames]
-
-    # json.dump(s,open(outputFilename,'w'))
-
-    s = json.dumps(filenames)
-    with open(outputFilename, 'w') as f:
-        f.write(s)
-
-    return s,outputFilename
-
-
-#%% Command-line driver
-
-import argparse
-
-def main():
-
-    parser = argparse.ArgumentParser()
-    parser.add_argument('inputFilename')
-    parser.add_argument('outputFilename')
-
-    if len(sys.argv[1:]) == 0:
-        parser.print_help()
-        parser.exit()
-
-    args = parser.parse_args()
-    convertJsonToStringList(args.jsonFile,args)
-
-
-if __name__ == '__main__':
-
-    main()
-
-#%% Interactive driver
-
-if False:
-
-    #%%
-    prepend = '20190430cameratraps/'
-    inputFilename = r"D:\wildlife_data\awc\awc_imageinfo.json"
-    outputFilename = r"D:\wildlife_data\awc\awc_image_list.json"
-    convertJsonToStringList(inputFilename,outputFilename,prepend=prepend,bConfirmExists=True,imageBase=r'D:\wildlife_data\awc')
-    print('Finished converting {} to {}'.format(inputFilename,outputFilename))

data_management/cct_to_csv.py
DELETED

@@ -1,140 +0,0 @@
-########
-#
-# cct_to_csv.py
-#
-# "Converts" a COCO Camera Traps .json file to .csv, in quotes because
-# all kinds of assumptions are made here, and if you have a particular .csv
-# format in mind, YMMV. Most notably, does not include any bounding box information
-# or any non-standard fields that may be present in the .json file. Does not
-# propagate information about sequence-level vs. image-level annotations.
-#
-# Does not assume access to the images, therefore does not open .jpg files to find
-# datetime information if it's not in the metadata, just writes datetime as 'unknown'.
-#
-########
-
-#%% Imports
-
-import os
-import sys
-import json
-
-from tqdm import tqdm
-from collections import defaultdict
-
-
-#%% Main function
-
-def cct_to_csv(input_file,output_file=None):
-
-    if output_file is None:
-        output_file = input_file + '.csv'
-
-    ##%% Read input
-
-    print('Loading input data')
-
-    with open(input_file,'r') as f:
-        input_data = json.load(f)
-
-
-    ##%% Build internal mappings
-
-    print('Processing input data')
-
-    images = input_data['images']
-
-    category_id_to_name = {cat['id']:cat['name'] for cat in input_data['categories']}
-
-    image_id_to_class_names = defaultdict(set)
-
-    annotations = input_data['annotations']
-
-    # annotation = annotations[0]
-    for annotation in tqdm(annotations):
-        image_id = annotation['image_id']
-        class_name = annotation['category_id']
-        image_id_to_class_names[image_id].add(
-            category_id_to_name[class_name])
-
-
-    ##%% Write output file
-
-    print('Writing output file')
-
-    with open(output_file,'w') as f:
-
-        f.write('relative_path,datetime,location,sequence_id,class_name\n')
-
-        # im = images[0]
-        for im in tqdm(images):
-
-            file_name = im['file_name']
-            class_names_set = image_id_to_class_names[im['id']]
-            assert len(class_names_set) > 0
-
-            if 'datetime' in im:
-                datetime = im['datetime']
-            else:
-                datetime = 'unknown'
-
-            if 'location' in im:
-                location = im['location']
-            else:
-                location = 'unknown'
-
-            if 'seq_id' in im:
-                sequence_id = im['seq_id']
-            else:
-                sequence_id = 'unknown'
-
-            # Write out one line per class:
-            for class_name in class_names_set:
-                f.write('{},{},{},{},{}\n'.format(file_name,
-                    datetime,location,sequence_id,class_name))
-
-            # ...for each class name
-
-        # ...for each image
-
-    # ...with open(output_file)
-
-# ...def cct_to_csv
-
-
-#%% Interactive driver
-
-if False:
-
-    #%%
-
-    input_dir = r"G:\temp\cct-to-csv"
-    files = os.listdir(input_dir)
-    files = [s for s in files if s.endswith('.json')]
-    for fn in files:
-        input_file = os.path.join(input_dir,fn)
-        assert os.path.isfile(input_file)
-        cct_to_csv(input_file)
-
-
-#%% Command-line driver
-
-import argparse
-
-def main():
-
-    parser = argparse.ArgumentParser(description=(
-        '"Convert" a COCO Camera Traps .json file to .csv (read code to see why "convert" is in quotes)'))
-
-    parser.add_argument('input_file', type=str)
-    parser.add_argument('--output_file', type=str, default=None)
-
-    if len(sys.argv[1:]) == 0:
-        parser.print_help()
-        parser.exit()
-
-    args = parser.parse_args()
-    cct_to_csv(args.input_file,args.output_file)
-
-if __name__ == '__main__':
-    main()