megadetector 5.0.28-py3-none-any.whl → 5.0.29-py3-none-any.whl
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Potentially problematic release.
This version of megadetector might be problematic.
- megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +23 -23
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -1
- megadetector/data_management/camtrap_dp_to_coco.py +45 -45
- megadetector/data_management/cct_json_utils.py +101 -101
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +189 -189
- megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
- megadetector/data_management/databases/integrity_check_json_db.py +202 -188
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +38 -38
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +130 -124
- megadetector/data_management/labelme_to_yolo.py +78 -72
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
- megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +70 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
- megadetector/data_management/mewc_to_md.py +339 -340
- megadetector/data_management/ocr_tools.py +258 -252
- megadetector/data_management/read_exif.py +231 -224
- megadetector/data_management/remap_coco_categories.py +26 -26
- megadetector/data_management/remove_exif.py +31 -20
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +41 -41
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +117 -120
- megadetector/data_management/yolo_to_coco.py +195 -188
- megadetector/detection/change_detection.py +831 -0
- megadetector/detection/process_video.py +340 -337
- megadetector/detection/pytorch_detector.py +304 -262
- megadetector/detection/run_detector.py +177 -164
- megadetector/detection/run_detector_batch.py +364 -363
- megadetector/detection/run_inference_with_yolov5_val.py +328 -325
- megadetector/detection/run_tiled_inference.py +256 -249
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +290 -282
- megadetector/postprocessing/add_max_conf.py +15 -11
- megadetector/postprocessing/categorize_detections_by_size.py +44 -44
- megadetector/postprocessing/classification_postprocessing.py +415 -415
- megadetector/postprocessing/combine_batch_outputs.py +20 -21
- megadetector/postprocessing/compare_batch_results.py +528 -517
- megadetector/postprocessing/convert_output_format.py +97 -97
- megadetector/postprocessing/create_crop_folder.py +219 -146
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -499
- megadetector/postprocessing/load_api_results.py +23 -20
- megadetector/postprocessing/md_to_coco.py +129 -98
- megadetector/postprocessing/md_to_labelme.py +89 -83
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +87 -114
- megadetector/postprocessing/postprocess_batch_results.py +313 -298
- megadetector/postprocessing/remap_detection_categories.py +36 -36
- megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -66
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +33 -33
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/azure_utils.py +22 -22
- megadetector/utils/ct_utils.py +1018 -200
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +541 -518
- megadetector/utils/path_utils.py +1457 -398
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/sas_blob_utils.py +53 -49
- megadetector/utils/split_locations_into_train_val.py +61 -61
- megadetector/utils/string_utils.py +147 -26
- megadetector/utils/url_utils.py +463 -173
- megadetector/utils/wi_utils.py +2629 -2526
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +21 -21
- megadetector/visualization/render_images_with_thumbnails.py +37 -73
- megadetector/visualization/visualization_utils.py +401 -397
- megadetector/visualization/visualize_db.py +197 -190
- megadetector/visualization/visualize_detector_output.py +79 -73
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/METADATA +135 -132
- megadetector-5.0.29.dist-info/RECORD +163 -0
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector-5.0.28.dist-info/RECORD +0 -209
@@ -1,1407 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
|
|
3
|
-
idaho-camera-traps.py
|
|
4
|
-
|
|
5
|
-
Prepare the Idaho Camera Traps dataset for release on LILA.
|
|
6
|
-
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
-
#%% Imports and constants
|
|
10
|
-
|
|
11
|
-
import json
|
|
12
|
-
import os
|
|
13
|
-
import numpy as np
|
|
14
|
-
import dateutil
|
|
15
|
-
import pandas as pd
|
|
16
|
-
import datetime
|
|
17
|
-
import shutil
|
|
18
|
-
|
|
19
|
-
from tqdm import tqdm
|
|
20
|
-
from bson import json_util
|
|
21
|
-
|
|
22
|
-
from collections import defaultdict
|
|
23
|
-
|
|
24
|
-
# Multi-threading for .csv file comparison and image existence validation
|
|
25
|
-
from multiprocessing.pool import Pool as Pool
|
|
26
|
-
from multiprocessing.pool import ThreadPool as ThreadPool
|
|
27
|
-
n_threads = 14
|
|
28
|
-
n_threads_file_copy = 20
|
|
29
|
-
|
|
30
|
-
input_base = r'i:\idfg-images'
|
|
31
|
-
output_base = r'h:\idaho-camera-traps'
|
|
32
|
-
output_image_base = r'j:\idaho-camera-traps-output'
|
|
33
|
-
assert os.path.isdir(input_base)
|
|
34
|
-
assert os.path.isdir(output_base)
|
|
35
|
-
assert os.path.isdir(output_image_base)
|
|
36
|
-
|
|
37
|
-
output_image_base_public = os.path.join(output_image_base,'public')
|
|
38
|
-
output_image_base_private = os.path.join(output_image_base,'private')
|
|
39
|
-
|
|
40
|
-
# We are going to map the original filenames/locations to obfuscated strings, but once
|
|
41
|
-
# we've done that, we will re-use the mappings every time we run this script.
|
|
42
|
-
force_generate_mappings = False
|
|
43
|
-
|
|
44
|
-
# This is the file to which mappings get saved
|
|
45
|
-
id_mapping_file = os.path.join(output_base,'id_mapping.json')
|
|
46
|
-
|
|
47
|
-
# The maximum time (in seconds) between images within which two images are considered the
|
|
48
|
-
# same sequence.
|
|
49
|
-
max_gap_within_sequence = 30
|
|
50
|
-
|
|
51
|
-
# This is a two-column file, where each line is [string in the original metadata],[category name we want to map it to]
|
|
52
|
-
category_mapping_file = os.path.join(output_base,'category_mapping.csv')
|
|
53
|
-
|
|
54
|
-
# The output file, using the original strings
|
|
55
|
-
output_json_original_strings = os.path.join(output_base,'idaho-camera-traps-original-strings.json')
|
|
56
|
-
|
|
57
|
-
# The output file, using obfuscated strings for everything but filenamed
|
|
58
|
-
output_json_remapped_ids = os.path.join(output_base,'idaho-camera-traps-remapped-ids.json')
|
|
59
|
-
|
|
60
|
-
# The output file, using obfuscated strings and obfuscated filenames
|
|
61
|
-
output_json = os.path.join(output_base,'idaho-camera-traps.json')
|
|
62
|
-
|
|
63
|
-
# One time only, I ran MegaDetector on the whole dataset...
|
|
64
|
-
megadetector_results_file = r'H:\idaho-camera-traps\idfg-2021-07-26idaho-camera-traps_detections.json'
|
|
65
|
-
|
|
66
|
-
# ...then set aside any images that *may* have contained humans that had not already been
|
|
67
|
-
# annotated as such. Those went in this folder...
|
|
68
|
-
human_review_folder = os.path.join(output_base,'human_review')
|
|
69
|
-
|
|
70
|
-
# ...and the ones that *actually* had humans (identified via manual review) got
|
|
71
|
-
# copied to this folder...
|
|
72
|
-
human_review_selection_folder = os.path.join(output_base,'human_review_selections')
|
|
73
|
-
|
|
74
|
-
# ...which was enumerated to this text file, which is a manually-curated list of
|
|
75
|
-
# images that were flagged as human.
|
|
76
|
-
human_review_list = os.path.join(output_base,'human_flagged_images.txt')
|
|
77
|
-
|
|
78
|
-
# Unopinionated .json conversion of the .csv metadata
|
|
79
|
-
sequence_info_cache = os.path.join(output_base,'sequence_info.json')
|
|
80
|
-
|
|
81
|
-
valid_opstates = ['normal','maintenance','snow on lens','foggy lens','foggy weather',
|
|
82
|
-
'malfunction','misdirected','snow on lense','poop/slobber','sun','tilted','vegetation obstruction']
|
|
83
|
-
opstate_mappings = {'snow on lense':'snow on lens','poop/slobber':'lens obscured','maintenance':'human'}
|
|
84
|
-
|
|
85
|
-
survey_species_presence_columns = ['elkpresent','deerpresent','prongpresent']
|
|
86
|
-
|
|
87
|
-
presence_to_count_columns = {
|
|
88
|
-
'otherpresent':['MooseAntlerless','MooseCalf','MooseOther','MooseBull','MooseUnkn',
|
|
89
|
-
'BlackBearAdult','BlackBearCub','LionAdult','LionKitten','WolfAdult',
|
|
90
|
-
'WolfPup','CattleCow','CattleCalf','other'],
|
|
91
|
-
'elkpresent':['ElkSpike','ElkAntlerless','ElkCalf','ElkRaghorn','ElkMatBull','ElkUnkn','ElkPedNub'],
|
|
92
|
-
'deerpresent':['MDbuck','MDantlerless','MDfawn','WTDbuck','WTDantlerless','WTDfawn','WTDunkn','MDunkn'],
|
|
93
|
-
'prongpresent':['PronghornBuck','PronghornFawn','PHunkn']
|
|
94
|
-
}
|
|
95
|
-
|
|
96
|
-
required_columns = ['File','Folder','Date','Time','otherpresent','other','otherwhat','opstate']
|
|
97
|
-
expected_presence_columns = ['elkpresent','deerpresent','prongpresent','humanpresent','otherpresent']
|
|
98
|
-
|
|
99
|
-
expected_count_columns = set()
|
|
100
|
-
for presence_column in presence_to_count_columns.keys():
|
|
101
|
-
count_columns = presence_to_count_columns[presence_column]
|
|
102
|
-
for count_column in count_columns:
|
|
103
|
-
expected_count_columns.add(count_column)
|
|
104
|
-
|
|
105
|
-
def list_is_sorted(l):
|
|
106
|
-
return all(l[i] <= l[i+1] for i in range(len(l)-1))
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
#%% List files (images + .csv)
|
|
110
|
-
|
|
111
|
-
def get_files():
|
|
112
|
-
|
|
113
|
-
all_files_list = os.path.join(output_base,'all_files.json')
|
|
114
|
-
force_file_enumeration = False
|
|
115
|
-
|
|
116
|
-
if (os.path.isfile(all_files_list) and (not force_file_enumeration)):
|
|
117
|
-
|
|
118
|
-
print('File list exists, bypassing enumeration')
|
|
119
|
-
with open(all_files_list,'r') as f:
|
|
120
|
-
all_files = json.load(f)
|
|
121
|
-
|
|
122
|
-
else:
|
|
123
|
-
|
|
124
|
-
from pathlib import Path
|
|
125
|
-
all_files = []
|
|
126
|
-
for path in Path(input_base).rglob('*.*'):
|
|
127
|
-
path = str(path)
|
|
128
|
-
path = os.path.relpath(path,input_base)
|
|
129
|
-
all_files.append(path)
|
|
130
|
-
with open(all_files_list,'w') as f:
|
|
131
|
-
json.dump(all_files,f,indent=1)
|
|
132
|
-
|
|
133
|
-
print('Enumerated {} files'.format(len(all_files)))
|
|
134
|
-
|
|
135
|
-
image_files = [s for s in all_files if (s.lower().endswith('.jpg') or s.lower().endswith('.jpeg'))]
|
|
136
|
-
csv_files = [s for s in all_files if (\
|
|
137
|
-
(s.lower().endswith('.csv')) and \
|
|
138
|
-
('Backups' not in s) and \
|
|
139
|
-
('Metadata.csv' not in s) and \
|
|
140
|
-
('ExportedDataFiles' not in s) and \
|
|
141
|
-
('CSV Files' not in s)
|
|
142
|
-
)]
|
|
143
|
-
|
|
144
|
-
print('{} image files, {} .csv files'.format(len(image_files),len(csv_files)))
|
|
145
|
-
|
|
146
|
-
# Ignore .csv files in folders with multiple .csv files
|
|
147
|
-
|
|
148
|
-
# ...which would require some extra work to decipher.
|
|
149
|
-
|
|
150
|
-
csv_files_to_ignore = []
|
|
151
|
-
|
|
152
|
-
folder_to_csv_files = defaultdict(list)
|
|
153
|
-
|
|
154
|
-
# fn = csv_files[0]
|
|
155
|
-
for fn in csv_files:
|
|
156
|
-
folder_name = os.path.dirname(fn)
|
|
157
|
-
folder_to_csv_files[folder_name].append(fn)
|
|
158
|
-
|
|
159
|
-
for folder_name in folder_to_csv_files.keys():
|
|
160
|
-
if len(folder_to_csv_files[folder_name]) > 1:
|
|
161
|
-
print('Multiple .csv files for {}:'.format(folder_name))
|
|
162
|
-
for csv_file in folder_to_csv_files[folder_name]:
|
|
163
|
-
print(csv_file)
|
|
164
|
-
csv_files_to_ignore.append(csv_file)
|
|
165
|
-
print('')
|
|
166
|
-
|
|
167
|
-
n_csv_original = len(csv_files)
|
|
168
|
-
csv_files = [s for s in csv_files if s not in csv_files_to_ignore]
|
|
169
|
-
|
|
170
|
-
print('Processing {} of {} csv files'.format(len(csv_files),n_csv_original))
|
|
171
|
-
|
|
172
|
-
return image_files,csv_files
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
#%% Parse each .csv file into sequences (function)
|
|
176
|
-
|
|
177
|
-
# csv_file = csv_files[-1]
|
|
178
|
-
def csv_to_sequences(csv_file):
|
|
179
|
-
|
|
180
|
-
print('Processing {}'.format(csv_file))
|
|
181
|
-
|
|
182
|
-
csv_file_absolute = os.path.join(input_base,csv_file)
|
|
183
|
-
# os.startfile(csv_file_absolute)
|
|
184
|
-
|
|
185
|
-
sequences = []
|
|
186
|
-
# survey = csv_file.split('\\')[0]
|
|
187
|
-
|
|
188
|
-
# Sample paths from which we need to derive locations:
|
|
189
|
-
#
|
|
190
|
-
# St.Joe_elk\AM99\Trip 1\100RECNX\TimelapseData.csv
|
|
191
|
-
# Beaverhead_elk\AM34\Trip 1\100RECNX\TimelapseData.csv
|
|
192
|
-
#
|
|
193
|
-
# ClearCreek_mustelids\Winter2015-16\FS-001-P\FS-001-P.csv
|
|
194
|
-
# ClearCreek_mustelids\Summer2015\FS-001\FS-001.csv
|
|
195
|
-
# ClearCreek_mustelids\Summer2016\IDFG-016\IDFG-016.csv
|
|
196
|
-
#
|
|
197
|
-
# I:\idfg-images\ClearCreek_mustelids\Summer2016\IDFG-017b
|
|
198
|
-
# I:\idfg-images\ClearCreek_mustelids\Summer2016\IDFG-017a
|
|
199
|
-
if 'St.Joe_elk' in csv_file or 'Beaverhead_elk' in csv_file:
|
|
200
|
-
location_name = '_'.join(csv_file.split('\\')[0:2]).replace(' ','')
|
|
201
|
-
else:
|
|
202
|
-
assert 'ClearCreek_mustelids' in csv_file
|
|
203
|
-
tokens = csv_file.split('\\')
|
|
204
|
-
assert 'FS-' in tokens[2] or 'IDFG-' in tokens[2]
|
|
205
|
-
location_name = '_'.join([tokens[0],tokens[2]]).replace('-P','')
|
|
206
|
-
if location_name.endswith('017a') or location_name.endswith('017b'):
|
|
207
|
-
location_name = location_name[:-1]
|
|
208
|
-
|
|
209
|
-
# Load .csv file
|
|
210
|
-
df = pd.read_csv(csv_file_absolute)
|
|
211
|
-
df['datetime'] = None
|
|
212
|
-
df['seq_id'] = None
|
|
213
|
-
df['synthetic_frame_number'] = None
|
|
214
|
-
|
|
215
|
-
# Validate the opstate column
|
|
216
|
-
opstates = set(df['opstate'])
|
|
217
|
-
for s in opstates:
|
|
218
|
-
if isinstance(s,str):
|
|
219
|
-
s = s.strip()
|
|
220
|
-
if len(s) > 0:
|
|
221
|
-
assert s in valid_opstates,'Invalid opstate: {}'.format(s)
|
|
222
|
-
|
|
223
|
-
column_names = list(df.columns)
|
|
224
|
-
|
|
225
|
-
for s in required_columns:
|
|
226
|
-
assert s in column_names
|
|
227
|
-
|
|
228
|
-
count_columns = [s for s in column_names if s in expected_count_columns]
|
|
229
|
-
|
|
230
|
-
presence_columns = [s for s in column_names if s.endswith('present')]
|
|
231
|
-
|
|
232
|
-
for s in presence_columns:
|
|
233
|
-
if s not in expected_presence_columns:
|
|
234
|
-
assert 'Unexpected presence column {} in {}'.format(s,csv_file)
|
|
235
|
-
for s in expected_presence_columns:
|
|
236
|
-
if s not in presence_columns:
|
|
237
|
-
assert 'Missing presence column {} in {}'.format(s,csv_file)
|
|
238
|
-
|
|
239
|
-
if False:
|
|
240
|
-
for s in expected_count_columns:
|
|
241
|
-
if s not in count_columns:
|
|
242
|
-
print('Missing count column {} in {}'.format(s,csv_file))
|
|
243
|
-
|
|
244
|
-
## Create datetimes
|
|
245
|
-
|
|
246
|
-
# print('Creating datetimes')
|
|
247
|
-
|
|
248
|
-
# i_row = 0; row = df.iloc[i_row]
|
|
249
|
-
for i_row,row in df.iterrows():
|
|
250
|
-
|
|
251
|
-
date = row['Date']
|
|
252
|
-
time = row['Time']
|
|
253
|
-
datestring = date + ' ' + time
|
|
254
|
-
dt = dateutil.parser.parse(datestring)
|
|
255
|
-
assert dt.year >= 2015 and dt.year <= 2019
|
|
256
|
-
df.loc[i_row,'datetime'] = dt
|
|
257
|
-
|
|
258
|
-
# Make sure data are sorted chronologically
|
|
259
|
-
#
|
|
260
|
-
# In odd circumstances, they are not... so sort them first, but warn
|
|
261
|
-
datetimes = list(df['datetime'])
|
|
262
|
-
if not list_is_sorted(datetimes):
|
|
263
|
-
print('Datetimes not sorted for {}'.format(csv_file))
|
|
264
|
-
|
|
265
|
-
df = df.sort_values('datetime')
|
|
266
|
-
df.reset_index(drop=True, inplace=True)
|
|
267
|
-
datetimes = list(df['datetime'])
|
|
268
|
-
assert list_is_sorted(datetimes)
|
|
269
|
-
|
|
270
|
-
# Debugging when I was trying to see what was up with the unsorted dates
|
|
271
|
-
if False:
|
|
272
|
-
for i in range(0,len(datetimes)-1):
|
|
273
|
-
dt = datetimes[i+1]
|
|
274
|
-
prev_dt = datetimes[i]
|
|
275
|
-
delta = dt - prev_dt
|
|
276
|
-
assert delta >= datetime.timedelta(0)
|
|
277
|
-
|
|
278
|
-
## Parse into sequences
|
|
279
|
-
|
|
280
|
-
# print('Creating sequences')
|
|
281
|
-
|
|
282
|
-
current_sequence_id = None
|
|
283
|
-
next_frame_number = 0
|
|
284
|
-
previous_datetime = None
|
|
285
|
-
|
|
286
|
-
sequence_id_to_rows = defaultdict(list)
|
|
287
|
-
|
|
288
|
-
# i_row = 0; row = df.iloc[i_row]
|
|
289
|
-
for i_row,row in df.iterrows():
|
|
290
|
-
|
|
291
|
-
dt = row['datetime']
|
|
292
|
-
assert dt is not None and isinstance(dt,datetime.datetime)
|
|
293
|
-
|
|
294
|
-
# Start a new sequence if:
|
|
295
|
-
#
|
|
296
|
-
# * This image has no timestamp
|
|
297
|
-
# * This image has a frame number of zero
|
|
298
|
-
# * We have no previous image timestamp
|
|
299
|
-
#
|
|
300
|
-
if previous_datetime is None:
|
|
301
|
-
delta = None
|
|
302
|
-
else:
|
|
303
|
-
delta = (dt - previous_datetime).total_seconds()
|
|
304
|
-
|
|
305
|
-
# Start a new sequence if necessary
|
|
306
|
-
if delta is None or delta > max_gap_within_sequence:
|
|
307
|
-
next_frame_number = 0
|
|
308
|
-
current_sequence_id = location_name + '_seq_' + str(dt) # str(uuid.uuid1())
|
|
309
|
-
|
|
310
|
-
assert current_sequence_id is not None
|
|
311
|
-
|
|
312
|
-
sequence_id_to_rows[current_sequence_id].append(i_row)
|
|
313
|
-
df.loc[i_row,'seq_id'] = current_sequence_id
|
|
314
|
-
df.loc[i_row,'synthetic_frame_number'] = next_frame_number
|
|
315
|
-
next_frame_number = next_frame_number + 1
|
|
316
|
-
previous_datetime = dt
|
|
317
|
-
|
|
318
|
-
# ...for each row
|
|
319
|
-
|
|
320
|
-
location_sequences = list(set(list(df['seq_id'])))
|
|
321
|
-
location_sequences.sort()
|
|
322
|
-
|
|
323
|
-
inconsistent_sequences = []
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
## Parse labels for each sequence
|
|
327
|
-
|
|
328
|
-
# sequence_id = location_sequences[0]
|
|
329
|
-
for sequence_id in location_sequences:
|
|
330
|
-
|
|
331
|
-
sequence_row_indices = sequence_id_to_rows[sequence_id]
|
|
332
|
-
assert len(sequence_row_indices) > 0
|
|
333
|
-
|
|
334
|
-
# Row indices in a sequence should be adjacent
|
|
335
|
-
if len(sequence_row_indices) > 1:
|
|
336
|
-
d = np.diff(sequence_row_indices)
|
|
337
|
-
assert(all(d==1))
|
|
338
|
-
|
|
339
|
-
# sequence_df = df[df['seq_id']==sequence_id]
|
|
340
|
-
sequence_df = df.iloc[sequence_row_indices]
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
## Determine what's present
|
|
344
|
-
|
|
345
|
-
presence_columns_marked = []
|
|
346
|
-
survey_species = []
|
|
347
|
-
other_species = []
|
|
348
|
-
|
|
349
|
-
# Be conservative; assume humans are present in all maintenance images
|
|
350
|
-
opstates = set(sequence_df['opstate'])
|
|
351
|
-
assert all([ ( (isinstance(s,float)) or (len(s.strip())== 0) or \
|
|
352
|
-
(s.strip() in valid_opstates)) for s in opstates]),\
|
|
353
|
-
'Invalid optstate in: {}'.format(' | '.join(opstates))
|
|
354
|
-
|
|
355
|
-
for presence_column in presence_columns:
|
|
356
|
-
|
|
357
|
-
presence_values = list(sequence_df[presence_column])
|
|
358
|
-
|
|
359
|
-
# The presence columns are *almost* always identical for all images in a sequence
|
|
360
|
-
single_presence_value = (len(set(presence_values)) == 1)
|
|
361
|
-
# assert single_presence_value
|
|
362
|
-
if not single_presence_value:
|
|
363
|
-
# print('Warning: presence value for {} is inconsistent for {}'.format(
|
|
364
|
-
# presence_column,sequence_id))
|
|
365
|
-
inconsistent_sequences.append(sequence_id)
|
|
366
|
-
|
|
367
|
-
if any(presence_values):
|
|
368
|
-
presence_columns_marked.append(presence_column)
|
|
369
|
-
|
|
370
|
-
# ...for each presence column
|
|
371
|
-
|
|
372
|
-
# Tally up the standard (survey) species
|
|
373
|
-
survey_species = [s.replace('present','') for s in presence_columns_marked if s != 'otherpresent']
|
|
374
|
-
for opstate in opstates:
|
|
375
|
-
if not isinstance(opstate,str):
|
|
376
|
-
continue
|
|
377
|
-
opstate = opstate.strip()
|
|
378
|
-
if len(opstate) == 0:
|
|
379
|
-
continue
|
|
380
|
-
if opstate in opstate_mappings:
|
|
381
|
-
opstate = opstate_mappings[opstate]
|
|
382
|
-
if (opstate != 'normal') and (opstate not in survey_species):
|
|
383
|
-
survey_species.append(opstate)
|
|
384
|
-
|
|
385
|
-
# If no presence columns are marked, all counts should be zero
|
|
386
|
-
if len(presence_columns_marked) == 0:
|
|
387
|
-
|
|
388
|
-
# count_column = count_columns[0]
|
|
389
|
-
for count_column in count_columns:
|
|
390
|
-
|
|
391
|
-
values = list(set(list(sequence_df[count_column])))
|
|
392
|
-
|
|
393
|
-
# Occasionally a count gets entered (correctly) without the presence column being marked
|
|
394
|
-
# assert len(values) == 1 and values[0] == 0, 'Non-zero counts with no presence
|
|
395
|
-
# columns marked for sequence {}'.format(sequence_id)
|
|
396
|
-
if (not(len(values) == 1 and values[0] == 0)):
|
|
397
|
-
print('Warning: presence and counts are inconsistent for {}'.format(sequence_id))
|
|
398
|
-
|
|
399
|
-
# Handle this by virtually checking the "right" box
|
|
400
|
-
for presence_column in presence_to_count_columns.keys():
|
|
401
|
-
count_columns_this_species = presence_to_count_columns[presence_column]
|
|
402
|
-
if count_column in count_columns_this_species:
|
|
403
|
-
if presence_column not in presence_columns_marked:
|
|
404
|
-
presence_columns_marked.append(presence_column)
|
|
405
|
-
|
|
406
|
-
# Make sure we found a match
|
|
407
|
-
assert len(presence_columns_marked) > 0
|
|
408
|
-
|
|
409
|
-
# Handle 'other' tags
|
|
410
|
-
if 'otherpresent' in presence_columns_marked:
|
|
411
|
-
|
|
412
|
-
sequence_otherwhats = set()
|
|
413
|
-
sequence_comments = set()
|
|
414
|
-
|
|
415
|
-
for i,r in sequence_df.iterrows():
|
|
416
|
-
otherwhat = r['otherwhat']
|
|
417
|
-
if isinstance(otherwhat,str):
|
|
418
|
-
otherwhat = otherwhat.strip()
|
|
419
|
-
if len(otherwhat) > 0:
|
|
420
|
-
sequence_otherwhats.add(otherwhat)
|
|
421
|
-
comment = r['comment']
|
|
422
|
-
if isinstance(comment,str):
|
|
423
|
-
comment = comment.strip()
|
|
424
|
-
if len(comment) > 0:
|
|
425
|
-
sequence_comments.add(comment)
|
|
426
|
-
|
|
427
|
-
freetext_species = []
|
|
428
|
-
for s in sequence_otherwhats:
|
|
429
|
-
freetext_species.append(s)
|
|
430
|
-
for s in sequence_comments:
|
|
431
|
-
freetext_species.append(s)
|
|
432
|
-
|
|
433
|
-
counted_species = []
|
|
434
|
-
|
|
435
|
-
otherpresent_columns = presence_to_count_columns['otherpresent']
|
|
436
|
-
|
|
437
|
-
# column_name = otherpresent_columns[0]
|
|
438
|
-
for column_name in otherpresent_columns:
|
|
439
|
-
|
|
440
|
-
if column_name in sequence_df and column_name != 'other':
|
|
441
|
-
|
|
442
|
-
column_counts = list(sequence_df[column_name])
|
|
443
|
-
column_count_positive = any([c > 0 for c in column_counts])
|
|
444
|
-
|
|
445
|
-
if column_count_positive:
|
|
446
|
-
# print('Found non-survey counted species column: {}'.format(column_name))
|
|
447
|
-
counted_species.append(column_name)
|
|
448
|
-
|
|
449
|
-
# ...for each non-empty presence column
|
|
450
|
-
|
|
451
|
-
# Very rarely, the "otherpresent" column is checked, but no more detail is available
|
|
452
|
-
if not ( (len(freetext_species) > 0) or (len(counted_species) > 0) ):
|
|
453
|
-
other_species.append('unknown')
|
|
454
|
-
|
|
455
|
-
other_species += freetext_species
|
|
456
|
-
other_species += counted_species
|
|
457
|
-
|
|
458
|
-
# ...handling non-survey species
|
|
459
|
-
|
|
460
|
-
all_species = other_species + survey_species
|
|
461
|
-
|
|
462
|
-
# Build the sequence data
|
|
463
|
-
|
|
464
|
-
images = []
|
|
465
|
-
# i_row = 0; row = sequence_df.iloc[i_row]
|
|
466
|
-
for i_row,row in sequence_df.iterrows():
|
|
467
|
-
im = {}
|
|
468
|
-
# Only one folder used a single .csv file for two subfolders
|
|
469
|
-
if ('RelativePath' in row) and (isinstance(row['RelativePath'],str)) \
|
|
470
|
-
and (len(row['RelativePath'].strip()) > 0):
|
|
471
|
-
assert 'IDFG-028' in location_name
|
|
472
|
-
im['file_name'] = os.path.join(row['RelativePath'],row['File'])
|
|
473
|
-
else:
|
|
474
|
-
im['file_name'] = row['File']
|
|
475
|
-
im['datetime'] = row['datetime']
|
|
476
|
-
images.append(im)
|
|
477
|
-
|
|
478
|
-
sequence = {}
|
|
479
|
-
sequence['csv_source'] = csv_file
|
|
480
|
-
sequence['sequence_id'] = sequence_id
|
|
481
|
-
sequence['images'] = images
|
|
482
|
-
sequence['species_present'] = all_species
|
|
483
|
-
sequence['location'] = location_name
|
|
484
|
-
|
|
485
|
-
sequences.append(sequence)
|
|
486
|
-
|
|
487
|
-
# ...for each sequence
|
|
488
|
-
|
|
489
|
-
return sequences
|
|
490
|
-
|
|
491
|
-
# ...def csv_to_sequences()
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
#%% Parse each .csv file into sequences (loop)
|
|
495
|
-
|
|
496
|
-
if __name__ == "__main__":
|
|
497
|
-
|
|
498
|
-
#%%
|
|
499
|
-
|
|
500
|
-
import multiprocessing
|
|
501
|
-
multiprocessing.freeze_support()
|
|
502
|
-
image_files,csv_files = get_files()
|
|
503
|
-
|
|
504
|
-
#%%
|
|
505
|
-
|
|
506
|
-
if n_threads == 1:
|
|
507
|
-
|
|
508
|
-
# i_file = -1; csv_file = csv_files[i_file]
|
|
509
|
-
sequences_by_file = []
|
|
510
|
-
for i_file,csv_file in enumerate(csv_files):
|
|
511
|
-
print('Processing file {} of {}'.format(i_file,len(csv_files)))
|
|
512
|
-
sequences = csv_to_sequences(csv_file)
|
|
513
|
-
sequences_by_file.append(sequences)
|
|
514
|
-
|
|
515
|
-
else:
|
|
516
|
-
|
|
517
|
-
pool = Pool(n_threads)
|
|
518
|
-
sequences_by_file = list(pool.imap(csv_to_sequences,csv_files))
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
#%% Save sequence data
|
|
522
|
-
|
|
523
|
-
with open(sequence_info_cache,'w') as f:
|
|
524
|
-
json.dump(sequences_by_file,f,indent=2,default=json_util.default)
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
#%% Load sequence data
|
|
528
|
-
|
|
529
|
-
if False:
|
|
530
|
-
|
|
531
|
-
#%%
|
|
532
|
-
|
|
533
|
-
with open(sequence_info_cache,'r') as f:
|
|
534
|
-
sequences_by_file = json.load(f,object_hook=json_util.object_hook)
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
#%% Validate file mapping (based on the existing enumeration)
|
|
538
|
-
|
|
539
|
-
missing_images = []
|
|
540
|
-
image_files_set = set(image_files)
|
|
541
|
-
n_images_in_sequences = 0
|
|
542
|
-
sequence_ids = set()
|
|
543
|
-
|
|
544
|
-
# sequences = sequences_by_file[0]
|
|
545
|
-
for i_sequences,sequences in enumerate(tqdm(sequences_by_file)):
|
|
546
|
-
|
|
547
|
-
assert len(sequences) > 0
|
|
548
|
-
csv_source = sequences[0]['csv_source']
|
|
549
|
-
csv_file_absolute = os.path.join(input_base,csv_source)
|
|
550
|
-
csv_folder = os.path.dirname(csv_file_absolute)
|
|
551
|
-
assert os.path.isfile(csv_file_absolute)
|
|
552
|
-
|
|
553
|
-
# sequence = sequences[0]
|
|
554
|
-
for i_sequence,sequence in enumerate(sequences):
|
|
555
|
-
|
|
556
|
-
assert sequence['csv_source'] == csv_source
|
|
557
|
-
sequence_id = sequence['sequence_id']
|
|
558
|
-
if sequence_id in sequence_ids:
|
|
559
|
-
print('Warning: duplicate sequence for {}, creating new sequence'.format(sequence_id))
|
|
560
|
-
sequence['sequence_id'] = sequence['sequence_id'] + '_' + str(i_sequences) + \
|
|
561
|
-
'_' + str(i_sequence)
|
|
562
|
-
sequence_id = sequence['sequence_id']
|
|
563
|
-
assert sequence_id not in sequence_ids
|
|
564
|
-
|
|
565
|
-
sequence_ids.add(sequence_id)
|
|
566
|
-
|
|
567
|
-
species_present = sequence['species_present']
|
|
568
|
-
images = sequence['images']
|
|
569
|
-
|
|
570
|
-
for im in images:
|
|
571
|
-
|
|
572
|
-
n_images_in_sequences += 1
|
|
573
|
-
image_file_relative = im['file_name']
|
|
574
|
-
|
|
575
|
-
# Actually, one folder has relative paths
|
|
576
|
-
# assert '\\' not in image_file_relative and '/' not in image_file_relative
|
|
577
|
-
|
|
578
|
-
image_file_absolute = os.path.join(csv_folder,image_file_relative)
|
|
579
|
-
image_file_container_relative = os.path.relpath(image_file_absolute,input_base)
|
|
580
|
-
|
|
581
|
-
# os.startfile(csv_folder)
|
|
582
|
-
# assert os.path.isfile(image_file_absolute)
|
|
583
|
-
# found_file = os.path.isfile(image_file_absolute)
|
|
584
|
-
found_file = image_file_container_relative in image_files_set
|
|
585
|
-
if not found_file:
|
|
586
|
-
print('Warning: can\'t find image {}'.format(image_file_absolute))
|
|
587
|
-
missing_images.append(image_file_absolute)
|
|
588
|
-
|
|
589
|
-
# ...for each image
|
|
590
|
-
|
|
591
|
-
# ...for each sequence
|
|
592
|
-
|
|
593
|
-
# ...for each .csv file
|
|
594
|
-
|
|
595
|
-
print('{} of {} images missing ({} on disk)'.format(len(missing_images),n_images_in_sequences,
|
|
596
|
-
len(image_files)))
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
#%% Load manual category mappings
|
|
600
|
-
|
|
601
|
-
with open(category_mapping_file,'r') as f:
|
|
602
|
-
category_mapping_lines = f.readlines()
|
|
603
|
-
category_mapping_lines = [s.strip() for s in category_mapping_lines]
|
|
604
|
-
|
|
605
|
-
category_mappings = {}
|
|
606
|
-
for s in category_mapping_lines:
|
|
607
|
-
tokens = s.split(',',1)
|
|
608
|
-
category_name = tokens[0].strip()
|
|
609
|
-
category_value = tokens[1].strip().replace('"','').replace(',','+')
|
|
610
|
-
assert ',' not in category_name
|
|
611
|
-
assert ',' not in category_value
|
|
612
|
-
|
|
613
|
-
# The second column is blank when the first column already represents the category name
|
|
614
|
-
if len(category_value) == 0:
|
|
615
|
-
category_value = category_name
|
|
616
|
-
category_mappings[category_name] = category_value
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
#%% Convert to CCT .json (original strings)
|
|
620
|
-
|
|
621
|
-
human_flagged_images = []
|
|
622
|
-
with open(human_review_list,'r') as f:
|
|
623
|
-
human_flagged_images = f.readlines()
|
|
624
|
-
human_flagged_images = [s.strip().replace('/','\\') for s in human_flagged_images]
|
|
625
|
-
human_flagged_images = set(human_flagged_images)
|
|
626
|
-
print('Read {} human flagged images'.format(len(human_flagged_images)))
|
|
627
|
-
|
|
628
|
-
annotations = []
|
|
629
|
-
image_id_to_image = {}
|
|
630
|
-
category_name_to_category = {}
|
|
631
|
-
|
|
632
|
-
# Force the empty category to be ID 0
|
|
633
|
-
empty_category_id = 0
|
|
634
|
-
empty_category = {}
|
|
635
|
-
empty_category['id'] = empty_category_id
|
|
636
|
-
empty_category['name'] = 'empty'
|
|
637
|
-
category_name_to_category['empty'] = empty_category
|
|
638
|
-
|
|
639
|
-
human_category_id = 1
|
|
640
|
-
human_category = {}
|
|
641
|
-
human_category['id'] = human_category_id
|
|
642
|
-
human_category['name'] = 'human'
|
|
643
|
-
category_name_to_category['human'] = human_category
|
|
644
|
-
|
|
645
|
-
next_category_id = 2
|
|
646
|
-
|
|
647
|
-
annotation_ids = set()
|
|
648
|
-
|
|
649
|
-
if False:
|
|
650
|
-
target_folder = r'ClearCreek_mustelids\Summer2015\FS-035'
|
|
651
|
-
for sequences in sequences_by_file:
|
|
652
|
-
if target_folder in sequences[0]['csv_source']:
|
|
653
|
-
break
|
|
654
|
-
|
|
655
|
-
# For each .csv file...
|
|
656
|
-
#
|
|
657
|
-
# sequences = sequences_by_file[0]
|
|
658
|
-
for sequences in tqdm(sequences_by_file):
|
|
659
|
-
|
|
660
|
-
# For each sequence...
|
|
661
|
-
#
|
|
662
|
-
# sequence = sequences[0]
|
|
663
|
-
for sequence in sequences:
|
|
664
|
-
|
|
665
|
-
species_present = sequence['species_present']
|
|
666
|
-
species_present = [s.lower().strip().replace(',',';') for s in species_present]
|
|
667
|
-
|
|
668
|
-
sequence_images = sequence['images']
|
|
669
|
-
location = sequence['location'].lower().strip()
|
|
670
|
-
sequence_id = sequence['sequence_id']
|
|
671
|
-
csv_source = sequence['csv_source']
|
|
672
|
-
csv_folder_relative = os.path.dirname(csv_source)
|
|
673
|
-
|
|
674
|
-
sequence_category_ids = set()
|
|
675
|
-
|
|
676
|
-
# Find categories for this image
|
|
677
|
-
if len(species_present) == 0:
|
|
678
|
-
|
|
679
|
-
sequence_category_ids.add(0)
|
|
680
|
-
assert category_name_to_category['empty']['id'] == list(sequence_category_ids)[0]
|
|
681
|
-
|
|
682
|
-
else:
|
|
683
|
-
|
|
684
|
-
# When 'unknown' is used in combination with another label, use that
|
|
685
|
-
# label; the "unknown" here doesn't mean "another unknown species", it means
|
|
686
|
-
# there is some other unknown property about the main species.
|
|
687
|
-
if 'unknown' in species_present and len(species_present) > 1:
|
|
688
|
-
assert all([((s in category_mappings) or (s in valid_opstates) or \
|
|
689
|
-
(s in opstate_mappings.values()))\
|
|
690
|
-
for s in species_present if s != 'unknown'])
|
|
691
|
-
species_present = [s for s in species_present if s != 'unknown']
|
|
692
|
-
|
|
693
|
-
# category_name_string = species_present[0]
|
|
694
|
-
for category_name_string in species_present:
|
|
695
|
-
|
|
696
|
-
# This piece of text had a lot of complicated syntax in it, and it would have
|
|
697
|
-
# been too complicated to handle in a general way
|
|
698
|
-
if 'coyotoes' in category_name_string:
|
|
699
|
-
# print('Ignoring category {}'.format(category_name_string))
|
|
700
|
-
continue
|
|
701
|
-
|
|
702
|
-
if category_name_string not in category_mappings:
|
|
703
|
-
assert category_name_string in valid_opstates or \
|
|
704
|
-
category_name_string in opstate_mappings.values()
|
|
705
|
-
else:
|
|
706
|
-
category_name_string = category_mappings[category_name_string]
|
|
707
|
-
assert ',' not in category_name_string
|
|
708
|
-
|
|
709
|
-
category_names = category_name_string.split('+')
|
|
710
|
-
assert len(category_names) <= 2
|
|
711
|
-
|
|
712
|
-
# Don't process redundant labels
|
|
713
|
-
category_names = set(category_names)
|
|
714
|
-
|
|
715
|
-
# category_name = category_names[0]
|
|
716
|
-
for category_name in category_names:
|
|
717
|
-
|
|
718
|
-
if category_name == 'ignore':
|
|
719
|
-
continue
|
|
720
|
-
|
|
721
|
-
category_name = category_name.replace('"','')
|
|
722
|
-
|
|
723
|
-
# If we've seen this category before...
|
|
724
|
-
if category_name in category_name_to_category:
|
|
725
|
-
|
|
726
|
-
category = category_name_to_category[category_name]
|
|
727
|
-
category_id = category['id']
|
|
728
|
-
|
|
729
|
-
# If this is a new category...
|
|
730
|
-
else:
|
|
731
|
-
|
|
732
|
-
# print('Adding new category for {}'.format(category_name))
|
|
733
|
-
category_id = next_category_id
|
|
734
|
-
category = {}
|
|
735
|
-
category['id'] = category_id
|
|
736
|
-
category['name'] = category_name
|
|
737
|
-
category_name_to_category[category_name] = category
|
|
738
|
-
next_category_id += 1
|
|
739
|
-
|
|
740
|
-
sequence_category_ids.add(category_id)
|
|
741
|
-
|
|
742
|
-
# ...for each category (inner)
|
|
743
|
-
|
|
744
|
-
# ...for each category (outer)
|
|
745
|
-
|
|
746
|
-
# ...if we do/don't have species in this sequence
|
|
747
|
-
|
|
748
|
-
# We should have at least one category assigned (which may be "empty" or "unknown")
|
|
749
|
-
assert len(sequence_category_ids) > 0
|
|
750
|
-
|
|
751
|
-
# assert len(sequence_category_ids) > 0
|
|
752
|
-
|
|
753
|
-
# Was any image in this sequence manually flagged as human?
|
|
754
|
-
for i_image,im in enumerate(sequence_images):
|
|
755
|
-
|
|
756
|
-
file_name_relative = os.path.join(csv_folder_relative,im['file_name'])
|
|
757
|
-
if file_name_relative in human_flagged_images:
|
|
758
|
-
# print('Flagging sequence {} as human based on manual review'.format(sequence_id))
|
|
759
|
-
assert human_category_id not in sequence_category_ids
|
|
760
|
-
sequence_category_ids.add(human_category_id)
|
|
761
|
-
break
|
|
762
|
-
|
|
763
|
-
# For each image in this sequence...
|
|
764
|
-
#
|
|
765
|
-
# i_image = 0; im = images[i_image]
|
|
766
|
-
for i_image,im in enumerate(sequence_images):
|
|
767
|
-
|
|
768
|
-
image_id = sequence_id + '_' + im['file_name']
|
|
769
|
-
assert image_id not in image_id_to_image
|
|
770
|
-
|
|
771
|
-
output_im = {}
|
|
772
|
-
output_im['id'] = image_id
|
|
773
|
-
output_im['file_name'] = os.path.join(csv_folder_relative,im['file_name'])
|
|
774
|
-
output_im['seq_id'] = sequence_id
|
|
775
|
-
output_im['seq_num_frames'] = len(sequence)
|
|
776
|
-
output_im['frame_num'] = i_image
|
|
777
|
-
output_im['datetime'] = str(im['datetime'])
|
|
778
|
-
output_im['location'] = location
|
|
779
|
-
|
|
780
|
-
image_id_to_image[image_id] = output_im
|
|
781
|
-
|
|
782
|
-
# Create annotations for this image
|
|
783
|
-
for i_ann,category_id in enumerate(sequence_category_ids):
|
|
784
|
-
|
|
785
|
-
ann = {}
|
|
786
|
-
ann['id'] = 'ann_' + image_id + '_' + str(i_ann)
|
|
787
|
-
assert ann['id'] not in annotation_ids
|
|
788
|
-
annotation_ids.add(ann['id'])
|
|
789
|
-
ann['image_id'] = image_id
|
|
790
|
-
ann['category_id'] = category_id
|
|
791
|
-
ann['sequence_level_annotation'] = True
|
|
792
|
-
annotations.append(ann)
|
|
793
|
-
|
|
794
|
-
# ...for each image in this sequence
|
|
795
|
-
|
|
796
|
-
# ...for each sequence
|
|
797
|
-
|
|
798
|
-
# ...for each .csv file
|
|
799
|
-
|
|
800
|
-
images = list(image_id_to_image.values())
|
|
801
|
-
categories = list(category_name_to_category.values())
|
|
802
|
-
print('Loaded {} annotations in {} categories for {} images'.format(
|
|
803
|
-
len(annotations),len(categories),len(images)))
|
|
804
|
-
|
|
805
|
-
# Verify that all images have annotations
|
|
806
|
-
image_id_to_annotations = defaultdict(list)
|
|
807
|
-
|
|
808
|
-
# ann = ict_data['annotations'][0]
|
|
809
|
-
|
|
810
|
-
# For debugging only
|
|
811
|
-
categories_to_counts = defaultdict(int)
|
|
812
|
-
for ann in tqdm(annotations):
|
|
813
|
-
image_id_to_annotations[ann['image_id']].append(ann)
|
|
814
|
-
categories_to_counts[ann['category_id']] = categories_to_counts[ann['category_id']] + 1
|
|
815
|
-
|
|
816
|
-
for im in tqdm(images):
|
|
817
|
-
image_annotations = image_id_to_annotations[im['id']]
|
|
818
|
-
assert len(image_annotations) > 0
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
#%% Create output (original strings)
|
|
822
|
-
|
|
823
|
-
info = {}
|
|
824
|
-
info['contributor'] = 'Idaho Department of Fish and Game'
|
|
825
|
-
info['description'] = 'Idaho Camera traps'
|
|
826
|
-
info['version'] = '2021.07.19'
|
|
827
|
-
|
|
828
|
-
output_data = {}
|
|
829
|
-
output_data['images'] = images
|
|
830
|
-
output_data['annotations'] = annotations
|
|
831
|
-
output_data['categories'] = categories
|
|
832
|
-
output_data['info'] = info
|
|
833
|
-
|
|
834
|
-
with open(output_json_original_strings,'w') as f:
|
|
835
|
-
json.dump(output_data,f,indent=1)
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
#%% Validate .json file
|
|
839
|
-
|
|
840
|
-
from megadetector.data_management.databases import integrity_check_json_db
|
|
841
|
-
|
|
842
|
-
options = integrity_check_json_db.IntegrityCheckOptions()
|
|
843
|
-
options.baseDir = input_base
|
|
844
|
-
options.bCheckImageSizes = False
|
|
845
|
-
options.bCheckImageExistence = False
|
|
846
|
-
options.bFindUnusedImages = False
|
|
847
|
-
|
|
848
|
-
_, _, _ = integrity_check_json_db.integrity_check_json_db(output_json_original_strings, options)
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
#%% Preview labels
|
|
852
|
-
|
|
853
|
-
from megadetector.visualization import visualize_db
|
|
854
|
-
|
|
855
|
-
viz_options = visualize_db.DbVizOptions()
|
|
856
|
-
viz_options.num_to_visualize = 1000
|
|
857
|
-
viz_options.trim_to_images_with_bboxes = False
|
|
858
|
-
viz_options.add_search_links = False
|
|
859
|
-
viz_options.sort_by_filename = False
|
|
860
|
-
viz_options.parallelize_rendering = True
|
|
861
|
-
viz_options.include_filename_links = True
|
|
862
|
-
|
|
863
|
-
viz_options.classes_to_exclude = ['empty','deer','elk']
|
|
864
|
-
html_output_file, _ = visualize_db.visualize_db(db_path=output_json_original_strings,
|
|
865
|
-
output_dir=os.path.join(
|
|
866
|
-
output_base,'preview'),
|
|
867
|
-
image_base_dir=input_base,
|
|
868
|
-
options=viz_options)
|
|
869
|
-
os.startfile(html_output_file)
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
#%% Look for humans that were found by MegaDetector that haven't already been identified as human
|
|
873
|
-
|
|
874
|
-
# This whole step only needed to get run once
|
|
875
|
-
|
|
876
|
-
if False:
|
|
877
|
-
|
|
878
|
-
pass
|
|
879
|
-
|
|
880
|
-
#%%
|
|
881
|
-
|
|
882
|
-
human_confidence_threshold = 0.5
|
|
883
|
-
|
|
884
|
-
# Load MD results
|
|
885
|
-
with open(megadetector_results_file,'r') as f:
|
|
886
|
-
md_results = json.load(f)
|
|
887
|
-
|
|
888
|
-
# Get a list of filenames that MD tagged as human
|
|
889
|
-
|
|
890
|
-
human_md_categories =\
|
|
891
|
-
[category_id for category_id in md_results['detection_categories'] if \
|
|
892
|
-
((md_results['detection_categories'][category_id] == 'person') or \
|
|
893
|
-
(md_results['detection_categories'][category_id] == 'vehicle'))]
|
|
894
|
-
assert len(human_md_categories) == 2
|
|
895
|
-
|
|
896
|
-
# im = md_results['images'][0]
|
|
897
|
-
md_human_images = set()
|
|
898
|
-
|
|
899
|
-
for im in md_results['images']:
|
|
900
|
-
if 'detections' not in im:
|
|
901
|
-
continue
|
|
902
|
-
if im['max_detection_conf'] < human_confidence_threshold:
|
|
903
|
-
continue
|
|
904
|
-
for detection in im['detections']:
|
|
905
|
-
if detection['category'] not in human_md_categories:
|
|
906
|
-
continue
|
|
907
|
-
elif detection['conf'] < human_confidence_threshold:
|
|
908
|
-
continue
|
|
909
|
-
else:
|
|
910
|
-
md_human_images.add(im['file'])
|
|
911
|
-
break
|
|
912
|
-
|
|
913
|
-
# ...for each detection
|
|
914
|
-
|
|
915
|
-
# ...for each image
|
|
916
|
-
|
|
917
|
-
print('MD found {} potential human images (of {})'.format(
|
|
918
|
-
len(md_human_images),len(md_results['images'])))
|
|
919
|
-
|
|
920
|
-
# Map images to annotations in ICT
|
|
921
|
-
|
|
922
|
-
with open(output_json_original_strings,'r') as f:
|
|
923
|
-
ict_data = json.load(f)
|
|
924
|
-
|
|
925
|
-
category_id_to_name = {c['id']:c['name'] for c in categories}
|
|
926
|
-
|
|
927
|
-
image_id_to_annotations = defaultdict(list)
|
|
928
|
-
|
|
929
|
-
# ann = ict_data['annotations'][0]
|
|
930
|
-
for ann in tqdm(ict_data['annotations']):
|
|
931
|
-
image_id_to_annotations[ann['image_id']].append(ann)
|
|
932
|
-
|
|
933
|
-
human_ict_categories = ['human']
|
|
934
|
-
manual_human_images = set()
|
|
935
|
-
|
|
936
|
-
# For every image
|
|
937
|
-
# im = ict_data['images'][0]
|
|
938
|
-
for im in tqdm(ict_data['images']):
|
|
939
|
-
|
|
940
|
-
# Does this image already have a human annotation?
|
|
941
|
-
manual_human = False
|
|
942
|
-
|
|
943
|
-
annotations = image_id_to_annotations[im['id']]
|
|
944
|
-
assert len(annotations) > 0
|
|
945
|
-
|
|
946
|
-
for ann in annotations:
|
|
947
|
-
category_name = category_id_to_name[ann['category_id']]
|
|
948
|
-
if category_name in human_ict_categories:
|
|
949
|
-
manual_human_images.add(im['file_name'].replace('\\','/'))
|
|
950
|
-
|
|
951
|
-
# ...for each annotation
|
|
952
|
-
|
|
953
|
-
# ...for each image
|
|
954
|
-
|
|
955
|
-
print('{} images identified as human in source metadata'.format(len(manual_human_images)))
|
|
956
|
-
|
|
957
|
-
missing_human_images = []
|
|
958
|
-
|
|
959
|
-
for fn in md_human_images:
|
|
960
|
-
if fn not in manual_human_images:
|
|
961
|
-
missing_human_images.append(fn)
|
|
962
|
-
|
|
963
|
-
print('{} potentially untagged human images'.format(len(missing_human_images)))
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
#%% Copy images for review to a new folder
|
|
967
|
-
|
|
968
|
-
os.makedirs(human_review_folder,exist_ok=True)
|
|
969
|
-
missing_human_images.sort()
|
|
970
|
-
|
|
971
|
-
# fn = missing_human_images[0]
|
|
972
|
-
for i_image,fn in enumerate(tqdm(missing_human_images)):
|
|
973
|
-
input_fn_absolute = os.path.join(input_base,fn).replace('\\','/')
|
|
974
|
-
assert os.path.isfile(input_fn_absolute)
|
|
975
|
-
output_path = os.path.join(human_review_folder,str(i_image).zfill(4) + '_' + fn.replace('/','~'))
|
|
976
|
-
shutil.copyfile(input_fn_absolute,output_path)
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
#%% Manual step...
|
|
980
|
-
|
|
981
|
-
# Copy any images from that list that have humans in them to...
|
|
982
|
-
human_review_selection_folder = r'H:\idaho-camera-traps\human_review_selections'
|
|
983
|
-
assert os.path.isdir(human_review_selection_folder)
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
#%% Create a list of the images we just manually flagged
|
|
987
|
-
|
|
988
|
-
human_tagged_filenames = os.listdir(human_review_selection_folder)
|
|
989
|
-
human_tagged_relative_paths = []
|
|
990
|
-
# fn = human_tagged_filenames[0]
|
|
991
|
-
for fn in human_tagged_filenames:
|
|
992
|
-
|
|
993
|
-
# E.g. '0000_Beaverhead_elk~AM174~Trip 1~100RECNX~IMG_1397.JPG'
|
|
994
|
-
relative_path = fn[5:].replace('~','/')
|
|
995
|
-
human_tagged_relative_paths.append(relative_path)
|
|
996
|
-
|
|
997
|
-
with open(human_review_list,'w') as f:
|
|
998
|
-
for s in human_tagged_relative_paths:
|
|
999
|
-
f.write(s + '\n')
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
#%% Translate location, image, sequence IDs
|
|
1003
|
-
|
|
1004
|
-
- # Load mappings if available
- if (not force_generate_mappings) and (os.path.isfile(id_mapping_file)):
-
-     print('Loading ID mappings from {}'.format(id_mapping_file))
-
-     with open(id_mapping_file,'r') as f:
-         mappings = json.load(f)
-
-     image_id_mappings = mappings['image_id_mappings']
-     annotation_id_mappings = mappings['annotation_id_mappings']
-     location_id_mappings = mappings['location_id_mappings']
-     sequence_id_mappings = mappings['sequence_id_mappings']
-
- else:
-
-     # Generate mappings
-     mappings = {}
-
-     next_location_id = 0
-     location_id_string_to_n_sequences = defaultdict(int)
-     location_id_string_to_n_images = defaultdict(int)
-
-     image_id_mappings = {}
-     annotation_id_mappings = {}
-     location_id_mappings = {}
-     sequence_id_mappings = {}
-
-     for im in tqdm(images):
-
-         # If we've seen this location before...
-         if im['location'] in location_id_mappings:
-             location_id = location_id_mappings[im['location']]
-         else:
-             # Otherwise assign a string-formatted int as the ID
-             location_id = str(next_location_id)
-
-             location_id_mappings[im['location']] = location_id
-             next_location_id += 1
-
-         # If we've seen this sequence before...
-         if im['seq_id'] in sequence_id_mappings:
-             sequence_id = sequence_id_mappings[im['seq_id']]
-         else:
-             # Otherwise assign a string-formatted int as the ID
-             n_sequences_this_location = location_id_string_to_n_sequences[location_id]
-             sequence_id = 'loc_{}_seq_{}'.format(
-                 location_id.zfill(4),str(n_sequences_this_location).zfill(6))
-             sequence_id_mappings[im['seq_id']] = sequence_id
-
-             n_sequences_this_location += 1
-             location_id_string_to_n_sequences[location_id] = n_sequences_this_location
-
-         assert im['id'] not in image_id_mappings
-
-         # Assign an image ID
-
-         n_images_this_location = location_id_string_to_n_images[location_id]
-         image_id_mappings[im['id']] = 'loc_{}_im_{}'.format(
-             location_id.zfill(4),str(n_images_this_location).zfill(6))
-
-         n_images_this_location += 1
-         location_id_string_to_n_images[location_id] = n_images_this_location
-
-     # ...for each image
-
-     # Assign annotation mappings
-     for i_ann,ann in enumerate(tqdm(annotations)):
-         assert ann['image_id'] in image_id_mappings
-         assert ann['id'] not in annotation_id_mappings
-         annotation_id_mappings[ann['id']] = 'ann_{}'.format(str(i_ann).zfill(8))
-
-     mappings['image_id_mappings'] = image_id_mappings
-     mappings['annotation_id_mappings'] = annotation_id_mappings
-     mappings['location_id_mappings'] = location_id_mappings
-     mappings['sequence_id_mappings'] = sequence_id_mappings
-
-     # Save mappings
-     with open(id_mapping_file,'w') as f:
-         json.dump(mappings,f,indent=2)
-
-     print('Saved ID mappings to {}'.format(id_mapping_file))
-
-     # Back this file up, lest we should accidentally re-run this script
-     # with force_generate_mappings = True and overwrite the mappings we used.
-     datestr = str(datetime.datetime.now()).replace(':','-')
-     backup_file = id_mapping_file.replace('.json','_' + datestr + '.json')
-     shutil.copyfile(id_mapping_file,backup_file)
-
- # ...if we are/aren't re-generating mappings
-
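For context on the removed block above: it builds stable, anonymized IDs by numbering locations in order of first appearance and then numbering images and sequences per location. The following is a minimal stand-alone sketch of that same scheme, using hypothetical toy records rather than the real metadata the script reads from disk; it is an illustration, not code from the package.

from collections import defaultdict

# Hypothetical toy records; the real script reads these from a COCO-camera-traps .json
images = [{'id': 'a', 'location': 'ridge-cam'},
          {'id': 'b', 'location': 'ridge-cam'},
          {'id': 'c', 'location': 'creek-cam'}]

location_id_mappings = {}            # original location string -> zero-padded int string
images_per_location = defaultdict(int)
image_id_mappings = {}               # original image ID -> 'loc_XXXX_im_NNNNNN'

for im in images:
    # Assign location IDs in order of first appearance
    if im['location'] not in location_id_mappings:
        location_id_mappings[im['location']] = str(len(location_id_mappings))
    location_id = location_id_mappings[im['location']]

    # Number images within each location
    n = images_per_location[location_id]
    image_id_mappings[im['id']] = 'loc_{}_im_{}'.format(location_id.zfill(4), str(n).zfill(6))
    images_per_location[location_id] = n + 1

print(image_id_mappings)
# {'a': 'loc_0000_im_000000', 'b': 'loc_0000_im_000001', 'c': 'loc_0001_im_000000'}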
- #%% Apply mappings
-
- for im in images:
-     im['id'] = image_id_mappings[im['id']]
-     im['seq_id'] = sequence_id_mappings[im['seq_id']]
-     im['location'] = location_id_mappings[im['location']]
- for ann in annotations:
-     ann['id'] = annotation_id_mappings[ann['id']]
-     ann['image_id'] = image_id_mappings[ann['image_id']]
-
- print('Applied mappings')
-
-
- #%% Write new dictionaries (modified strings, original files)
-
- output_data = {}
- output_data['images'] = images
- output_data['annotations'] = annotations
- output_data['categories'] = categories
- output_data['info'] = info
-
- with open(output_json_remapped_ids,'w') as f:
-     json.dump(output_data,f,indent=2)
-
-
- #%% Validate .json file (modified strings, original files)
-
- from megadetector.data_management.databases import integrity_check_json_db
-
- options = integrity_check_json_db.IntegrityCheckOptions()
- options.baseDir = input_base
- options.bCheckImageSizes = False
- options.bCheckImageExistence = False
- options.bFindUnusedImages = False
-
- _, _, _ = integrity_check_json_db.integrity_check_json_db(output_json_remapped_ids, options)
-
-
- #%% Preview labels (original files)
-
- from megadetector.visualization import visualize_db
-
- viz_options = visualize_db.DbVizOptions()
- viz_options.num_to_visualize = 1000
- viz_options.trim_to_images_with_bboxes = False
- viz_options.add_search_links = False
- viz_options.sort_by_filename = False
- viz_options.parallelize_rendering = True
- viz_options.include_filename_links = True
-
- # viz_options.classes_to_exclude = ['empty','deer','elk']
- # viz_options.classes_to_include = ['bobcat']
- viz_options.classes_to_include = [viz_options.multiple_categories_tag]
-
- html_output_file, _ = visualize_db.visualize_db(db_path=output_json_remapped_ids,
-                                                 output_dir=os.path.join(output_base,'preview'),
-                                                 image_base_dir=input_base,
-                                                 options=viz_options)
- os.startfile(html_output_file)
-
-
- #%% Copy images to final output folder (prep)
-
- force_copy = False
-
- with open(output_json_remapped_ids,'r') as f:
-     d = json.load(f)
-
- images = d['images']
-
- private_categories = ['human','domestic dog','vehicle']
-
- private_image_ids = set()
-
- category_id_to_name = {c['id']:c['name'] for c in d['categories']}
-
- # ann = d['annotations'][0]
- for ann in d['annotations']:
-     category_name = category_id_to_name[ann['category_id']]
-     if category_name in private_categories:
-         private_image_ids.add(ann['image_id'])
-
- print('Moving {} of {} images to the private folder'.format(len(private_image_ids),len(images)))
-
- def process_image(im):
-
-     input_relative_path = im['file_name']
-     input_absolute_path = os.path.join(input_base,input_relative_path)
-
-     if not os.path.isfile(input_absolute_path):
-         print('Warning: file {} is not available'.format(input_absolute_path))
-         return
-
-     location = im['location']
-     image_id = im['id']
-
-     location_folder = 'loc_' + location.zfill(4)
-     assert location_folder in image_id
-
-     output_relative_path = location_folder + '/' + image_id + '.jpg'
-
-     # Is this a public or private image?
-     private_image = (image_id in private_image_ids)
-
-     # Generate absolute path
-     if private_image:
-         output_absolute_path = os.path.join(output_image_base_private,output_relative_path)
-     else:
-         output_absolute_path = os.path.join(output_image_base_public,output_relative_path)
-
-     # Copy to output
-     output_dir = os.path.dirname(output_absolute_path)
-     os.makedirs(output_dir,exist_ok=True)
-
-     if force_copy or (not os.path.isfile(output_absolute_path)):
-         shutil.copyfile(input_absolute_path,output_absolute_path)
-
-     # Update the filename reference
-     im['file_name'] = output_relative_path
-
- # ...def process_image(im)
-
- #%% Copy images to final output folder (execution)
-
- # For each image
- if n_threads_file_copy == 1:
-     # im = images[0]
-     for im in tqdm(images):
-         process_image(im)
- else:
-     pool = ThreadPool(n_threads_file_copy)
-     pool.map(process_image,images)
-
- print('Finished copying, writing .json output')
-
- # Write output .json
- with open(output_json,'w') as f:
-     json.dump(d,f,indent=1)
-
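The removed cell above copies each image either in a plain serial loop or via a multiprocessing.pool.ThreadPool, and skips files that already exist at the destination. A self-contained sketch of the same pattern, using throwaway temp files rather than the script's real image tree (the paths and file count here are purely illustrative):

import os
import shutil
import tempfile
from multiprocessing.pool import ThreadPool

# Build a few throwaway files so the sketch runs end-to-end
src_dir = tempfile.mkdtemp()
dst_dir = tempfile.mkdtemp()
copy_jobs = []
for i in range(10):
    src = os.path.join(src_dir, 'im_{}.jpg'.format(i))
    with open(src, 'wb') as f:
        f.write(b'not really a jpeg')
    copy_jobs.append((src, os.path.join(dst_dir, 'im_{}.jpg'.format(i))))

def copy_one(job):
    src, dst = job
    os.makedirs(os.path.dirname(dst), exist_ok=True)
    if not os.path.isfile(dst):
        # Skip files that were already copied on a previous run
        shutil.copyfile(src, dst)

n_threads = 4  # set to 1 to fall back to a plain serial loop, as the script does
if n_threads == 1:
    for job in copy_jobs:
        copy_one(job)
else:
    with ThreadPool(n_threads) as pool:
        pool.map(copy_one, copy_jobs)

assert len(os.listdir(dst_dir)) == len(copy_jobs)

Because the work is I/O-bound file copying, threads (rather than processes) are a reasonable choice here; the GIL is not a bottleneck.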
- #%% Make sure the right number of images got there
-
- from pathlib import Path
- all_output_files = []
- all_output_files_list = os.path.join(output_base,'all_output_files.json')
-
- for path in Path(output_image_base).rglob('*.*'):
-     path = str(path)
-     path = os.path.relpath(path,output_image_base)
-     all_output_files.append(path)
- with open(all_output_files_list,'w') as f:
-     json.dump(all_output_files,f,indent=1)
-
- print('Enumerated {} output files (of {} images)'.format(len(all_output_files),len(images)))
-
-
- #%% Validate .json file (final filenames)
-
- from megadetector.data_management.databases import integrity_check_json_db
-
- options = integrity_check_json_db.IntegrityCheckOptions()
- options.baseDir = input_base
- options.bCheckImageSizes = False
- options.bCheckImageExistence = False
- options.bFindUnusedImages = False
-
- _, _, _ = integrity_check_json_db.integrity_check_json_db(output_json, options)
-
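The validation cells above delegate to the package's integrity_check_json_db. As a rough illustration of the kind of invariants such a check enforces on a COCO-camera-traps dict (this is my own minimal stand-in, not the package's implementation, and the sample records are hypothetical):

def check_coco_camera_traps(d):
    """Minimal sanity check: unique IDs, and annotations that point at real images/categories."""
    image_ids = [im['id'] for im in d['images']]
    assert len(image_ids) == len(set(image_ids)), 'Duplicate image IDs'

    ann_ids = [ann['id'] for ann in d['annotations']]
    assert len(ann_ids) == len(set(ann_ids)), 'Duplicate annotation IDs'

    image_id_set = set(image_ids)
    category_ids = {c['id'] for c in d['categories']}
    for ann in d['annotations']:
        assert ann['image_id'] in image_id_set, 'Annotation points to a missing image'
        assert ann['category_id'] in category_ids, 'Annotation points to a missing category'

# Example with a hypothetical two-image database
d_example = {
    'images': [{'id': 'loc_0000_im_000000'}, {'id': 'loc_0000_im_000001'}],
    'annotations': [{'id': 'ann_00000000', 'image_id': 'loc_0000_im_000000', 'category_id': 1}],
    'categories': [{'id': 1, 'name': 'deer'}]
}
check_coco_camera_traps(d_example)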
- #%% Preview labels (final filenames)
-
- from megadetector.visualization import visualize_db
-
- viz_options = visualize_db.DbVizOptions()
- viz_options.num_to_visualize = 1500
- viz_options.trim_to_images_with_bboxes = False
- viz_options.add_search_links = False
- viz_options.sort_by_filename = False
- viz_options.parallelize_rendering = True
- viz_options.include_filename_links = True
-
- # viz_options.classes_to_exclude = ['empty','deer','elk']
- viz_options.classes_to_include = ['bear','mountain lion']
- # viz_options.classes_to_include = ['horse']
- # viz_options.classes_to_include = [viz_options.multiple_categories_tag]
- # viz_options.classes_to_include = ['human','vehicle','domestic dog']
-
- html_output_file, _ = visualize_db.visualize_db(db_path=output_json,
-                                                 output_dir=os.path.join(output_base,'final-preview-01'),
-                                                 image_base_dir=output_image_base_public,
-                                                 options=viz_options)
- os.startfile(html_output_file)
-
-
- #%% Create zipfiles
-
- #%% List public files
-
- from pathlib import Path
- all_public_output_files = []
- all_public_output_files_list = os.path.join(output_base,'all_public_output_files.json')
-
- if not os.path.isfile(all_public_output_files_list):
-     for path in Path(output_image_base_public).rglob('*.*'):
-         path = str(path)
-         path = os.path.relpath(path,output_image_base)
-         all_public_output_files.append(path)
-     with open(all_public_output_files_list,'w') as f:
-         json.dump(all_public_output_files,f,indent=1)
- else:
-     with open(all_public_output_files_list,'r') as f:
-         all_public_output_files = json.load(f)
-
- print('Enumerated {} public output files'.format(len(all_public_output_files)))
-
-
- #%% Find the size of each file
-
- filename_to_size = {}
-
- all_public_output_sizes_list = os.path.join(output_base,'all_public_output_sizes.json')
-
- if not os.path.isfile(all_public_output_sizes_list):
-     # fn = all_public_output_files[0]
-     for fn in tqdm(all_public_output_files):
-         p = os.path.join(output_image_base,fn)
-         assert os.path.isfile(p)
-         filename_to_size[fn] = os.path.getsize(p)
-
-     with open(all_public_output_sizes_list,'w') as f:
-         json.dump(filename_to_size,f,indent=1)
- else:
-     with open(all_public_output_sizes_list,'r') as f:
-         filename_to_size = json.load(f)
-
- assert len(filename_to_size) == len(all_public_output_files)
-
-
- #%% Split into chunks of approximately-equal size
-
- import humanfriendly
- total_size = sum(filename_to_size.values())
- print('{} in {} files'.format(humanfriendly.format_size(total_size),len(all_public_output_files)))
-
- bytes_per_part = 320e9
-
- file_lists = []
-
- current_file_list = []
- n_bytes_current_file_list = 0
-
- for fn in all_public_output_files:
-     size = filename_to_size[fn]
-     current_file_list.append(fn)
-     n_bytes_current_file_list += size
-     if n_bytes_current_file_list > bytes_per_part:
-         file_lists.append(current_file_list)
-         current_file_list = []
-         n_bytes_current_file_list = 0
- # ...for each file
-
- file_lists.append(current_file_list)
-
- assert sum([len(l) for l in file_lists]) == len(all_public_output_files)
-
- print('List sizes:')
- for l in file_lists:
-     print(len(l))
-
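The chunking loop removed above is a greedy, order-preserving split: files accumulate into the current part until the running total exceeds bytes_per_part, so each part can overshoot the threshold by at most one file. A stand-alone sketch of the same idea, with hypothetical byte sizes (the helper name and sample data are my own, not from the package):

def split_by_size(filename_to_size, bytes_per_part):
    """Greedily group filenames into consecutive parts of roughly bytes_per_part bytes each."""
    parts = []
    current, current_bytes = [], 0
    for fn, size in filename_to_size.items():
        current.append(fn)
        current_bytes += size
        if current_bytes > bytes_per_part:
            parts.append(current)
            current, current_bytes = [], 0
    if current:
        parts.append(current)
    return parts

# Hypothetical sizes in bytes
sizes = {'a.jpg': 400, 'b.jpg': 300, 'c.jpg': 500, 'd.jpg': 100}
parts = split_by_size(sizes, bytes_per_part=600)
assert sum(len(p) for p in parts) == len(sizes)
print(parts)  # [['a.jpg', 'b.jpg'], ['c.jpg', 'd.jpg']]

Unlike the removed code, this sketch only keeps the trailing part when it is non-empty; otherwise the behavior is the same.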
- #%% Create a zipfile for each chunk
-
- import os
- import zipfile
- from zipfile import ZipFile
-
- def create_zipfile(i_file_list):
-
-     file_list = file_lists[i_file_list]
-     zipfile_name = 'k:\\idaho-camera-traps-images.part_{}.zip'.format(i_file_list)
-
-     print('Processing archive {} to file {}'.format(i_file_list,zipfile_name))
-
-     with ZipFile(zipfile_name, 'w') as zipObj:
-
-         for filename_relative in file_list:
-
-             assert filename_relative.startswith('public')
-             filename_absolute = os.path.join(output_image_base,filename_relative)
-             zipObj.write(filename_absolute.replace('\\','/'),
-                          filename_relative, compress_type=zipfile.ZIP_STORED)
-
-         # ...for each filename
-
-     # ...with ZipFile()
-
- # ...def create_zipfile()
-
- # i_file_list = 0; file_list = file_lists[i_file_list]
-
- n_zip_threads = 1 # len(file_lists)
- if n_zip_threads == 1:
-     for i_file_list in range(0,len(file_lists)):
-         create_zipfile(i_file_list)
- else:
-     pool = ThreadPool(n_zip_threads)
-     indices = list(range(0,len(file_lists)))
-     pool.map(create_zipfile,indices)
-
- # ...if __name__ == "__main__"
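The archives above are written with ZIP_STORED, i.e. no compression, which is sensible for JPEGs that are already compressed. For completeness, a small sketch of how one might spot-check a finished archive with the standard zipfile module; the archive path is hypothetical, mirroring the part naming used above:

import os
import zipfile

zipfile_name = 'idaho-camera-traps-images.part_0.zip'  # hypothetical local path

if os.path.isfile(zipfile_name):
    with zipfile.ZipFile(zipfile_name, 'r') as z:
        names = z.namelist()
        print('{} files in {}'.format(len(names), zipfile_name))
        # Every entry should be stored uncompressed
        assert all(info.compress_type == zipfile.ZIP_STORED for info in z.infolist())
        # testzip() returns None when all entries pass their CRC checks
        assert z.testzip() is None, 'Corrupt entry found'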