megadetector 5.0.28__py3-none-any.whl → 5.0.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +23 -23
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -1
- megadetector/data_management/camtrap_dp_to_coco.py +45 -45
- megadetector/data_management/cct_json_utils.py +101 -101
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +189 -189
- megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
- megadetector/data_management/databases/integrity_check_json_db.py +202 -188
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +38 -38
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +130 -124
- megadetector/data_management/labelme_to_yolo.py +78 -72
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
- megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +70 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
- megadetector/data_management/mewc_to_md.py +339 -340
- megadetector/data_management/ocr_tools.py +258 -252
- megadetector/data_management/read_exif.py +231 -224
- megadetector/data_management/remap_coco_categories.py +26 -26
- megadetector/data_management/remove_exif.py +31 -20
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +41 -41
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +117 -120
- megadetector/data_management/yolo_to_coco.py +195 -188
- megadetector/detection/change_detection.py +831 -0
- megadetector/detection/process_video.py +340 -337
- megadetector/detection/pytorch_detector.py +304 -262
- megadetector/detection/run_detector.py +177 -164
- megadetector/detection/run_detector_batch.py +364 -363
- megadetector/detection/run_inference_with_yolov5_val.py +328 -325
- megadetector/detection/run_tiled_inference.py +256 -249
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +290 -282
- megadetector/postprocessing/add_max_conf.py +15 -11
- megadetector/postprocessing/categorize_detections_by_size.py +44 -44
- megadetector/postprocessing/classification_postprocessing.py +415 -415
- megadetector/postprocessing/combine_batch_outputs.py +20 -21
- megadetector/postprocessing/compare_batch_results.py +528 -517
- megadetector/postprocessing/convert_output_format.py +97 -97
- megadetector/postprocessing/create_crop_folder.py +219 -146
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -499
- megadetector/postprocessing/load_api_results.py +23 -20
- megadetector/postprocessing/md_to_coco.py +129 -98
- megadetector/postprocessing/md_to_labelme.py +89 -83
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +87 -114
- megadetector/postprocessing/postprocess_batch_results.py +313 -298
- megadetector/postprocessing/remap_detection_categories.py +36 -36
- megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -66
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +33 -33
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/azure_utils.py +22 -22
- megadetector/utils/ct_utils.py +1018 -200
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +541 -518
- megadetector/utils/path_utils.py +1457 -398
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/sas_blob_utils.py +53 -49
- megadetector/utils/split_locations_into_train_val.py +61 -61
- megadetector/utils/string_utils.py +147 -26
- megadetector/utils/url_utils.py +463 -173
- megadetector/utils/wi_utils.py +2629 -2526
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +21 -21
- megadetector/visualization/render_images_with_thumbnails.py +37 -73
- megadetector/visualization/visualization_utils.py +401 -397
- megadetector/visualization/visualize_db.py +197 -190
- megadetector/visualization/visualize_detector_output.py +79 -73
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/METADATA +135 -132
- megadetector-5.0.29.dist-info/RECORD +163 -0
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector-5.0.28.dist-info/RECORD +0 -209
|
@@ -1,356 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
|
|
3
|
-
rspb_to_json.py
|
|
4
|
-
|
|
5
|
-
Convert the .csv file provided for the RSPB data set to a
|
|
6
|
-
COCO-camera-traps .json file
|
|
7
|
-
|
|
8
|
-
"""
|
|
9
|
-
|
|
10
|
-
#%% Constants and environment
|
|
11
|
-
|
|
12
|
-
import pandas as pd
|
|
13
|
-
import os
|
|
14
|
-
import glob
|
|
15
|
-
import json
|
|
16
|
-
import re
|
|
17
|
-
import uuid
|
|
18
|
-
import tqdm
|
|
19
|
-
import time
|
|
20
|
-
import ntpath
|
|
21
|
-
import humanfriendly
|
|
22
|
-
import PIL
|
|
23
|
-
|
|
24
|
-
from megadetector.data_management.databases import integrity_check_json_db
|
|
25
|
-
from megadetector.visualization import visualize_db
|
|
26
|
-
|
|
27
|
-
# [location] is an obfuscation
|
|
28
|
-
baseDir = r'e:\wildlife_data\rspb_gola_data'
|
|
29
|
-
metadataFile = os.path.join(baseDir,'gola_camtrapr_master_renaming_table_2019-01-31.csv')
|
|
30
|
-
outputFile = os.path.join(baseDir,'rspb_gola_labeled.json')
|
|
31
|
-
imageBaseDir = os.path.join(baseDir,'gola_camtrapr_data')
|
|
32
|
-
imageFlatDir = os.path.join(baseDir,'gola_camtrapr_data_flat')
|
|
33
|
-
unmatchedImagesFile = os.path.join(baseDir,'unmatchedImages.txt')
|
|
34
|
-
assert(os.path.isdir(imageBaseDir))
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
#%% Create info struct
|
|
38
|
-
|
|
39
|
-
info = {}
|
|
40
|
-
info['year'] = 2019
|
|
41
|
-
info['version'] = 1
|
|
42
|
-
info['description'] = 'COCO style database for RSPB gola data'
|
|
43
|
-
info['secondary_contributor'] = 'Converted to COCO .json by Dan Morris'
|
|
44
|
-
info['contributor'] = 'RSPB'
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
#%% Read source data
|
|
48
|
-
|
|
49
|
-
metadataTable = pd.read_csv(metadataFile)
|
|
50
|
-
|
|
51
|
-
print('Read {} columns and {} rows from metadata file'.format(len(metadataTable.columns),
|
|
52
|
-
len(metadataTable)))
|
|
53
|
-
|
|
54
|
-
# metadataTable.columns.values
|
|
55
|
-
#
|
|
56
|
-
# array(['Project', 'inDir', 'FileName', 'Station', 'Camera',
|
|
57
|
-
# 'StationCameraFileName', 'DateTimeOriginal', 'DateReadable',
|
|
58
|
-
# 'outDir', 'filename_new', 'fileExistsAlready', 'CopyStatus',
|
|
59
|
-
# 'Species'], dtype=object)
|
|
60
|
-
|
|
61
|
-
metadataTable[['Species']] = metadataTable[['Species']].fillna(value='unlabeled')
|
|
62
|
-
|
|
63
|
-
# We'll populate these later
|
|
64
|
-
metadataTable['sequenceID'] = ''
|
|
65
|
-
metadataTable['frameNumber'] = ''
|
|
66
|
-
metadataTable['filePath'] = ''
|
|
67
|
-
|
|
68
|
-
failedCopies = metadataTable[~metadataTable.CopyStatus]
|
|
69
|
-
print('Removing {} rows that were failed copies'.format(len(failedCopies)))
|
|
70
|
-
|
|
71
|
-
metadataTable = metadataTable[metadataTable.CopyStatus]
|
|
72
|
-
|
|
73
|
-
species = list(metadataTable.Species)
|
|
74
|
-
uniqueSpecies = set(species)
|
|
75
|
-
|
|
76
|
-
print('Read {} unique species in {} rows'.format(len(uniqueSpecies),len(metadataTable)))
|
|
77
|
-
|
|
78
|
-
speciesMappings = {}
|
|
79
|
-
|
|
80
|
-
# keys should be lowercase
|
|
81
|
-
speciesMappings['blank'] = 'empty'
|
|
82
|
-
speciesMappings[''] = 'unlabeled'
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
#%% Enumerate images, confirm filename uniqueness
|
|
86
|
-
|
|
87
|
-
imageFullPaths = glob.glob(os.path.join(imageBaseDir,r'**\*.JPG'),recursive=True)
|
|
88
|
-
|
|
89
|
-
print('Counted {} images'.format(len(imageFullPaths)))
|
|
90
|
-
|
|
91
|
-
filenamesOnly = set()
|
|
92
|
-
|
|
93
|
-
for p in imageFullPaths:
|
|
94
|
-
|
|
95
|
-
fn = ntpath.basename(p)
|
|
96
|
-
assert fn not in filenamesOnly
|
|
97
|
-
filenamesOnly.add(fn)
|
|
98
|
-
|
|
99
|
-
print('Finished uniqueness checking')
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
#%% Update metadata filenames to include site and camera folders, check existence
|
|
103
|
-
#
|
|
104
|
-
# Takes ~1min
|
|
105
|
-
|
|
106
|
-
filenamesToRows = {}
|
|
107
|
-
|
|
108
|
-
startTime = time.time()
|
|
109
|
-
|
|
110
|
-
newRows = []
|
|
111
|
-
matchFailures = []
|
|
112
|
-
|
|
113
|
-
# iRow = 0; row = metadataTable.iloc[iRow]
|
|
114
|
-
for iRow,row in tqdm.tqdm(metadataTable.iterrows(), total=metadataTable.shape[0]):
|
|
115
|
-
|
|
116
|
-
baseFn = row['filename_new']
|
|
117
|
-
station = row['Station']
|
|
118
|
-
|
|
119
|
-
filenamesToRows[baseFn] = iRow
|
|
120
|
-
|
|
121
|
-
# There's a bug in the metadata; the 'camera' column isn't correct.
|
|
122
|
-
# camera = row['Camera']
|
|
123
|
-
# These appear as, e.g., '3.22e12'
|
|
124
|
-
# camera = str(int(float(camera)))
|
|
125
|
-
|
|
126
|
-
# Let's pull this out of the file name instead
|
|
127
|
-
#
|
|
128
|
-
# Filenames look like one of the following:
|
|
129
|
-
#
|
|
130
|
-
# A1__03224850850507__2015-11-28__10-45-04(1).JPG
|
|
131
|
-
# Bayama2PH__C05__NA(NA).JPG
|
|
132
|
-
pat = '^(?P<station>.+?)__(?P<camera>.+?)__((?P<date>.+?)__)?(?P<time>[^_\()]+?)\((?P<frame>.+?)\)\.JPG'
|
|
133
|
-
match = re.match(pat,baseFn)
|
|
134
|
-
if match is None:
|
|
135
|
-
raise ValueError('Regex failure at row {}: {}'.format(iRow,baseFn))
|
|
136
|
-
assert(station == match.group('station'))
|
|
137
|
-
camera = match.group('camera')
|
|
138
|
-
row['Camera'] = camera
|
|
139
|
-
|
|
140
|
-
assert match.group('station') is not None
|
|
141
|
-
assert match.group('camera') is not None
|
|
142
|
-
assert match.group('frame') is not None
|
|
143
|
-
|
|
144
|
-
if match.group('date') is None:
|
|
145
|
-
imgDate = ''
|
|
146
|
-
else:
|
|
147
|
-
imgDate = match.group('date')
|
|
148
|
-
|
|
149
|
-
if match.group('time') is None:
|
|
150
|
-
imgTime = ''
|
|
151
|
-
else:
|
|
152
|
-
imgTime = match.group('time')
|
|
153
|
-
|
|
154
|
-
frame = -1
|
|
155
|
-
try:
|
|
156
|
-
frame = int(match.group['frame'])
|
|
157
|
-
except:
|
|
158
|
-
pass
|
|
159
|
-
row['frameNumber'] = frame
|
|
160
|
-
|
|
161
|
-
fn = os.path.join(station,camera,baseFn)
|
|
162
|
-
fullPath = os.path.join(imageBaseDir,fn)
|
|
163
|
-
row['filePath'] = fn
|
|
164
|
-
# assert(os.path.isfile(fullPath))
|
|
165
|
-
if not os.path.isfile(fullPath):
|
|
166
|
-
print('Failed to match image {}'.format(fullPath))
|
|
167
|
-
matchFailures.append(fullPath)
|
|
168
|
-
continue
|
|
169
|
-
|
|
170
|
-
# metadataTable.iloc[iRow] = row
|
|
171
|
-
newRows.append(row)
|
|
172
|
-
|
|
173
|
-
elapsed = time.time() - startTime
|
|
174
|
-
|
|
175
|
-
# Re-assemble into an updated table
|
|
176
|
-
metadataTable = pd.DataFrame(newRows)
|
|
177
|
-
|
|
178
|
-
print('Finished checking file existence, extracting metadata in {}, couldn''t find {} images'.format(
|
|
179
|
-
humanfriendly.format_timespan(elapsed),len(matchFailures)))
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
#%% Check for images that aren't included in the metadata file
|
|
183
|
-
|
|
184
|
-
imagesNotInMetadata = []
|
|
185
|
-
|
|
186
|
-
# Enumerate all images
|
|
187
|
-
for iImage,imagePath in enumerate(imageFullPaths):
|
|
188
|
-
|
|
189
|
-
fn = ntpath.basename(imagePath)
|
|
190
|
-
if(fn not in filenamesToRows):
|
|
191
|
-
imagesNotInMetadata.append(imagePath)
|
|
192
|
-
|
|
193
|
-
print('Finished matching {} images, failed to match {}'.format(
|
|
194
|
-
len(imageFullPaths),len(imagesNotInMetadata)))
|
|
195
|
-
|
|
196
|
-
# Write to a text file
|
|
197
|
-
with open(unmatchedImagesFile, 'w') as f:
|
|
198
|
-
for fn in imagesNotInMetadata:
|
|
199
|
-
f.write('{}\n'.format(fn))
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
#%% Create CCT dictionaries
|
|
203
|
-
|
|
204
|
-
# Also gets image sizes, so this takes ~6 minutes
|
|
205
|
-
#
|
|
206
|
-
# Implicitly checks images for overt corruptness, i.e. by not crashing.
|
|
207
|
-
|
|
208
|
-
images = []
|
|
209
|
-
annotations = []
|
|
210
|
-
|
|
211
|
-
# Map categories to integer IDs (that's what COCO likes)
|
|
212
|
-
nextCategoryID = 1
|
|
213
|
-
categoriesToCategoryId = {'empty':0}
|
|
214
|
-
categoriesToCounts = {'empty':0}
|
|
215
|
-
|
|
216
|
-
# For each image
|
|
217
|
-
#
|
|
218
|
-
# Because in practice images are 1:1 with annotations in this data set,
|
|
219
|
-
# this is also a loop over annotations.
|
|
220
|
-
|
|
221
|
-
startTime = time.time()
|
|
222
|
-
|
|
223
|
-
# iRow = 0; row = metadataTable.iloc[iRow]
|
|
224
|
-
for iRow,row in tqdm.tqdm(metadataTable.iterrows(), total=metadataTable.shape[0]):
|
|
225
|
-
|
|
226
|
-
im = {}
|
|
227
|
-
|
|
228
|
-
# A1__03224850850507__2015-11-28__10-45-04(1).JPG
|
|
229
|
-
fn = row['filename_new']
|
|
230
|
-
assert '.JPG' in fn
|
|
231
|
-
fn = fn.replace('.JPG','')
|
|
232
|
-
im['id'] = fn
|
|
233
|
-
|
|
234
|
-
# 'A1\\03224850850507\\A1__03224850850507__2015-11-28__10-45-04(1).JPG'
|
|
235
|
-
im['file_name'] = row['filePath']
|
|
236
|
-
|
|
237
|
-
# Not currently populated
|
|
238
|
-
im['seq_id'] = row['sequenceID']
|
|
239
|
-
|
|
240
|
-
# Often -1, sometimes a semi-meaningful int
|
|
241
|
-
im['frame_num'] = row['frameNumber']
|
|
242
|
-
|
|
243
|
-
# A1
|
|
244
|
-
im['site']= row['Station']
|
|
245
|
-
|
|
246
|
-
# 03224850850507
|
|
247
|
-
im['camera'] = row['Camera']
|
|
248
|
-
|
|
249
|
-
# In variable form, but sometimes '28/11/2015 10:45'
|
|
250
|
-
im['datetime'] = row['DateTimeOriginal']
|
|
251
|
-
|
|
252
|
-
images.append(im)
|
|
253
|
-
|
|
254
|
-
# Check image height and width
|
|
255
|
-
imagePath = os.path.join(imageBaseDir,im['file_name'])
|
|
256
|
-
assert(os.path.isfile(imagePath))
|
|
257
|
-
pilImage = PIL.Image.open(imagePath)
|
|
258
|
-
width, height = pilImage.size
|
|
259
|
-
im['width'] = width
|
|
260
|
-
im['height'] = height
|
|
261
|
-
|
|
262
|
-
category = row['Species'].lower()
|
|
263
|
-
if category in speciesMappings:
|
|
264
|
-
category = speciesMappings[category]
|
|
265
|
-
|
|
266
|
-
# Have we seen this category before?
|
|
267
|
-
if category in categoriesToCategoryId:
|
|
268
|
-
categoryID = categoriesToCategoryId[category]
|
|
269
|
-
categoriesToCounts[category] += 1
|
|
270
|
-
else:
|
|
271
|
-
categoryID = nextCategoryID
|
|
272
|
-
categoriesToCategoryId[category] = categoryID
|
|
273
|
-
categoriesToCounts[category] = 0
|
|
274
|
-
nextCategoryID += 1
|
|
275
|
-
|
|
276
|
-
# Create an annotation
|
|
277
|
-
ann = {}
|
|
278
|
-
|
|
279
|
-
# The Internet tells me this guarantees uniqueness to a reasonable extent, even
|
|
280
|
-
# beyond the sheer improbability of collisions.
|
|
281
|
-
ann['id'] = str(uuid.uuid1())
|
|
282
|
-
ann['image_id'] = im['id']
|
|
283
|
-
ann['category_id'] = categoryID
|
|
284
|
-
|
|
285
|
-
annotations.append(ann)
|
|
286
|
-
|
|
287
|
-
# ...for each image
|
|
288
|
-
|
|
289
|
-
# Convert categories to a CCT-style dictionary
|
|
290
|
-
|
|
291
|
-
categories = []
|
|
292
|
-
|
|
293
|
-
for category in categoriesToCounts:
|
|
294
|
-
|
|
295
|
-
print('Category {}, count {}'.format(category,categoriesToCounts[category]))
|
|
296
|
-
categoryID = categoriesToCategoryId[category]
|
|
297
|
-
cat = {}
|
|
298
|
-
cat['name'] = category
|
|
299
|
-
cat['id'] = categoryID
|
|
300
|
-
categories.append(cat)
|
|
301
|
-
|
|
302
|
-
elapsed = time.time() - startTime
|
|
303
|
-
|
|
304
|
-
print('Finished creating CCT dictionaries in {}'.format(
|
|
305
|
-
humanfriendly.format_timespan(elapsed)))
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
#%% Write output
|
|
309
|
-
|
|
310
|
-
json_data = {}
|
|
311
|
-
json_data['images'] = images
|
|
312
|
-
json_data['annotations'] = annotations
|
|
313
|
-
json_data['categories'] = categories
|
|
314
|
-
json_data['info'] = info
|
|
315
|
-
json.dump(json_data,open(outputFile,'w'),indent=4)
|
|
316
|
-
|
|
317
|
-
print('Finished writing .json file with {} images, {} annotations, and {} categories'.format(
|
|
318
|
-
len(images),len(annotations),len(categories)))
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
#%% Check database integrity
|
|
322
|
-
|
|
323
|
-
options = integrity_check_json_db.IntegrityCheckOptions()
|
|
324
|
-
options.baseDir = imageBaseDir
|
|
325
|
-
options.bCheckImageSizes = False
|
|
326
|
-
options.bFindUnusedImages = False
|
|
327
|
-
integrity_check_json_db.integrity_check_json_db(outputFile, options)
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
#%% Preview a few images to make sure labels were passed along sensibly
|
|
331
|
-
|
|
332
|
-
db_path = outputFile
|
|
333
|
-
output_dir = os.path.join(baseDir,'label_preview')
|
|
334
|
-
image_base_dir = imageBaseDir
|
|
335
|
-
options = visualize_db.DbVizOptions()
|
|
336
|
-
options.num_to_visualize = 100
|
|
337
|
-
htmlOutputFile = visualize_db.visualize_db(db_path,output_dir,image_base_dir,options)
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
#%% One-time processing step: copy images to a flat directory for annotation
|
|
341
|
-
|
|
342
|
-
if False:
|
|
343
|
-
|
|
344
|
-
#%%
|
|
345
|
-
|
|
346
|
-
from shutil import copyfile
|
|
347
|
-
os.makedirs(imageFlatDir,exist_ok=True)
|
|
348
|
-
|
|
349
|
-
for sourcePath in tqdm.tqdm(imageFullPaths):
|
|
350
|
-
fn = ntpath.basename(sourcePath)
|
|
351
|
-
targetPath = os.path.join(imageFlatDir,fn)
|
|
352
|
-
assert not os.path.isfile(targetPath)
|
|
353
|
-
copyfile(sourcePath,targetPath)
|
|
354
|
-
|
|
355
|
-
print('Copied {} files'.format(len(imageFullPaths)))
|
|
356
|
-
|
|
@@ -1,320 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
|
|
3
|
-
save_the_elephants_survey_A.py
|
|
4
|
-
|
|
5
|
-
Convert the .csv file provided for the Save the Elephants Survey A data set to a
|
|
6
|
-
COCO-camera-traps .json file
|
|
7
|
-
|
|
8
|
-
"""
|
|
9
|
-
|
|
10
|
-
#%% Constants and environment
|
|
11
|
-
|
|
12
|
-
import pandas as pd
|
|
13
|
-
import os
|
|
14
|
-
import json
|
|
15
|
-
import uuid
|
|
16
|
-
import time
|
|
17
|
-
import humanfriendly
|
|
18
|
-
import numpy as np
|
|
19
|
-
from tqdm import tqdm
|
|
20
|
-
|
|
21
|
-
from megadetector.utils.path_utils import find_images
|
|
22
|
-
|
|
23
|
-
input_base = r'z:/ste_2019_08_drop'
|
|
24
|
-
input_metadata_file = os.path.join(input_base,'SURVEY_A.xlsx')
|
|
25
|
-
|
|
26
|
-
output_base = r'f:/save_the_elephants/survey_a'
|
|
27
|
-
output_json_file = os.path.join(output_base,'ste_survey_a.json')
|
|
28
|
-
image_directory = os.path.join(input_base,'SURVEY A with False Triggers')
|
|
29
|
-
|
|
30
|
-
os.makedirs(output_base,exist_ok=True)
|
|
31
|
-
assert(os.path.isdir(image_directory))
|
|
32
|
-
assert(os.path.isfile(input_metadata_file))
|
|
33
|
-
|
|
34
|
-
# Handle all unstructured fields in the source data as extra fields in the annotations
|
|
35
|
-
mapped_fields = {'No. of Animals in Photo':'num_animals',
|
|
36
|
-
'No. of new indiviauls (first sighting of new individual)':'num_new_individuals',
|
|
37
|
-
'Number Adult Males (first sighting of new individual)':'num_adult_males',
|
|
38
|
-
'Number Adult Females (first sighting of new individual)':'num_adult_females',
|
|
39
|
-
'Number Adult Unknown (first sighting of new individual)':'num_adult_unknown',
|
|
40
|
-
'Number Sub-adult Males (first sighting of new individual)':'num_subadult_males',
|
|
41
|
-
'Number Sub-adult Females (first sighting of new individual)':'num_subadult_females',
|
|
42
|
-
'Number Sub-adult Unknown (first sighting of new individual)':'num_subadult_unknown',
|
|
43
|
-
'Number Juvenile (first sighting of new individual)':'num_juvenile',
|
|
44
|
-
'Number Newborn (first sighting of new individual)':'num_newborn',
|
|
45
|
-
'Activity':'activity',
|
|
46
|
-
'Animal ID':'animal_id',
|
|
47
|
-
'Specific Notes':'notes'}
|
|
48
|
-
|
|
49
|
-
# photo_type really should be an image property, but there are a few conflicts
|
|
50
|
-
# that forced me to handle it as an annotation proprerty
|
|
51
|
-
mapped_fields['Photo Type '] = 'photo_type'
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
#%% Read source data
|
|
55
|
-
|
|
56
|
-
input_metadata = pd.read_excel(input_metadata_file, sheet_name='9. CT Image')
|
|
57
|
-
input_metadata = input_metadata.iloc[2:]
|
|
58
|
-
|
|
59
|
-
print('Read {} columns and {} rows from metadata file'.format(len(input_metadata.columns),
|
|
60
|
-
len(input_metadata)))
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
#%% Map filenames to rows, verify image existence
|
|
64
|
-
|
|
65
|
-
start_time = time.time()
|
|
66
|
-
|
|
67
|
-
# Maps relative paths to row indices in input_metadata
|
|
68
|
-
filenames_to_rows = {}
|
|
69
|
-
filenames_with_multiple_annotations = []
|
|
70
|
-
missing_images = []
|
|
71
|
-
|
|
72
|
-
# Build up a map from filenames to a list of rows, checking image existence as we go
|
|
73
|
-
for i_row, fn in tqdm(enumerate(input_metadata['Image Name']),total=len(input_metadata)):
|
|
74
|
-
|
|
75
|
-
# Ignore directories
|
|
76
|
-
if not fn.endswith('.JPG'):
|
|
77
|
-
continue
|
|
78
|
-
|
|
79
|
-
if fn in filenames_to_rows:
|
|
80
|
-
filenames_with_multiple_annotations.append(fn)
|
|
81
|
-
filenames_to_rows[fn].append(i_row)
|
|
82
|
-
else:
|
|
83
|
-
filenames_to_rows[fn] = [i_row]
|
|
84
|
-
image_path = os.path.join(image_directory, fn)
|
|
85
|
-
if not os.path.isfile(image_path):
|
|
86
|
-
missing_images.append(image_path)
|
|
87
|
-
|
|
88
|
-
elapsed = time.time() - start_time
|
|
89
|
-
|
|
90
|
-
print('Finished verifying image existence for {} files in {}, found {} filenames with multiple labels, {} missing images'.format(
|
|
91
|
-
len(filenames_to_rows), humanfriendly.format_timespan(elapsed),
|
|
92
|
-
len(filenames_with_multiple_annotations),len(missing_images)))
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
#%% Make sure the multiple-annotation cases make sense

if False:

    #%%

    # Spot-check one duplicated filename: it should map to more than one row,
    # and we print the species recorded in each of those rows
    example_filename = filenames_with_multiple_annotations[1000]
    example_rows = filenames_to_rows[example_filename]
    assert len(example_rows) > 1
    for example_row in example_rows:
        print(input_metadata.iloc[example_row]['Species'])
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
#%% Check for images that aren't included in the metadata file

# Enumerate all images on disk, then record any image that never appears in
# the metadata spreadsheet
image_full_paths = find_images(image_directory, bRecursive=True)

unannotated_images = []

# The enumerate() index in the original loop was unused, so iterate over the
# paths directly; tqdm infers the total from the list length
for image_path in tqdm(image_full_paths):
    relative_path = os.path.relpath(image_path,image_directory)
    if relative_path not in filenames_to_rows:
        unannotated_images.append(relative_path)

print('Finished checking {} images to make sure they\'re in the metadata, found {} unannotated images'.format(
    len(image_full_paths),len(unannotated_images)))
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
#%% Create CCT dictionaries

# Build the COCO Camera Traps 'images', 'annotations', and 'categories' tables
# from the filename->rows index created above.

images = []
annotations = []
categories = []

# Maps image IDs to image dicts, so multiple annotations for the same file
# share a single image record
image_ids_to_images = {}

category_name_to_category = {}

# Force the empty category to be ID 0
empty_category = {}
empty_category['name'] = 'empty'
empty_category['id'] = 0
category_name_to_category['empty'] = empty_category
categories.append(empty_category)
next_category_id = 1

start_time = time.time()

# i_image = 0; image_name = list(filenames_to_rows.keys())[i_image]
for image_name in tqdm(list(filenames_to_rows.keys())):

    # Example filename:
    #
    # 'Site 1_Oloisukut_1\Oloisukut_A11_UP\Service_2\100EK113\EK001382.JPG'
    # 'Site 1_Oloisukut_1\Oloisukut_A11_UP\Service_2.1\100EK113\EK001382.JPG'
    #
    # Turn the relative path into a flat, space-free image ID
    img_id = image_name.replace('\\','/').replace('/','_').replace(' ','_')

    row_indices = filenames_to_rows[image_name]

    # i_row = row_indices[0]
    for i_row in row_indices:

        row = input_metadata.iloc[i_row]
        assert(row['Image Name'] == image_name)

        timestamp = row['Date'].strftime("%d/%m/%Y")
        station_label = row['Camera Trap Station Label']
        photo_type = row['Photo Type ']
        # A float here is NaN, i.e. an empty spreadsheet cell
        if isinstance(photo_type,float):
            photo_type = ''
        photo_type = photo_type.strip().lower()

        if img_id in image_ids_to_images:

            # We've already created an image record for this file; verify
            # consistency across annotations
            im = image_ids_to_images[img_id]
            assert im['file_name'] == image_name
            assert im['station_label'] == station_label

            # There are a small handful of datetime mismatches across annotations
            # for the same image
            # assert im['datetime'] == timestamp
            if im['datetime'] != timestamp:
                print('Warning: timestamp conflict for image {}: {},{}'.format(
                    image_name,im['datetime'],timestamp))

        else:

            im = {}
            im['id'] = img_id
            im['file_name'] = image_name
            im['datetime'] = timestamp
            im['station_label'] = station_label
            im['photo_type'] = photo_type

            image_ids_to_images[img_id] = im
            images.append(im)

        species = row['Species']

        # NaN (a float) or an empty string means no species label
        if (isinstance(species,float) or \
            (isinstance(species,str) and (len(species) == 0))):
            category_name = 'empty'
        else:
            category_name = species

        # Special cases based on the 'photo type' field
        if 'vehicle' in photo_type:
            category_name = 'vehicle'
        # Various spellings of 'community'
        elif 'comm' in photo_type:
            category_name = 'human'
        elif 'camera' in photo_type or 'researcher' in photo_type:
            category_name = 'human'
        elif 'livestock' in photo_type:
            category_name = 'livestock'
        elif 'blank' in photo_type:
            category_name = 'empty'
        elif 'plant movement' in photo_type:
            category_name = 'empty'

        category_name = category_name.strip().lower()

        # Have we seen this category before?
        if category_name in category_name_to_category:
            category_id = category_name_to_category[category_name]['id']
        else:
            category_id = next_category_id
            category = {}
            category['id'] = category_id
            category['name'] = category_name
            category_name_to_category[category_name] = category
            categories.append(category)
            next_category_id += 1

        # Create an annotation
        ann = {}
        ann['id'] = str(uuid.uuid1())
        ann['image_id'] = im['id']
        ann['category_id'] = category_id

        # Copy mapped spreadsheet columns onto the annotation, normalizing
        # NaN cells to empty strings
        # fieldname = list(mapped_fields.keys())[0]
        for fieldname in mapped_fields:
            target_field = mapped_fields[fieldname]
            val = row[fieldname]
            if isinstance(val,float) and np.isnan(val):
                val = ''
            else:
                val = str(val).strip()
            ann[target_field] = val

        annotations.append(ann)

    # ...for each row

# ...for each image

# Bug fix: 'elapsed' was previously left over from the image-existence check
# above and never recomputed here, so this print reported stale timing
elapsed = time.time() - start_time

print('Finished creating CCT dictionaries in {}'.format(
    humanfriendly.format_timespan(elapsed)))
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
#%% Create info struct

# Dataset-level metadata for the output .json file
info = {
    'year': 2019,
    'version': 1,
    'description': 'Save the Elephants Survey A',
    'contributor': 'Save the Elephants'
}
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
#%% Write output

# Assemble the final CCT database and serialize it to output_json_file
json_data = {}
json_data['images'] = images
json_data['annotations'] = annotations
json_data['categories'] = categories
json_data['info'] = info

# Bug fix: the previous code passed an anonymous open() handle to json.dump
# and never closed it; a context manager guarantees the file is flushed and
# closed deterministically
with open(output_json_file, 'w') as f:
    json.dump(json_data, f, indent=2)

print('Finished writing .json file with {} images, {} annotations, and {} categories'.format(
    len(images),len(annotations),len(categories)))
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
#%% Validate output

from megadetector.data_management.databases import integrity_check_json_db

# Sanity-check the database we just wrote; skip the slow per-image checks,
# since image existence was already verified above
options = integrity_check_json_db.IntegrityCheckOptions()
options.baseDir = image_directory
for flag_name in ('bCheckImageSizes', 'bCheckImageExistence', 'bFindUnusedImages'):
    setattr(options, flag_name, False)

sortedCategories, data = integrity_check_json_db.integrity_check_json_db(output_json_file,options)
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
#%% Preview labels

# Render an HTML preview page for a sample of the labeled images, for manual
# review of the label mapping

from megadetector.visualization import visualize_db
# NOTE(review): this import is unused in this cell (it's used in the
# validation cell above); these #%% cells may be run independently
from megadetector.data_management.databases import integrity_check_json_db

viz_options = visualize_db.DbVizOptions()
# Number of images to sample for the preview page
viz_options.num_to_visualize = 1000
viz_options.trim_to_images_with_bboxes = False
viz_options.add_search_links = True
viz_options.sort_by_filename = False
viz_options.parallelize_rendering = True
html_output_file,image_db = visualize_db.visualize_db(db_path=output_json_file,
                                                      output_dir=os.path.join(output_base,'preview'),
                                                      image_base_dir=image_directory,
                                                      options=viz_options)
# Open the generated preview in the default browser (os.startfile is
# Windows-only)
os.startfile(html_output_file)
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
#%% Scrap

if False:

    pass

    #%% Find unique photo types

    # Collect the distinct photo_type values present in the rendered database
    annotations = image_db['annotations']
    photo_types = {ann['photo_type'] for ann in tqdm(annotations)}
|