megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic.
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/efficientnet/model.py +8 -8
- megadetector/classification/efficientnet/utils.py +6 -5
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +26 -26
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -2
- megadetector/data_management/camtrap_dp_to_coco.py +79 -46
- megadetector/data_management/cct_json_utils.py +103 -103
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +210 -193
- megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
- megadetector/data_management/databases/integrity_check_json_db.py +228 -200
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +88 -39
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +133 -125
- megadetector/data_management/labelme_to_yolo.py +159 -73
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
- megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +73 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
- megadetector/data_management/mewc_to_md.py +344 -340
- megadetector/data_management/ocr_tools.py +262 -255
- megadetector/data_management/read_exif.py +249 -227
- megadetector/data_management/remap_coco_categories.py +90 -28
- megadetector/data_management/remove_exif.py +81 -21
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +588 -120
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +248 -122
- megadetector/data_management/yolo_to_coco.py +333 -191
- megadetector/detection/change_detection.py +832 -0
- megadetector/detection/process_video.py +340 -337
- megadetector/detection/pytorch_detector.py +358 -278
- megadetector/detection/run_detector.py +399 -186
- megadetector/detection/run_detector_batch.py +404 -377
- megadetector/detection/run_inference_with_yolov5_val.py +340 -327
- megadetector/detection/run_tiled_inference.py +257 -249
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +332 -295
- megadetector/postprocessing/add_max_conf.py +19 -11
- megadetector/postprocessing/categorize_detections_by_size.py +45 -45
- megadetector/postprocessing/classification_postprocessing.py +468 -433
- megadetector/postprocessing/combine_batch_outputs.py +23 -23
- megadetector/postprocessing/compare_batch_results.py +590 -525
- megadetector/postprocessing/convert_output_format.py +106 -102
- megadetector/postprocessing/create_crop_folder.py +347 -147
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -499
- megadetector/postprocessing/load_api_results.py +48 -27
- megadetector/postprocessing/md_to_coco.py +133 -102
- megadetector/postprocessing/md_to_labelme.py +107 -90
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +92 -114
- megadetector/postprocessing/postprocess_batch_results.py +319 -301
- megadetector/postprocessing/remap_detection_categories.py +91 -38
- megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +156 -74
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/ct_utils.py +1049 -211
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +632 -529
- megadetector/utils/path_utils.py +1520 -431
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/split_locations_into_train_val.py +62 -62
- megadetector/utils/string_utils.py +148 -27
- megadetector/utils/url_utils.py +489 -176
- megadetector/utils/wi_utils.py +2658 -2526
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +34 -30
- megadetector/visualization/render_images_with_thumbnails.py +39 -74
- megadetector/visualization/visualization_utils.py +487 -435
- megadetector/visualization/visualize_db.py +232 -198
- megadetector/visualization/visualize_detector_output.py +82 -76
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
- megadetector-10.0.0.dist-info/RECORD +139 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
- megadetector/api/batch_processing/api_core/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
- megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
- megadetector/api/batch_processing/api_core/server.py +0 -294
- megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
- megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
- megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
- megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
- megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
- megadetector/api/batch_processing/api_core/server_utils.py +0 -88
- megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
- megadetector/api/batch_processing/api_support/__init__.py +0 -0
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
- megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
- megadetector/api/synchronous/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
- megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
- megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
- megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
- megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector/utils/azure_utils.py +0 -178
- megadetector/utils/sas_blob_utils.py +0 -509
- megadetector-5.0.28.dist-info/RECORD +0 -209
- /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
megadetector/data_management/importers/jb_csv_to_json.py
@@ -1,150 +0,0 @@
-"""
-
-jb_csv_to_json.py
-
-Convert a particular .csv file to CCT format. Images were not available at
-the time I wrote this script, so this is much shorter than other scripts
-in this folder.
-
-"""
-
-#%% Constants and environment
-
-import pandas as pd
-import uuid
-import json
-
-input_metadata_file = r'd:\temp\pre_bounding_box.csv'
-output_file = r'd:\temp\pre_bounding_box.json'
-filename_col = 'filename'
-label_col = 'category'
-
-
-#%% Read source data
-
-input_metadata = pd.read_csv(input_metadata_file)
-
-print('Read {} columns and {} rows from metadata file'.format(len(input_metadata.columns),
-    len(input_metadata)))
-
-
-#%% Confirm filename uniqueness (this data set has one label per image)
-
-imageFilenames = input_metadata[filename_col]
-
-duplicateRows = []
-filenamesToRows = {}
-
-# Build up a map from filenames to a list of rows, checking image existence as we go
-for iFile,fn in enumerate(imageFilenames):
-
-    if (fn in filenamesToRows):
-        duplicateRows.append(iFile)
-        filenamesToRows[fn].append(iFile)
-    else:
-        filenamesToRows[fn] = [iFile]
-
-assert(len(duplicateRows) == 0)
-
-
-#%% Create CCT dictionaries
-
-images = []
-annotations = []
-
-# Map categories to integer IDs (that's what COCO likes)
-nextCategoryID = 1
-categories = []
-categoryNamesToCategories = {}
-
-cat = {}
-cat['name'] = 'empty'
-cat['id'] = 0
-categories.append(cat)
-categoryNamesToCategories['empty'] = cat
-
-# For each image
-#
-# Because in practice images are 1:1 with annotations in this data set,
-# this is also a loop over annotations.
-
-# imageName = imageFilenames[0]
-for imageName in imageFilenames:
-
-    rows = filenamesToRows[imageName]
-
-    # As per above, this is convenient and appears to be true; asserting to be safe
-    assert(len(rows) == 1)
-    iRow = rows[0]
-
-    row = input_metadata.iloc[iRow]
-
-    im = {}
-    # Filenames look like "290716114012001a1116.jpg"
-    im['id'] = imageName.split('.')[0]
-    im['file_name'] = imageName
-    im['seq_id'] = '-1'
-
-    images.append(im)
-
-    categoryName = row[label_col].lower()
-
-    # Have we seen this category before?
-    if categoryName in categoryNamesToCategories:
-        categoryID = categoryNamesToCategories[categoryName]['id']
-    else:
-        cat = {}
-        categoryID = nextCategoryID
-        cat['name'] = categoryName
-        cat['id'] = nextCategoryID
-        categories.append(cat)
-        categoryNamesToCategories[categoryName] = cat
-        nextCategoryID += 1
-
-    # Create an annotation
-    ann = {}
-
-    # The Internet tells me this guarantees uniqueness to a reasonable extent, even
-    # beyond the sheer improbability of collisions.
-    ann['id'] = str(uuid.uuid1())
-    ann['image_id'] = im['id']
-    ann['category_id'] = categoryID
-
-    annotations.append(ann)
-
-# ...for each image
-
-print('Finished creating dictionaries')
-
-
-#%% Create info struct
-
-info = {}
-info['year'] = 2019
-info['version'] = 1
-info['description'] = 'COCO style database'
-info['secondary_contributor'] = 'Converted to COCO .json by Dan Morris'
-info['contributor'] = ''
-
-
-#%% Write output
-
-json_data = {}
-json_data['images'] = images
-json_data['annotations'] = annotations
-json_data['categories'] = categories
-json_data['info'] = info
-json.dump(json_data, open(output_file,'w'), indent=4)
-
-print('Finished writing .json file with {} images, {} annotations, and {} categories'.format(
-    len(images),len(annotations),len(categories)))
-
-
-#%% Validate
-
-from megadetector.data_management.databases import integrity_check_json_db
-
-options = integrity_check_json_db.IntegrityCheckOptions()
-sortedCategories,data = integrity_check_json_db.integrity_check_json_db(output_file, options)
-
-
megadetector/data_management/importers/mcgill_to_json.py
@@ -1,250 +0,0 @@
-"""
-
-mcgill_to_json.py
-
-Convert the .csv file provided for the McGill test data set to a
-COCO-camera-traps .json file
-
-"""
-
-#%% Constants and environment
-
-import pandas as pd
-import os
-import glob
-import json
-import uuid
-import time
-import ntpath
-import humanfriendly
-import PIL
-import math
-
-baseDir = r'D:\wildlife_data\mcgill_test'
-input_metadata_file = os.path.join(baseDir, 'dan_500_photos_metadata.csv')
-output_file = os.path.join(baseDir, 'mcgill_test.json')
-image_directory = baseDir
-
-assert(os.path.isdir(image_directory))
-assert(os.path.isfile(input_metadata_file))
-
-
-#%% Read source data
-
-input_metadata = pd.read_csv(input_metadata_file)
-
-print('Read {} columns and {} rows from metadata file'.format(len(input_metadata.columns),
-    len(input_metadata)))
-
-
-#%% Map filenames to rows, verify image existence
-
-# Create an additional column for concatenated filenames
-input_metadata['relative_path'] = ''
-input_metadata['full_path'] = ''
-
-startTime = time.time()
-
-# Maps relative filenames to rows
-filenamesToRows = {}
-
-duplicateRows = []
-
-# Build up a map from filenames to a list of rows, checking image existence as we go
-# row = input_metadata.iloc[0]
-for iFile,row in input_metadata.iterrows():
-
-    relativePath = os.path.join(row['site'],row['date_range'],str(row['camera']),
-        str(row['folder']),row['filename'])
-    fullPath = os.path.join(baseDir,relativePath)
-
-    if (relativePath in filenamesToRows):
-        duplicateRows.append(iFile)
-        filenamesToRows[relativePath].append(iFile)
-    else:
-        filenamesToRows[relativePath] = [iFile]
-    assert(os.path.isfile(fullPath))
-
-    row['relative_path'] = relativePath
-    row['full_path'] = fullPath
-
-    input_metadata.iloc[iFile] = row
-
-elapsed = time.time() - startTime
-print('Finished verifying image existence in {}, found {} filenames with multiple labels'.format(
-    humanfriendly.format_timespan(elapsed),len(duplicateRows)))
-
-# I didn't expect this to be true a priori, but it appears to be true, and
-# it saves us the trouble of checking consistency across multiple occurrences
-# of an image.
-assert(len(duplicateRows) == 0)
-
-
-#%% Check for images that aren't included in the metadata file
-
-# Enumerate all images
-imageFullPaths = glob.glob(os.path.join(image_directory,'**/*.JPG'), recursive=True)
-
-for iImage,imagePath in enumerate(imageFullPaths):
-
-    imageRelPath = ntpath.relpath(imagePath, image_directory)
-    assert(imageRelPath in filenamesToRows)
-
-print('Finished checking {} images to make sure they\'re in the metadata'.format(
-    len(imageFullPaths)))
-
-
-#%% Create CCT dictionaries
-
-# Also gets image sizes, so this takes ~6 minutes
-#
-# Implicitly checks images for overt corruptness, i.e. by not crashing.
-
-images = []
-annotations = []
-categories = []
-
-emptyCategory = {}
-emptyCategory['id'] = 0
-emptyCategory['name'] = 'empty'
-emptyCategory['latin'] = 'empty'
-emptyCategory['count'] = 0
-categories.append(emptyCategory)
-
-# Map categories to integer IDs (that's what COCO likes)
-nextCategoryID = 1
-labelToCategory = {'empty':emptyCategory}
-
-# For each image
-#
-# Because in practice images are 1:1 with annotations in this data set,
-# this is also a loop over annotations.
-
-startTime = time.time()
-
-# row = input_metadata.iloc[0]
-for iFile,row in input_metadata.iterrows():
-
-    relPath = row['relative_path'].replace('\\','/')
-    im = {}
-    # Filenames look like "290716114012001a1116.jpg"
-    im['id'] = relPath.replace('/','_').replace(' ','_')
-
-    im['file_name'] = relPath
-
-    im['seq_id'] = -1
-    im['frame_num'] = -1
-
-    # In the form "001a"
-    im['site']= row['site']
-
-    # Can be in the form '111' or 's46'
-    im['camera'] = row['camera']
-
-    # In the form "7/29/2016 11:40"
-    im['datetime'] = row['timestamp']
-
-    otherFields = ['motion','temp_F','n_present','n_waterhole','n_contact','notes']
-
-    for s in otherFields:
-        im[s] = row[s]
-
-    # Check image height and width
-    fullPath = row['full_path']
-    assert(os.path.isfile(fullPath))
-    pilImage = PIL.Image.open(fullPath)
-    width, height = pilImage.size
-    im['width'] = width
-    im['height'] = height
-
-    images.append(im)
-
-    label = row['species']
-    if not isinstance(label,str):
-        # NaN is the only thing we should see that's not a string
-        assert math.isnan(label)
-        label = 'empty'
-    else:
-        label = label.lower()
-
-    latin = row['binomial']
-    if not isinstance(latin,str):
-        # NaN is the only thing we should see that's not a string
-        assert math.isnan(latin)
-        latin = 'empty'
-    else:
-        latin = latin.lower()
-
-    if label == 'empty':
-        if latin != 'empty':
-            latin = 'empty'
-
-    if label == 'unknown':
-        if latin != 'unknown':
-            latin = 'unknown'
-
-    if label not in labelToCategory:
-        print('Adding category {} ({})'.format(label,latin))
-        category = {}
-        categoryID = nextCategoryID
-        category['id'] = categoryID
-        nextCategoryID += 1
-        category['name'] = label
-        category['latin'] = latin
-        category['count'] = 1
-        labelToCategory[label] = category
-        categories.append(category)
-    else:
-        category = labelToCategory[label]
-        category['count'] = category['count'] + 1
-        categoryID = category['id']
-
-    # Create an annotation
-    ann = {}
-
-    # The Internet tells me this guarantees uniqueness to a reasonable extent, even
-    # beyond the sheer improbability of collisions.
-    ann['id'] = str(uuid.uuid1())
-    ann['image_id'] = im['id']
-    ann['category_id'] = categoryID
-
-    annotations.append(ann)
-
-# ...for each image
-
-# Convert categories to a CCT-style dictionary
-
-
-for category in categories:
-    print('Category {}, count {}'.format(category['name'],category['count']))
-
-elapsed = time.time() - startTime
-print('Finished creating CCT dictionaries in {}'.format(
-    humanfriendly.format_timespan(elapsed)))
-
-
-#%% Create info struct
-
-info = {}
-info['year'] = 2019
-info['version'] = 1
-info['description'] = 'COCO style database'
-info['secondary_contributor'] = 'Converted to COCO .json by Dan Morris'
-info['contributor'] = 'McGill University'
-
-
-#%% Write output
-
-json_data = {}
-json_data['images'] = images
-json_data['annotations'] = annotations
-json_data['categories'] = categories
-json_data['info'] = info
-json.dump(json_data, open(output_file,'w'), indent=4)
-
-print('Finished writing .json file with {} images, {} annotations, and {} categories'.format(
-    len(images),len(annotations),len(categories)))
-
-
-
-
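
Both deleted importers follow the same pattern: build images, annotations, and categories lists, attach an info dict, and write the result as a COCO Camera Traps (CCT) .json file (jb_csv_to_json.py then validates it with integrity_check_json_db). The sketch below is a minimal, hypothetical illustration of that output structure, not part of the package: the field names come from the scripts above, while the filename and the 'deer' category are invented for the example.

import json
import uuid

# Minimal COCO Camera Traps (CCT) structure mirroring what the deleted
# importers above produced. The filename and the 'deer' category are
# hypothetical; the field names are taken from the scripts in this diff.
im = {'id': 'example_image_001',
      'file_name': 'example_image_001.jpg',
      'seq_id': '-1'}

categories = [{'id': 0, 'name': 'empty'},   # category 0 is always 'empty'
              {'id': 1, 'name': 'deer'}]

ann = {'id': str(uuid.uuid1()),             # UUIDs keep annotation IDs unique
       'image_id': im['id'],
       'category_id': 1}

info = {'year': 2019,
        'version': 1,
        'description': 'COCO style database',
        'contributor': ''}

json_data = {'images': [im],
             'annotations': [ann],
             'categories': categories,
             'info': info}

with open('example_cct.json', 'w') as f:
    json.dump(json_data, f, indent=4)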