megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- api/__init__.py +0 -0
- api/batch_processing/__init__.py +0 -0
- api/batch_processing/api_core/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/score.py +0 -1
- api/batch_processing/api_core/server_job_status_table.py +0 -1
- api/batch_processing/api_core_support/__init__.py +0 -0
- api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
- api/batch_processing/api_support/__init__.py +0 -0
- api/batch_processing/api_support/summarize_daily_activity.py +0 -1
- api/batch_processing/data_preparation/__init__.py +0 -0
- api/batch_processing/data_preparation/manage_local_batch.py +93 -79
- api/batch_processing/data_preparation/manage_video_batch.py +8 -8
- api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
- api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
- api/batch_processing/postprocessing/__init__.py +0 -0
- api/batch_processing/postprocessing/add_max_conf.py +12 -12
- api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
- api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
- api/batch_processing/postprocessing/compare_batch_results.py +114 -44
- api/batch_processing/postprocessing/convert_output_format.py +62 -19
- api/batch_processing/postprocessing/load_api_results.py +17 -20
- api/batch_processing/postprocessing/md_to_coco.py +31 -21
- api/batch_processing/postprocessing/md_to_labelme.py +165 -68
- api/batch_processing/postprocessing/merge_detections.py +40 -15
- api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
- api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
- api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
- api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
- api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
- api/synchronous/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
- api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
- api/synchronous/api_core/animal_detection_api/config.py +35 -35
- api/synchronous/api_core/tests/__init__.py +0 -0
- api/synchronous/api_core/tests/load_test.py +109 -109
- classification/__init__.py +0 -0
- classification/aggregate_classifier_probs.py +21 -24
- classification/analyze_failed_images.py +11 -13
- classification/cache_batchapi_outputs.py +51 -51
- classification/create_classification_dataset.py +69 -68
- classification/crop_detections.py +54 -53
- classification/csv_to_json.py +97 -100
- classification/detect_and_crop.py +105 -105
- classification/evaluate_model.py +43 -42
- classification/identify_mislabeled_candidates.py +47 -46
- classification/json_to_azcopy_list.py +10 -10
- classification/json_validator.py +72 -71
- classification/map_classification_categories.py +44 -43
- classification/merge_classification_detection_output.py +68 -68
- classification/prepare_classification_script.py +157 -154
- classification/prepare_classification_script_mc.py +228 -228
- classification/run_classifier.py +27 -26
- classification/save_mislabeled.py +30 -30
- classification/train_classifier.py +20 -20
- classification/train_classifier_tf.py +21 -22
- classification/train_utils.py +10 -10
- data_management/__init__.py +0 -0
- data_management/annotations/__init__.py +0 -0
- data_management/annotations/annotation_constants.py +18 -31
- data_management/camtrap_dp_to_coco.py +238 -0
- data_management/cct_json_utils.py +107 -59
- data_management/cct_to_md.py +176 -158
- data_management/cct_to_wi.py +247 -219
- data_management/coco_to_labelme.py +272 -0
- data_management/coco_to_yolo.py +86 -62
- data_management/databases/__init__.py +0 -0
- data_management/databases/add_width_and_height_to_db.py +20 -16
- data_management/databases/combine_coco_camera_traps_files.py +35 -31
- data_management/databases/integrity_check_json_db.py +130 -83
- data_management/databases/subset_json_db.py +25 -16
- data_management/generate_crops_from_cct.py +27 -45
- data_management/get_image_sizes.py +188 -144
- data_management/importers/add_nacti_sizes.py +8 -8
- data_management/importers/add_timestamps_to_icct.py +78 -78
- data_management/importers/animl_results_to_md_results.py +158 -160
- data_management/importers/auckland_doc_test_to_json.py +9 -9
- data_management/importers/auckland_doc_to_json.py +8 -8
- data_management/importers/awc_to_json.py +7 -7
- data_management/importers/bellevue_to_json.py +15 -15
- data_management/importers/cacophony-thermal-importer.py +13 -13
- data_management/importers/carrizo_shrubfree_2018.py +8 -8
- data_management/importers/carrizo_trail_cam_2017.py +8 -8
- data_management/importers/cct_field_adjustments.py +9 -9
- data_management/importers/channel_islands_to_cct.py +10 -10
- data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
- data_management/importers/ena24_to_json.py +7 -7
- data_management/importers/filenames_to_json.py +8 -8
- data_management/importers/helena_to_cct.py +7 -7
- data_management/importers/idaho-camera-traps.py +7 -7
- data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
- data_management/importers/jb_csv_to_json.py +9 -9
- data_management/importers/mcgill_to_json.py +8 -8
- data_management/importers/missouri_to_json.py +18 -18
- data_management/importers/nacti_fieldname_adjustments.py +10 -10
- data_management/importers/noaa_seals_2019.py +8 -8
- data_management/importers/pc_to_json.py +7 -7
- data_management/importers/plot_wni_giraffes.py +7 -7
- data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
- data_management/importers/prepare_zsl_imerit.py +7 -7
- data_management/importers/rspb_to_json.py +8 -8
- data_management/importers/save_the_elephants_survey_A.py +8 -8
- data_management/importers/save_the_elephants_survey_B.py +9 -9
- data_management/importers/snapshot_safari_importer.py +26 -26
- data_management/importers/snapshot_safari_importer_reprise.py +665 -665
- data_management/importers/snapshot_serengeti_lila.py +14 -14
- data_management/importers/sulross_get_exif.py +8 -9
- data_management/importers/timelapse_csv_set_to_json.py +11 -11
- data_management/importers/ubc_to_json.py +13 -13
- data_management/importers/umn_to_json.py +7 -7
- data_management/importers/wellington_to_json.py +8 -8
- data_management/importers/wi_to_json.py +9 -9
- data_management/importers/zamba_results_to_md_results.py +181 -181
- data_management/labelme_to_coco.py +309 -159
- data_management/labelme_to_yolo.py +103 -60
- data_management/lila/__init__.py +0 -0
- data_management/lila/add_locations_to_island_camera_traps.py +9 -9
- data_management/lila/add_locations_to_nacti.py +147 -147
- data_management/lila/create_lila_blank_set.py +114 -31
- data_management/lila/create_lila_test_set.py +8 -8
- data_management/lila/create_links_to_md_results_files.py +106 -106
- data_management/lila/download_lila_subset.py +92 -90
- data_management/lila/generate_lila_per_image_labels.py +56 -43
- data_management/lila/get_lila_annotation_counts.py +18 -15
- data_management/lila/get_lila_image_counts.py +11 -11
- data_management/lila/lila_common.py +103 -70
- data_management/lila/test_lila_metadata_urls.py +132 -116
- data_management/ocr_tools.py +173 -128
- data_management/read_exif.py +161 -99
- data_management/remap_coco_categories.py +84 -0
- data_management/remove_exif.py +58 -62
- data_management/resize_coco_dataset.py +32 -44
- data_management/wi_download_csv_to_coco.py +246 -0
- data_management/yolo_output_to_md_output.py +86 -73
- data_management/yolo_to_coco.py +535 -95
- detection/__init__.py +0 -0
- detection/detector_training/__init__.py +0 -0
- detection/process_video.py +85 -33
- detection/pytorch_detector.py +43 -25
- detection/run_detector.py +157 -72
- detection/run_detector_batch.py +189 -114
- detection/run_inference_with_yolov5_val.py +118 -51
- detection/run_tiled_inference.py +113 -42
- detection/tf_detector.py +51 -28
- detection/video_utils.py +606 -521
- docs/source/conf.py +43 -0
- md_utils/__init__.py +0 -0
- md_utils/azure_utils.py +9 -9
- md_utils/ct_utils.py +249 -70
- md_utils/directory_listing.py +59 -64
- md_utils/md_tests.py +968 -862
- md_utils/path_utils.py +655 -155
- md_utils/process_utils.py +157 -133
- md_utils/sas_blob_utils.py +20 -20
- md_utils/split_locations_into_train_val.py +45 -32
- md_utils/string_utils.py +33 -10
- md_utils/url_utils.py +208 -27
- md_utils/write_html_image_list.py +51 -35
- md_visualization/__init__.py +0 -0
- md_visualization/plot_utils.py +102 -109
- md_visualization/render_images_with_thumbnails.py +34 -34
- md_visualization/visualization_utils.py +908 -311
- md_visualization/visualize_db.py +109 -58
- md_visualization/visualize_detector_output.py +61 -42
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
- megadetector-5.0.9.dist-info/RECORD +224 -0
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
- taxonomy_mapping/__init__.py +0 -0
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
- taxonomy_mapping/map_new_lila_datasets.py +154 -154
- taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
- taxonomy_mapping/preview_lila_taxonomy.py +591 -591
- taxonomy_mapping/retrieve_sample_image.py +12 -12
- taxonomy_mapping/simple_image_download.py +11 -11
- taxonomy_mapping/species_lookup.py +10 -10
- taxonomy_mapping/taxonomy_csv_checker.py +18 -18
- taxonomy_mapping/taxonomy_graph.py +47 -47
- taxonomy_mapping/validate_lila_category_mappings.py +83 -76
- data_management/cct_json_to_filename_json.py +0 -89
- data_management/cct_to_csv.py +0 -140
- data_management/databases/remove_corrupted_images_from_db.py +0 -191
- detection/detector_training/copy_checkpoints.py +0 -43
- md_visualization/visualize_megadb.py +0 -183
- megadetector-5.0.7.dist-info/RECORD +0 -202
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
resize_coco_dataset.py
|
|
4
|
+
|
|
5
|
+
Given a COCO-formatted dataset, resizes all the images to a target size,
|
|
6
|
+
scaling bounding boxes accordingly.
|
|
7
|
+
|
|
8
|
+
"""
|
|
9
9
|
|
|
10
10
|
#%% Imports and constants
|
|
11
11
|
|
|
@@ -26,31 +26,31 @@ from md_visualization.visualization_utils import \
|
|
|
26
26
|
def resize_coco_dataset(input_folder,input_filename,
|
|
27
27
|
output_folder,output_filename,
|
|
28
28
|
target_size=(-1,-1),
|
|
29
|
-
correct_size_image_handling='copy'
|
|
30
|
-
right_edge_quantization_threshold=None):
|
|
29
|
+
correct_size_image_handling='copy'):
|
|
31
30
|
"""
|
|
32
|
-
Given a COCO-formatted dataset (images in input_folder, data in input_filename),
|
|
33
|
-
all the images to a target size (in output_folder) and
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
31
|
+
Given a COCO-formatted dataset (images in input_folder, data in input_filename), resizes
|
|
32
|
+
all the images to a target size (in output_folder) and scales bounding boxes accordingly.
|
|
33
|
+
|
|
34
|
+
Args:
|
|
35
|
+
input_folder (str): the folder where images live; filenames in [input_filename] should
|
|
36
|
+
be relative to [input_folder]
|
|
37
|
+
input_filename (str): the (input) COCO-formatted .json file containing annotations
|
|
38
|
+
output_folder (str): the folder to which we should write resized images; can be the
|
|
39
|
+
same as [input_folder], in which case images are over-written
|
|
40
|
+
output_filename (str): the COCO-formatted .json file we should generate that refers to
|
|
41
|
+
the resized images
|
|
42
|
+
target_size (list or tuple of ints): this should be a tuple/list of ints, with length 2 (w,h).
|
|
43
|
+
If either dimension is -1, aspect ratio will be preserved. If both dimensions are -1, this means
|
|
44
|
+
"keep the original size". If both dimensions are -1 and correct_size_image_handling is copy, this
|
|
45
|
+
function is basically a no-op.
|
|
46
|
+
correct_size_image_handling (str): can be 'copy' (in which case the original image is just copied
|
|
47
|
+
to the output folder) or 'rewrite' (in which case the image is opened via PIL and re-written,
|
|
48
|
+
attempting to preserve the same quality). The only reason to use 'rewrite' is the case where
|
|
49
|
+
you're superstitious about biases coming from images in a training set being written by different
|
|
50
|
+
image encoders.
|
|
46
51
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
of the way there, due to what appears to be a slight bias inherent to MD. If a box extends
|
|
50
|
-
within [right_edge_quantization_threshold] (a small number, from 0 to 1, but probably around
|
|
51
|
-
0.02) of the right edge of the image, it will be extended to the far right edge.
|
|
52
|
-
|
|
53
|
-
Returns the COCO database with resized images.
|
|
52
|
+
Returns:
|
|
53
|
+
dict: the COCO database with resized images, identical to the content of [output_filename]
|
|
54
54
|
"""
|
|
55
55
|
|
|
56
56
|
# Read input data
|
|
@@ -126,15 +126,6 @@ def resize_coco_dataset(input_folder,input_filename,
|
|
|
126
126
|
bbox[2] * width_scale,
|
|
127
127
|
bbox[3] * height_scale]
|
|
128
128
|
|
|
129
|
-
# Do we need to quantize this box?
|
|
130
|
-
if right_edge_quantization_threshold is not None and \
|
|
131
|
-
right_edge_quantization_threshold > 0:
|
|
132
|
-
bbox_right_edge_abs = bbox[0] + bbox[2]
|
|
133
|
-
bbox_right_edge_norm = bbox_right_edge_abs / output_w
|
|
134
|
-
bbox_right_edge_distance = (1.0 - bbox_right_edge_norm)
|
|
135
|
-
if bbox_right_edge_distance < right_edge_quantization_threshold:
|
|
136
|
-
bbox[2] = output_w - bbox[0]
|
|
137
|
-
|
|
138
129
|
ann['bbox'] = bbox
|
|
139
130
|
|
|
140
131
|
# ...if this annotation has a box
|
|
@@ -169,13 +160,10 @@ if False:
|
|
|
169
160
|
|
|
170
161
|
correct_size_image_handling = 'rewrite'
|
|
171
162
|
|
|
172
|
-
right_edge_quantization_threshold = 0.015
|
|
173
|
-
|
|
174
163
|
resize_coco_dataset(input_folder,input_filename,
|
|
175
164
|
output_folder,output_filename,
|
|
176
165
|
target_size=target_size,
|
|
177
|
-
correct_size_image_handling=correct_size_image_handling
|
|
178
|
-
right_edge_quantization_threshold=right_edge_quantization_threshold)
|
|
166
|
+
correct_size_image_handling=correct_size_image_handling)
|
|
179
167
|
|
|
180
168
|
|
|
181
169
|
#%% Preview
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
wi_download_csv_to_coco.py
|
|
4
|
+
|
|
5
|
+
Converts a .csv file from a Wildlife Insights project export to a COCO camera traps .json file.
|
|
6
|
+
|
|
7
|
+
Currently assumes that common names are unique identifiers, which is convenient but unreliable.
|
|
8
|
+
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
#%% Imports and constants
|
|
12
|
+
|
|
13
|
+
import os
|
|
14
|
+
import json
|
|
15
|
+
import pandas as pd
|
|
16
|
+
|
|
17
|
+
from tqdm import tqdm
|
|
18
|
+
from collections import defaultdict
|
|
19
|
+
|
|
20
|
+
from md_visualization import visualization_utils as vis_utils
|
|
21
|
+
from md_utils.ct_utils import isnan
|
|
22
|
+
|
|
23
|
+
wi_extra_annotation_columns = \
|
|
24
|
+
('is_blank','identified_by','wi_taxon_id','class','order','family','genus','species','uncertainty',
|
|
25
|
+
'number_of_objects','age','sex','animal_recognizable','individual_id','individual_animal_notes',
|
|
26
|
+
'behavior','highlighted','markings')
|
|
27
|
+
|
|
28
|
+
wi_extra_image_columns = ('project_id','deployment_id')
|
|
29
|
+
|
|
30
|
+
def _make_location_id(project_id,deployment_id):
|
|
31
|
+
return 'project_' + str(project_id) + '_deployment_' + deployment_id
|
|
32
|
+
|
|
33
|
+
default_category_remappings = {
|
|
34
|
+
'Homo Species':'Human',
|
|
35
|
+
'Human-Camera Trapper':'Human',
|
|
36
|
+
'No CV Result':'Unknown'
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
#%% Main function
|
|
41
|
+
|
|
42
|
+
def wi_download_csv_to_coco(csv_file_in,
|
|
43
|
+
coco_file_out=None,
|
|
44
|
+
image_folder=None,
|
|
45
|
+
validate_images=False,
|
|
46
|
+
gs_prefix=None,
|
|
47
|
+
verbose=True,
|
|
48
|
+
category_remappings=default_category_remappings):
|
|
49
|
+
"""
|
|
50
|
+
Converts a .csv file from a Wildlife Insights project export to a COCO
|
|
51
|
+
Camera Traps .json file.
|
|
52
|
+
|
|
53
|
+
Args:
|
|
54
|
+
csv_file_in (str): the downloaded .csv file we should convert to COCO
|
|
55
|
+
coco_file_out (str, optional): the .json file we should write; if [coco_file_out] is None,
|
|
56
|
+
uses [csv_file_in].json
|
|
57
|
+
image_folder (str, optional): the folder where images live, only relevant if
|
|
58
|
+
[validate_images] is True
|
|
59
|
+
validate_images (bool, optional): whether to check images for corruption and load
|
|
60
|
+
image sizes; if this is True, [image_folder] must be a valid folder
|
|
61
|
+
gs_prefix (str, optional): a string to remove from GS URLs to convert to path names...
|
|
62
|
+
for example, if your gs:// URLs look like:
|
|
63
|
+
|
|
64
|
+
`gs://11234134_xyz/deployment/55554/dfadfasdfs.jpg`
|
|
65
|
+
|
|
66
|
+
...and you specify gs_prefix='11234134_xyz/deployment/', the filenames in
|
|
67
|
+
the .json file will look like:
|
|
68
|
+
|
|
69
|
+
`55554/dfadfasdfs.jpg`
|
|
70
|
+
verbose (bool, optional): enable additional debug console output
|
|
71
|
+
category_remappings (dict, optional): str --> str dict that maps any number of
|
|
72
|
+
WI category names to output category names; for example, the default maps
|
|
73
|
+
"Homo Species" to "Human", but leaves 99.99% of categories unchanged.
|
|
74
|
+
|
|
75
|
+
Returns:
|
|
76
|
+
dict: COCO-formatted data, identical to what's written to [coco_file_out]
|
|
77
|
+
"""
|
|
78
|
+
|
|
79
|
+
##%% Create COCO dictionaries
|
|
80
|
+
|
|
81
|
+
category_name_to_id = {}
|
|
82
|
+
category_name_to_id['empty'] = 0
|
|
83
|
+
|
|
84
|
+
df = pd.read_csv(csv_file_in)
|
|
85
|
+
|
|
86
|
+
print('Read {} rows from {}'.format(len(df),csv_file_in))
|
|
87
|
+
|
|
88
|
+
image_id_to_image = {}
|
|
89
|
+
image_id_to_annotations = defaultdict(list)
|
|
90
|
+
|
|
91
|
+
# i_row = 0; row = df.iloc[i_row]
|
|
92
|
+
for i_row,row in df.iterrows():
|
|
93
|
+
|
|
94
|
+
image_id = row['image_id']
|
|
95
|
+
|
|
96
|
+
if image_id not in image_id_to_image:
|
|
97
|
+
|
|
98
|
+
im = {}
|
|
99
|
+
image_id_to_image[image_id] = im
|
|
100
|
+
|
|
101
|
+
im['id'] = image_id
|
|
102
|
+
|
|
103
|
+
gs_url = row['location']
|
|
104
|
+
assert gs_url.startswith('gs://')
|
|
105
|
+
|
|
106
|
+
file_name = gs_url.replace('gs://','')
|
|
107
|
+
if gs_prefix is not None:
|
|
108
|
+
file_name = file_name.replace(gs_prefix,'')
|
|
109
|
+
|
|
110
|
+
location_id = _make_location_id(row['project_id'],row['deployment_id'])
|
|
111
|
+
im['file_name'] = file_name
|
|
112
|
+
im['location'] = location_id
|
|
113
|
+
im['datetime'] = row['timestamp']
|
|
114
|
+
|
|
115
|
+
im['wi_image_info'] = {}
|
|
116
|
+
for s in wi_extra_image_columns:
|
|
117
|
+
im['wi_image_info'][s] = str(row[s])
|
|
118
|
+
|
|
119
|
+
else:
|
|
120
|
+
|
|
121
|
+
im = image_id_to_image[image_id]
|
|
122
|
+
assert im['datetime'] == row['timestamp']
|
|
123
|
+
location_id = _make_location_id(row['project_id'],row['deployment_id'])
|
|
124
|
+
assert im['location'] == location_id
|
|
125
|
+
|
|
126
|
+
category_name = row['common_name']
|
|
127
|
+
if category_remappings is not None and category_name in category_remappings:
|
|
128
|
+
category_name = category_remappings[category_name]
|
|
129
|
+
|
|
130
|
+
if category_name == 'Blank':
|
|
131
|
+
category_name = 'empty'
|
|
132
|
+
assert row['is_blank'] == 1
|
|
133
|
+
else:
|
|
134
|
+
assert row['is_blank'] == 0
|
|
135
|
+
assert isinstance(category_name,str)
|
|
136
|
+
if category_name in category_name_to_id:
|
|
137
|
+
category_id = category_name_to_id[category_name]
|
|
138
|
+
else:
|
|
139
|
+
category_id = len(category_name_to_id)
|
|
140
|
+
category_name_to_id[category_name] = category_id
|
|
141
|
+
|
|
142
|
+
ann = {}
|
|
143
|
+
ann['image_id'] = image_id
|
|
144
|
+
annotations_this_image = image_id_to_annotations[image_id]
|
|
145
|
+
annotation_number = len(annotations_this_image)
|
|
146
|
+
ann['id'] = image_id + '_' + str(annotation_number).zfill(2)
|
|
147
|
+
ann['category_id'] = category_id
|
|
148
|
+
annotations_this_image.append(ann)
|
|
149
|
+
|
|
150
|
+
extra_info = {}
|
|
151
|
+
for s in wi_extra_annotation_columns:
|
|
152
|
+
v = row[s]
|
|
153
|
+
if not isnan(v):
|
|
154
|
+
extra_info[s] = v
|
|
155
|
+
ann['wi_extra_info'] = extra_info
|
|
156
|
+
|
|
157
|
+
# ...for each row
|
|
158
|
+
|
|
159
|
+
images = list(image_id_to_image.values())
|
|
160
|
+
categories = []
|
|
161
|
+
for category_name in category_name_to_id:
|
|
162
|
+
category_id = category_name_to_id[category_name]
|
|
163
|
+
categories.append({'id':category_id,'name':category_name})
|
|
164
|
+
annotations = []
|
|
165
|
+
for image_id in image_id_to_annotations:
|
|
166
|
+
annotations_this_image = image_id_to_annotations[image_id]
|
|
167
|
+
for ann in annotations_this_image:
|
|
168
|
+
annotations.append(ann)
|
|
169
|
+
info = {'version':'1.00','description':'converted from WI export'}
|
|
170
|
+
info['source_file'] = csv_file_in
|
|
171
|
+
coco_data = {}
|
|
172
|
+
coco_data['info'] = info
|
|
173
|
+
coco_data['images'] = images
|
|
174
|
+
coco_data['annotations'] = annotations
|
|
175
|
+
coco_data['categories'] = categories
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
##%% Validate images, add sizes
|
|
179
|
+
|
|
180
|
+
if validate_images:
|
|
181
|
+
|
|
182
|
+
print('Validating images')
|
|
183
|
+
# TODO: trivially parallelizable
|
|
184
|
+
|
|
185
|
+
assert os.path.isdir(image_folder), \
|
|
186
|
+
'Must specify a valid image folder if you specify validate_images=True'
|
|
187
|
+
|
|
188
|
+
# im = images[0]
|
|
189
|
+
for im in tqdm(images):
|
|
190
|
+
file_name_relative = im['file_name']
|
|
191
|
+
file_name_abs = os.path.join(image_folder,file_name_relative)
|
|
192
|
+
assert os.path.isfile(file_name_abs)
|
|
193
|
+
|
|
194
|
+
im['corrupt'] = False
|
|
195
|
+
try:
|
|
196
|
+
pil_im = vis_utils.load_image(file_name_abs)
|
|
197
|
+
except Exception:
|
|
198
|
+
im['corrupt'] = True
|
|
199
|
+
if not im['corrupt']:
|
|
200
|
+
im['width'] = pil_im.width
|
|
201
|
+
im['height'] = pil_im.height
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
##%% Write output json
|
|
205
|
+
|
|
206
|
+
if coco_file_out is None:
|
|
207
|
+
coco_file_out = csv_file_in + '.json'
|
|
208
|
+
|
|
209
|
+
with open(coco_file_out,'w') as f:
|
|
210
|
+
json.dump(coco_data,f,indent=1)
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
##%% Validate output
|
|
214
|
+
|
|
215
|
+
from data_management.databases.integrity_check_json_db import \
|
|
216
|
+
IntegrityCheckOptions,integrity_check_json_db
|
|
217
|
+
options = IntegrityCheckOptions()
|
|
218
|
+
options.baseDir = image_folder
|
|
219
|
+
options.bCheckImageExistence = True
|
|
220
|
+
options.verbose = verbose
|
|
221
|
+
_ = integrity_check_json_db(coco_file_out,options)
|
|
222
|
+
|
|
223
|
+
return coco_data
|
|
224
|
+
|
|
225
|
+
# ...def wi_download_csv_to_coco(...)
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
#%% Interactive driver
|
|
229
|
+
|
|
230
|
+
if False:
|
|
231
|
+
|
|
232
|
+
#%%
|
|
233
|
+
|
|
234
|
+
base_folder = r'a/b/c'
|
|
235
|
+
csv_file_in = os.path.join(base_folder,'images.csv')
|
|
236
|
+
coco_file_out = None
|
|
237
|
+
gs_prefix = 'a_b_c_main/'
|
|
238
|
+
image_folder = os.path.join(base_folder,'images')
|
|
239
|
+
validate_images = False
|
|
240
|
+
verbose = True
|
|
241
|
+
category_remappings = default_category_remappings
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
#%% Command-line driver
|
|
245
|
+
|
|
246
|
+
# TODO
|
|
@@ -1,43 +1,38 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
# just the "images" portion of a COCO .json file.
|
|
37
|
-
#
|
|
38
|
-
# Converting from this format also requires access to the original images, since the format
|
|
39
|
-
# written by YOLOv5 uses absolute coordinates, but MD results are in relative coordinates.
|
|
40
|
-
#
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
yolo_output_to_md_output.py
|
|
4
|
+
|
|
5
|
+
Converts the output of YOLOv5's detect.py or val.py to the MD API output format.
|
|
6
|
+
|
|
7
|
+
**Converting .txt files**
|
|
8
|
+
|
|
9
|
+
detect.py writes a .txt file per image, in YOLO training format. Converting from this
|
|
10
|
+
format does not currently support recursive results, since detect.py doesn't save filenames
|
|
11
|
+
in a way that allows easy inference of folder names. Requires access to the input
|
|
12
|
+
images, because the YOLO format uses the *absence* of a results file to indicate that
|
|
13
|
+
no detections are present.
|
|
14
|
+
|
|
15
|
+
YOLOv5 output has one text file per image, like so:
|
|
16
|
+
|
|
17
|
+
0 0.0141693 0.469758 0.0283385 0.131552 0.761428
|
|
18
|
+
|
|
19
|
+
That's [class, x_center, y_center, width_of_box, height_of_box, confidence]
|
|
20
|
+
|
|
21
|
+
val.py can write in this format as well, using the --save-txt argument.
|
|
22
|
+
|
|
23
|
+
In both cases, a confidence value is only written to each line if you include the --save-conf
|
|
24
|
+
argument. Confidence values are required by this conversion script.
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
**Converting .json files**
|
|
28
|
+
|
|
29
|
+
val.py can also write a .json file in COCO-ish format. It's "COCO-ish" because it's
|
|
30
|
+
just the "images" portion of a COCO .json file.
|
|
31
|
+
|
|
32
|
+
Converting from this format also requires access to the original images, since the format
|
|
33
|
+
written by YOLOv5 uses absolute coordinates, but MD results are in relative coordinates.
|
|
34
|
+
|
|
35
|
+
"""
|
|
41
36
|
|
|
42
37
|
#%% Imports and constants
|
|
43
38
|
|
|
@@ -51,9 +46,7 @@ from tqdm import tqdm
|
|
|
51
46
|
|
|
52
47
|
from md_utils import path_utils
|
|
53
48
|
from md_utils import ct_utils
|
|
54
|
-
|
|
55
49
|
from md_visualization import visualization_utils as vis_utils
|
|
56
|
-
|
|
57
50
|
from detection.run_detector import CONF_DIGITS, COORD_DIGITS
|
|
58
51
|
|
|
59
52
|
|
|
@@ -61,9 +54,16 @@ from detection.run_detector import CONF_DIGITS, COORD_DIGITS
|
|
|
61
54
|
|
|
62
55
|
def read_classes_from_yolo_dataset_file(fn):
|
|
63
56
|
"""
|
|
64
|
-
|
|
57
|
+
Reads a dictionary mapping integer class IDs to class names from a YOLOv5/YOLOv8
|
|
65
58
|
dataset.yaml file or a .json file. A .json file should contain a dictionary mapping
|
|
66
59
|
integer category IDs to string category names.
|
|
60
|
+
|
|
61
|
+
Args:
|
|
62
|
+
fn (str): YOLOv5/YOLOv8 dataset file with a .yml or .yaml extension, or a .json file
|
|
63
|
+
mapping integer category IDs to category names.
|
|
64
|
+
|
|
65
|
+
Returns:
|
|
66
|
+
dict: a mapping from integer category IDs to category names
|
|
67
67
|
"""
|
|
68
68
|
|
|
69
69
|
if fn.endswith('.yml') or fn.endswith('.yaml'):
|
|
@@ -92,45 +92,42 @@ def read_classes_from_yolo_dataset_file(fn):
|
|
|
92
92
|
raise ValueError('Unrecognized category file type: {}'.format(fn))
|
|
93
93
|
|
|
94
94
|
assert len(category_id_to_name) > 0, 'Failed to read class mappings from {}'.format(fn)
|
|
95
|
+
|
|
95
96
|
return category_id_to_name
|
|
96
97
|
|
|
97
98
|
|
|
98
|
-
def yolo_json_output_to_md_output(yolo_json_file,
|
|
99
|
-
|
|
99
|
+
def yolo_json_output_to_md_output(yolo_json_file,
|
|
100
|
+
image_folder,
|
|
101
|
+
output_file,
|
|
102
|
+
yolo_category_id_to_name,
|
|
100
103
|
detector_name='unknown',
|
|
101
104
|
image_id_to_relative_path=None,
|
|
102
105
|
offset_yolo_class_ids=True,
|
|
103
106
|
truncate_to_standard_md_precision=True,
|
|
104
107
|
image_id_to_error=None):
|
|
105
108
|
"""
|
|
106
|
-
|
|
109
|
+
Converts a YOLOv5/YOLOv8 .json file to MD .json format.
|
|
107
110
|
|
|
108
111
|
Args:
|
|
109
112
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
- truncate_to_standard_md_precision: YOLOv5 .json includes lots of (not-super-meaningful)
|
|
130
|
-
precision, set this to truncate to COORD_DIGITS and CONF_DIGITS.
|
|
131
|
-
|
|
132
|
-
- image_id_to_error: if you want to include image IDs in the output file for which you couldn't
|
|
133
|
-
prepare the input file in the first place due to errors, include them here.
|
|
113
|
+
yolo_json_file (str): the .json file to convert from YOLOv5 format to MD output format
|
|
114
|
+
image_folder (str): the .json file contains relative path names; this is the base folder for those paths
|
|
115
|
+
yolo_category_id_to_name (str or dict): the .json results file contains only numeric
|
|
116
|
+
identifiers for categories, but we want names and numbers for the output format;
|
|
117
|
+
yolo_category_id_to_name provides that mapping either as a dict or as a YOLOv5
|
|
118
|
+
dataset.yaml file.
|
|
119
|
+
detector_name (str, optional): a string that gets put in the output file, not otherwise
|
|
120
|
+
used within this function
|
|
121
|
+
image_id_to_relative_path (dict, optional): YOLOv5 .json uses only basenames (e.g.
|
|
122
|
+
abc1234.JPG); by default these will be appended to the input path to create pathnames.
|
|
123
|
+
If you have a flat folder, this is fine. If you want to map base names to relative paths in
|
|
124
|
+
a more complicated way, use this parameter.
|
|
125
|
+
offset_yolo_class_ids (bool, optional): YOLOv5 class IDs always start at zero; if you want to
|
|
126
|
+
make the output classes start at 1, set offset_yolo_class_ids to True.
|
|
127
|
+
truncate_to_standard_md_precision (bool, optional): YOLOv5 .json includes lots of
|
|
128
|
+
(not-super-meaningful) precision, set this to truncate to COORD_DIGITS and CONF_DIGITS.
|
|
129
|
+
image_id_to_error (dict, optional): if you want to include image IDs in the output file for which
|
|
130
|
+
you couldn't prepare the input file in the first place due to errors, include them here.
|
|
134
131
|
"""
|
|
135
132
|
|
|
136
133
|
assert os.path.isfile(yolo_json_file), \
|
|
@@ -314,14 +311,25 @@ def yolo_json_output_to_md_output(yolo_json_file, image_folder,
|
|
|
314
311
|
# ...def yolo_json_output_to_md_output(...)
|
|
315
312
|
|
|
316
313
|
|
|
317
|
-
def yolo_txt_output_to_md_output(input_results_folder,
|
|
318
|
-
|
|
314
|
+
def yolo_txt_output_to_md_output(input_results_folder,
|
|
315
|
+
image_folder,
|
|
316
|
+
output_file,
|
|
317
|
+
detector_tag=None):
|
|
319
318
|
"""
|
|
320
|
-
Converts a folder of YOLO-
|
|
319
|
+
Converts a folder of YOLO-output .txt files to MD .json format.
|
|
321
320
|
|
|
322
321
|
Less finished than the .json conversion function; this .txt conversion assumes
|
|
323
322
|
a hard-coded mapping representing the standard MD categories (in MD indexing,
|
|
324
323
|
1/2/3=animal/person/vehicle; in YOLO indexing, 0/1/2=animal/person/vehicle).
|
|
324
|
+
|
|
325
|
+
Args:
|
|
326
|
+
input_results_folder (str): the folder containing YOLO-output .txt files
|
|
327
|
+
image_folder (str): the folder where images live, may be the same as
|
|
328
|
+
[input_results_folder]
|
|
329
|
+
output_file (str): the MD-formatted .json file to which we should write
|
|
330
|
+
results
|
|
331
|
+
detector_tag (str, optional): string to put in the 'detector' field in the
|
|
332
|
+
output file
|
|
325
333
|
"""
|
|
326
334
|
|
|
327
335
|
assert os.path.isdir(input_results_folder)
|
|
@@ -426,3 +434,8 @@ if False:
|
|
|
426
434
|
image_folder = os.path.expanduser('~/data/KRU-test')
|
|
427
435
|
output_file = os.path.expanduser('~/data/mdv5a-yolo-pt-kru.json')
|
|
428
436
|
yolo_txt_output_to_md_output(input_results_folder,image_folder,output_file)
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
#%% Command-line driver
|
|
440
|
+
|
|
441
|
+
# TODO
|