megadetector 5.0.8__py3-none-any.whl → 5.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- api/__init__.py +0 -0
- api/batch_processing/__init__.py +0 -0
- api/batch_processing/api_core/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/score.py +0 -1
- api/batch_processing/api_core/server_job_status_table.py +0 -1
- api/batch_processing/api_core_support/__init__.py +0 -0
- api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
- api/batch_processing/api_support/__init__.py +0 -0
- api/batch_processing/api_support/summarize_daily_activity.py +0 -1
- api/batch_processing/data_preparation/__init__.py +0 -0
- api/batch_processing/data_preparation/manage_local_batch.py +65 -65
- api/batch_processing/data_preparation/manage_video_batch.py +8 -8
- api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
- api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
- api/batch_processing/postprocessing/__init__.py +0 -0
- api/batch_processing/postprocessing/add_max_conf.py +12 -12
- api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
- api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
- api/batch_processing/postprocessing/compare_batch_results.py +113 -43
- api/batch_processing/postprocessing/convert_output_format.py +41 -16
- api/batch_processing/postprocessing/load_api_results.py +16 -17
- api/batch_processing/postprocessing/md_to_coco.py +31 -21
- api/batch_processing/postprocessing/md_to_labelme.py +52 -22
- api/batch_processing/postprocessing/merge_detections.py +14 -14
- api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
- api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
- api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
- api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
- api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
- api/synchronous/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
- api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
- api/synchronous/api_core/animal_detection_api/config.py +35 -35
- api/synchronous/api_core/tests/__init__.py +0 -0
- api/synchronous/api_core/tests/load_test.py +109 -109
- classification/__init__.py +0 -0
- classification/aggregate_classifier_probs.py +21 -24
- classification/analyze_failed_images.py +11 -13
- classification/cache_batchapi_outputs.py +51 -51
- classification/create_classification_dataset.py +69 -68
- classification/crop_detections.py +54 -53
- classification/csv_to_json.py +97 -100
- classification/detect_and_crop.py +105 -105
- classification/evaluate_model.py +43 -42
- classification/identify_mislabeled_candidates.py +47 -46
- classification/json_to_azcopy_list.py +10 -10
- classification/json_validator.py +72 -71
- classification/map_classification_categories.py +44 -43
- classification/merge_classification_detection_output.py +68 -68
- classification/prepare_classification_script.py +157 -154
- classification/prepare_classification_script_mc.py +228 -228
- classification/run_classifier.py +27 -26
- classification/save_mislabeled.py +30 -30
- classification/train_classifier.py +20 -20
- classification/train_classifier_tf.py +21 -22
- classification/train_utils.py +10 -10
- data_management/__init__.py +0 -0
- data_management/annotations/__init__.py +0 -0
- data_management/annotations/annotation_constants.py +18 -31
- data_management/camtrap_dp_to_coco.py +238 -0
- data_management/cct_json_utils.py +102 -59
- data_management/cct_to_md.py +176 -158
- data_management/cct_to_wi.py +247 -219
- data_management/coco_to_labelme.py +272 -263
- data_management/coco_to_yolo.py +79 -58
- data_management/databases/__init__.py +0 -0
- data_management/databases/add_width_and_height_to_db.py +20 -16
- data_management/databases/combine_coco_camera_traps_files.py +35 -31
- data_management/databases/integrity_check_json_db.py +62 -24
- data_management/databases/subset_json_db.py +24 -15
- data_management/generate_crops_from_cct.py +27 -45
- data_management/get_image_sizes.py +188 -162
- data_management/importers/add_nacti_sizes.py +8 -8
- data_management/importers/add_timestamps_to_icct.py +78 -78
- data_management/importers/animl_results_to_md_results.py +158 -158
- data_management/importers/auckland_doc_test_to_json.py +9 -9
- data_management/importers/auckland_doc_to_json.py +8 -8
- data_management/importers/awc_to_json.py +7 -7
- data_management/importers/bellevue_to_json.py +15 -15
- data_management/importers/cacophony-thermal-importer.py +13 -13
- data_management/importers/carrizo_shrubfree_2018.py +8 -8
- data_management/importers/carrizo_trail_cam_2017.py +8 -8
- data_management/importers/cct_field_adjustments.py +9 -9
- data_management/importers/channel_islands_to_cct.py +10 -10
- data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
- data_management/importers/ena24_to_json.py +7 -7
- data_management/importers/filenames_to_json.py +8 -8
- data_management/importers/helena_to_cct.py +7 -7
- data_management/importers/idaho-camera-traps.py +7 -7
- data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
- data_management/importers/jb_csv_to_json.py +9 -9
- data_management/importers/mcgill_to_json.py +8 -8
- data_management/importers/missouri_to_json.py +18 -18
- data_management/importers/nacti_fieldname_adjustments.py +10 -10
- data_management/importers/noaa_seals_2019.py +7 -7
- data_management/importers/pc_to_json.py +7 -7
- data_management/importers/plot_wni_giraffes.py +7 -7
- data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
- data_management/importers/prepare_zsl_imerit.py +7 -7
- data_management/importers/rspb_to_json.py +8 -8
- data_management/importers/save_the_elephants_survey_A.py +8 -8
- data_management/importers/save_the_elephants_survey_B.py +9 -9
- data_management/importers/snapshot_safari_importer.py +26 -26
- data_management/importers/snapshot_safari_importer_reprise.py +665 -665
- data_management/importers/snapshot_serengeti_lila.py +14 -14
- data_management/importers/sulross_get_exif.py +8 -9
- data_management/importers/timelapse_csv_set_to_json.py +11 -11
- data_management/importers/ubc_to_json.py +13 -13
- data_management/importers/umn_to_json.py +7 -7
- data_management/importers/wellington_to_json.py +8 -8
- data_management/importers/wi_to_json.py +9 -9
- data_management/importers/zamba_results_to_md_results.py +181 -181
- data_management/labelme_to_coco.py +65 -24
- data_management/labelme_to_yolo.py +8 -8
- data_management/lila/__init__.py +0 -0
- data_management/lila/add_locations_to_island_camera_traps.py +9 -9
- data_management/lila/add_locations_to_nacti.py +147 -147
- data_management/lila/create_lila_blank_set.py +13 -13
- data_management/lila/create_lila_test_set.py +8 -8
- data_management/lila/create_links_to_md_results_files.py +106 -106
- data_management/lila/download_lila_subset.py +44 -110
- data_management/lila/generate_lila_per_image_labels.py +55 -42
- data_management/lila/get_lila_annotation_counts.py +18 -15
- data_management/lila/get_lila_image_counts.py +11 -11
- data_management/lila/lila_common.py +96 -33
- data_management/lila/test_lila_metadata_urls.py +132 -116
- data_management/ocr_tools.py +173 -128
- data_management/read_exif.py +110 -97
- data_management/remap_coco_categories.py +83 -83
- data_management/remove_exif.py +58 -62
- data_management/resize_coco_dataset.py +30 -23
- data_management/wi_download_csv_to_coco.py +246 -239
- data_management/yolo_output_to_md_output.py +86 -73
- data_management/yolo_to_coco.py +300 -60
- detection/__init__.py +0 -0
- detection/detector_training/__init__.py +0 -0
- detection/process_video.py +85 -33
- detection/pytorch_detector.py +43 -25
- detection/run_detector.py +157 -72
- detection/run_detector_batch.py +179 -113
- detection/run_inference_with_yolov5_val.py +108 -48
- detection/run_tiled_inference.py +111 -40
- detection/tf_detector.py +51 -29
- detection/video_utils.py +606 -521
- docs/source/conf.py +43 -0
- md_utils/__init__.py +0 -0
- md_utils/azure_utils.py +9 -9
- md_utils/ct_utils.py +228 -68
- md_utils/directory_listing.py +59 -64
- md_utils/md_tests.py +968 -871
- md_utils/path_utils.py +460 -134
- md_utils/process_utils.py +157 -133
- md_utils/sas_blob_utils.py +20 -20
- md_utils/split_locations_into_train_val.py +45 -32
- md_utils/string_utils.py +33 -10
- md_utils/url_utils.py +176 -60
- md_utils/write_html_image_list.py +40 -33
- md_visualization/__init__.py +0 -0
- md_visualization/plot_utils.py +102 -109
- md_visualization/render_images_with_thumbnails.py +34 -34
- md_visualization/visualization_utils.py +597 -291
- md_visualization/visualize_db.py +76 -48
- md_visualization/visualize_detector_output.py +61 -42
- {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/METADATA +13 -7
- megadetector-5.0.10.dist-info/RECORD +224 -0
- {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/top_level.txt +1 -0
- taxonomy_mapping/__init__.py +0 -0
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
- taxonomy_mapping/map_new_lila_datasets.py +154 -154
- taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
- taxonomy_mapping/preview_lila_taxonomy.py +591 -591
- taxonomy_mapping/retrieve_sample_image.py +12 -12
- taxonomy_mapping/simple_image_download.py +11 -11
- taxonomy_mapping/species_lookup.py +10 -10
- taxonomy_mapping/taxonomy_csv_checker.py +18 -18
- taxonomy_mapping/taxonomy_graph.py +47 -47
- taxonomy_mapping/validate_lila_category_mappings.py +83 -76
- data_management/cct_json_to_filename_json.py +0 -89
- data_management/cct_to_csv.py +0 -140
- data_management/databases/remove_corrupted_images_from_db.py +0 -191
- detection/detector_training/copy_checkpoints.py +0 -43
- megadetector-5.0.8.dist-info/RECORD +0 -205
- {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/LICENSE +0 -0
- {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/WHEEL +0 -0
|
@@ -1,43 +1,38 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
# just the "images" portion of a COCO .json file.
|
|
37
|
-
#
|
|
38
|
-
# Converting from this format also requires access to the original images, since the format
|
|
39
|
-
# written by YOLOv5 uses absolute coordinates, but MD results are in relative coordinates.
|
|
40
|
-
#
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
yolo_output_to_md_output.py
|
|
4
|
+
|
|
5
|
+
Converts the output of YOLOv5's detect.py or val.py to the MD API output format.
|
|
6
|
+
|
|
7
|
+
**Converting .txt files**
|
|
8
|
+
|
|
9
|
+
detect.py writes a .txt file per image, in YOLO training format. Converting from this
|
|
10
|
+
format does not currently support recursive results, since detect.py doesn't save filenames
|
|
11
|
+
in a way that allows easy inference of folder names. Requires access to the input
|
|
12
|
+
images, because the YOLO format uses the *absence* of a results file to indicate that
|
|
13
|
+
no detections are present.
|
|
14
|
+
|
|
15
|
+
YOLOv5 output has one text file per image, like so:
|
|
16
|
+
|
|
17
|
+
0 0.0141693 0.469758 0.0283385 0.131552 0.761428
|
|
18
|
+
|
|
19
|
+
That's [class, x_center, y_center, width_of_box, height_of_box, confidence]
|
|
20
|
+
|
|
21
|
+
val.py can write in this format as well, using the --save-txt argument.
|
|
22
|
+
|
|
23
|
+
In both cases, a confidence value is only written to each line if you include the --save-conf
|
|
24
|
+
argument. Confidence values are required by this conversion script.
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
**Converting .json files**
|
|
28
|
+
|
|
29
|
+
val.py can also write a .json file in COCO-ish format. It's "COCO-ish" because it's
|
|
30
|
+
just the "images" portion of a COCO .json file.
|
|
31
|
+
|
|
32
|
+
Converting from this format also requires access to the original images, since the format
|
|
33
|
+
written by YOLOv5 uses absolute coordinates, but MD results are in relative coordinates.
|
|
34
|
+
|
|
35
|
+
"""
|
|
41
36
|
|
|
42
37
|
#%% Imports and constants
|
|
43
38
|
|
|
@@ -51,9 +46,7 @@ from tqdm import tqdm
|
|
|
51
46
|
|
|
52
47
|
from md_utils import path_utils
|
|
53
48
|
from md_utils import ct_utils
|
|
54
|
-
|
|
55
49
|
from md_visualization import visualization_utils as vis_utils
|
|
56
|
-
|
|
57
50
|
from detection.run_detector import CONF_DIGITS, COORD_DIGITS
|
|
58
51
|
|
|
59
52
|
|
|
@@ -61,9 +54,16 @@ from detection.run_detector import CONF_DIGITS, COORD_DIGITS
|
|
|
61
54
|
|
|
62
55
|
def read_classes_from_yolo_dataset_file(fn):
|
|
63
56
|
"""
|
|
64
|
-
|
|
57
|
+
Reads a dictionary mapping integer class IDs to class names from a YOLOv5/YOLOv8
|
|
65
58
|
dataset.yaml file or a .json file. A .json file should contain a dictionary mapping
|
|
66
59
|
integer category IDs to string category names.
|
|
60
|
+
|
|
61
|
+
Args:
|
|
62
|
+
fn (str): YOLOv5/YOLOv8 dataset file with a .yml or .yaml extension, or a .json file
|
|
63
|
+
mapping integer category IDs to category names.
|
|
64
|
+
|
|
65
|
+
Returns:
|
|
66
|
+
dict: a mapping from integer category IDs to category names
|
|
67
67
|
"""
|
|
68
68
|
|
|
69
69
|
if fn.endswith('.yml') or fn.endswith('.yaml'):
|
|
@@ -92,45 +92,42 @@ def read_classes_from_yolo_dataset_file(fn):
|
|
|
92
92
|
raise ValueError('Unrecognized category file type: {}'.format(fn))
|
|
93
93
|
|
|
94
94
|
assert len(category_id_to_name) > 0, 'Failed to read class mappings from {}'.format(fn)
|
|
95
|
+
|
|
95
96
|
return category_id_to_name
|
|
96
97
|
|
|
97
98
|
|
|
98
|
-
def yolo_json_output_to_md_output(yolo_json_file,
|
|
99
|
-
|
|
99
|
+
def yolo_json_output_to_md_output(yolo_json_file,
|
|
100
|
+
image_folder,
|
|
101
|
+
output_file,
|
|
102
|
+
yolo_category_id_to_name,
|
|
100
103
|
detector_name='unknown',
|
|
101
104
|
image_id_to_relative_path=None,
|
|
102
105
|
offset_yolo_class_ids=True,
|
|
103
106
|
truncate_to_standard_md_precision=True,
|
|
104
107
|
image_id_to_error=None):
|
|
105
108
|
"""
|
|
106
|
-
|
|
109
|
+
Converts a YOLOv5/YOLOv8 .json file to MD .json format.
|
|
107
110
|
|
|
108
111
|
Args:
|
|
109
112
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
- truncate_to_standard_md_precision: YOLOv5 .json includes lots of (not-super-meaningful)
|
|
130
|
-
precision, set this to truncate to COORD_DIGITS and CONF_DIGITS.
|
|
131
|
-
|
|
132
|
-
- image_id_to_error: if you want to include image IDs in the output file for which you couldn't
|
|
133
|
-
prepare the input file in the first place due to errors, include them here.
|
|
113
|
+
yolo_json_file (str): the .json file to convert from YOLOv5 format to MD output format
|
|
114
|
+
image_folder (str): the .json file contains relative path names, this is the path base
|
|
115
|
+
yolo_category_id_to_name (str or dict): the .json results file contains only numeric
|
|
116
|
+
identifiers for categories, but we want names and numbers for the output format;
|
|
117
|
+
yolo_category_id_to_name provides that mapping either as a dict or as a YOLOv5
|
|
118
|
+
dataset.yaml file.
|
|
119
|
+
detector_name (str, optional): a string that gets put in the output file, not otherwise
|
|
120
|
+
used within this function
|
|
121
|
+
image_id_to_relative_path (dict, optional): YOLOv5 .json uses only basenames (e.g.
|
|
122
|
+
abc1234.JPG); by default these will be appended to the input path to create pathnames.
|
|
123
|
+
If you have a flat folder, this is fine. If you want to map base names to relative paths in
|
|
124
|
+
a more complicated way, use this parameter.
|
|
125
|
+
offset_yolo_class_ids (bool, optional): YOLOv5 class IDs always start at zero; if you want to
|
|
126
|
+
make the output classes start at 1, set offset_yolo_class_ids to True.
|
|
127
|
+
truncate_to_standard_md_precision (bool, optional): YOLOv5 .json includes lots of
|
|
128
|
+
(not-super-meaningful) precision, set this to truncate to COORD_DIGITS and CONF_DIGITS.
|
|
129
|
+
image_id_to_error (dict, optional): if you want to include image IDs in the output file for which
|
|
130
|
+
you couldn't prepare the input file in the first place due to errors, include them here.
|
|
134
131
|
"""
|
|
135
132
|
|
|
136
133
|
assert os.path.isfile(yolo_json_file), \
|
|
@@ -314,14 +311,25 @@ def yolo_json_output_to_md_output(yolo_json_file, image_folder,
|
|
|
314
311
|
# ...def yolo_json_output_to_md_output(...)
|
|
315
312
|
|
|
316
313
|
|
|
317
|
-
def yolo_txt_output_to_md_output(input_results_folder,
|
|
318
|
-
|
|
314
|
+
def yolo_txt_output_to_md_output(input_results_folder,
|
|
315
|
+
image_folder,
|
|
316
|
+
output_file,
|
|
317
|
+
detector_tag=None):
|
|
319
318
|
"""
|
|
320
|
-
Converts a folder of YOLO-
|
|
319
|
+
Converts a folder of YOLO-output .txt files to MD .json format.
|
|
321
320
|
|
|
322
321
|
Less finished than the .json conversion function; this .txt conversion assumes
|
|
323
322
|
a hard-coded mapping representing the standard MD categories (in MD indexing,
|
|
324
323
|
1/2/3=animal/person/vehicle; in YOLO indexing, 0/1/2=animal/person/vehicle).
|
|
324
|
+
|
|
325
|
+
Args:
|
|
326
|
+
input_results_folder (str): the folder containing YOLO-output .txt files
|
|
327
|
+
image_folder (str): the folder where images live, may be the same as
|
|
328
|
+
[input_results_folder]
|
|
329
|
+
output_file (str): the MD-formatted .json file to which we should write
|
|
330
|
+
results
|
|
331
|
+
detector_tag (str, optional): string to put in the 'detector' field in the
|
|
332
|
+
output file
|
|
325
333
|
"""
|
|
326
334
|
|
|
327
335
|
assert os.path.isdir(input_results_folder)
|
|
@@ -426,3 +434,8 @@ if False:
|
|
|
426
434
|
image_folder = os.path.expanduser('~/data/KRU-test')
|
|
427
435
|
output_file = os.path.expanduser('~/data/mdv5a-yolo-pt-kru.json')
|
|
428
436
|
yolo_txt_output_to_md_output(input_results_folder,image_folder,output_file)
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
#%% Command-line driver
|
|
440
|
+
|
|
441
|
+
# TODO
|
data_management/yolo_to_coco.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
yolo_to_coco.py
|
|
4
|
+
|
|
5
|
+
Converts a folder of YOLO-formatted annotation files to a COCO-formatted dataset.
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
8
|
|
|
9
9
|
#%% Imports and constants
|
|
10
10
|
|
|
@@ -18,6 +18,8 @@ from functools import partial
|
|
|
18
18
|
from tqdm import tqdm
|
|
19
19
|
|
|
20
20
|
from md_utils.path_utils import find_images
|
|
21
|
+
from md_utils.path_utils import recursive_file_list
|
|
22
|
+
from md_utils.path_utils import find_image_strings
|
|
21
23
|
from md_utils.ct_utils import invert_dictionary
|
|
22
24
|
from md_visualization.visualization_utils import open_image
|
|
23
25
|
from data_management.yolo_output_to_md_output import read_classes_from_yolo_dataset_file
|
|
@@ -25,9 +27,13 @@ from data_management.yolo_output_to_md_output import read_classes_from_yolo_data
|
|
|
25
27
|
|
|
26
28
|
#%% Support functions
|
|
27
29
|
|
|
28
|
-
def
|
|
30
|
+
def _filename_to_image_id(fn):
|
|
31
|
+
"""
|
|
32
|
+
Image IDs can't have spaces in them, replace spaces with underscores
|
|
33
|
+
"""
|
|
29
34
|
return fn.replace(' ','_')
|
|
30
35
|
|
|
36
|
+
|
|
31
37
|
def _process_image(fn_abs,input_folder,category_id_to_name):
|
|
32
38
|
"""
|
|
33
39
|
Internal support function for processing one image's labels.
|
|
@@ -35,7 +41,7 @@ def _process_image(fn_abs,input_folder,category_id_to_name):
|
|
|
35
41
|
|
|
36
42
|
# Create the image object for this image
|
|
37
43
|
fn_relative = os.path.relpath(fn_abs,input_folder)
|
|
38
|
-
image_id =
|
|
44
|
+
image_id = _filename_to_image_id(fn_relative)
|
|
39
45
|
|
|
40
46
|
# This is done in a separate loop now
|
|
41
47
|
#
|
|
@@ -127,60 +133,28 @@ def _process_image(fn_abs,input_folder,category_id_to_name):
|
|
|
127
133
|
# ...def _process_image(...)
|
|
128
134
|
|
|
129
135
|
|
|
130
|
-
|
|
131
|
-
#%% Main conversion function
|
|
132
|
-
|
|
133
|
-
def yolo_to_coco(input_folder,
|
|
134
|
-
class_name_file,
|
|
135
|
-
output_file=None,
|
|
136
|
-
empty_image_handling='no_annotations',
|
|
137
|
-
empty_image_category_name='empty',
|
|
138
|
-
error_image_handling='no_annotations',
|
|
139
|
-
allow_images_without_label_files=True,
|
|
140
|
-
n_workers=1,
|
|
141
|
-
pool_type='thread',
|
|
142
|
-
recursive=True,
|
|
143
|
-
exclude_string=None,
|
|
144
|
-
include_string=None):
|
|
136
|
+
def load_yolo_class_list(class_name_file):
|
|
145
137
|
"""
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
* 'skip': don't include the image in the image list
|
|
158
|
-
|
|
159
|
-
* 'error': there shouldn't be any empty images
|
|
160
|
-
|
|
161
|
-
error_image_handling can be:
|
|
162
|
-
|
|
163
|
-
* 'skip': don't include the image at all
|
|
164
|
-
|
|
165
|
-
* 'no_annotations': include with no annotations
|
|
166
|
-
|
|
167
|
-
All images will be assigned an "error" value, usually None.
|
|
168
|
-
|
|
169
|
-
Returns a COCO-formatted dictionary.
|
|
138
|
+
Loads a dictionary mapping zero-indexed IDs to class names from the text/yaml file
|
|
139
|
+
[class_name_file].
|
|
140
|
+
|
|
141
|
+
Args:
|
|
142
|
+
class_name_file (str or list): this can be:
|
|
143
|
+
- a .yml or .yaml file in YOLO's dataset.yaml format
|
|
144
|
+
- a .txt or .data file containing a flat list of class names
|
|
145
|
+
- a list of class names
|
|
146
|
+
|
|
147
|
+
Returns:
|
|
148
|
+
dict: A dict mapping zero-indexed integer IDs to class names
|
|
170
149
|
"""
|
|
171
150
|
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
('no_annotations','empty_annotations','skip','error'), \
|
|
179
|
-
'Unrecognized empty image handling spec: {}'.format(empty_image_handling)
|
|
180
|
-
|
|
151
|
+
# class_name_file can also be a list of class names
|
|
152
|
+
if isinstance(class_name_file,list):
|
|
153
|
+
category_id_to_name = {}
|
|
154
|
+
for i_name,name in enumerate(class_name_file):
|
|
155
|
+
category_id_to_name[i_name] = name
|
|
156
|
+
return category_id_to_name
|
|
181
157
|
|
|
182
|
-
## Read class names
|
|
183
|
-
|
|
184
158
|
ext = os.path.splitext(class_name_file)[1][1:]
|
|
185
159
|
assert ext in ('yml','txt','yaml','data'), 'Unrecognized class name file type {}'.format(
|
|
186
160
|
class_name_file)
|
|
@@ -211,6 +185,267 @@ def yolo_to_coco(input_folder,
|
|
|
211
185
|
|
|
212
186
|
assert ext in ('yml','yaml')
|
|
213
187
|
category_id_to_name = read_classes_from_yolo_dataset_file(class_name_file)
|
|
188
|
+
|
|
189
|
+
return category_id_to_name
|
|
190
|
+
|
|
191
|
+
# ...load_yolo_class_list(...)
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def validate_label_file(label_file,category_id_to_name=None,verbose=False):
|
|
195
|
+
""""
|
|
196
|
+
Verifies that [label_file] is a valid YOLO label file. Does not check the extension.
|
|
197
|
+
|
|
198
|
+
Args:
|
|
199
|
+
label_file (str): the .txt file to validate
|
|
200
|
+
category_id_to_name (dict, optional): a dict mapping integer category IDs to names;
|
|
201
|
+
if this is not None, this function errors if the file uses a category that's not
|
|
202
|
+
in this dict
|
|
203
|
+
verbose (bool, optional): enable additional debug console output
|
|
204
|
+
|
|
205
|
+
Returns:
|
|
206
|
+
dict: a dict with keys 'file' (the same as [label_file]) and 'errors' (a list of
|
|
207
|
+
errors (if any) that we found in this file)
|
|
208
|
+
"""
|
|
209
|
+
|
|
210
|
+
label_result = {}
|
|
211
|
+
label_result['file'] = label_file
|
|
212
|
+
label_result['errors'] = []
|
|
213
|
+
|
|
214
|
+
try:
|
|
215
|
+
with open(label_file,'r') as f:
|
|
216
|
+
lines = f.readlines()
|
|
217
|
+
except Exception as e:
|
|
218
|
+
label_result['errors'].append('Read error: {}'.format(str(e)))
|
|
219
|
+
return label_result
|
|
220
|
+
|
|
221
|
+
# i_line = 0; line = lines[i_line]
|
|
222
|
+
for i_line,line in enumerate(lines):
|
|
223
|
+
s = line.strip()
|
|
224
|
+
if len(s) == 0 or s[0] == '#':
|
|
225
|
+
continue
|
|
226
|
+
|
|
227
|
+
try:
|
|
228
|
+
|
|
229
|
+
tokens = s.split()
|
|
230
|
+
assert len(tokens) == 5, '{} tokens'.format(len(tokens))
|
|
231
|
+
|
|
232
|
+
if category_id_to_name is not None:
|
|
233
|
+
category_id = int(tokens[0])
|
|
234
|
+
assert category_id in category_id_to_name, \
|
|
235
|
+
'Unrecognized category ID {}'.format(category_id)
|
|
236
|
+
|
|
237
|
+
yolo_bbox = [float(x) for x in tokens[1:]]
|
|
238
|
+
|
|
239
|
+
except Exception as e:
|
|
240
|
+
label_result['errors'].append('Token error at line {}: {}'.format(i_line,str(e)))
|
|
241
|
+
continue
|
|
242
|
+
|
|
243
|
+
normalized_x_center = yolo_bbox[0]
|
|
244
|
+
normalized_y_center = yolo_bbox[1]
|
|
245
|
+
normalized_width = yolo_bbox[2]
|
|
246
|
+
normalized_height = yolo_bbox[3]
|
|
247
|
+
|
|
248
|
+
normalized_x_min = normalized_x_center - normalized_width / 2.0
|
|
249
|
+
normalized_x_max = normalized_x_center + normalized_width / 2.0
|
|
250
|
+
normalized_y_min = normalized_y_center - normalized_height / 2.0
|
|
251
|
+
normalized_y_max = normalized_y_center + normalized_height / 2.0
|
|
252
|
+
|
|
253
|
+
if normalized_x_min < 0 or normalized_y_min < 0 or \
|
|
254
|
+
normalized_x_max > 1 or normalized_y_max > 1:
|
|
255
|
+
label_result['errors'].append('Invalid bounding box: {} {} {} {}'.format(
|
|
256
|
+
normalized_x_min,normalized_y_min,normalized_x_max,normalized_y_max))
|
|
257
|
+
|
|
258
|
+
# ...for each line
|
|
259
|
+
|
|
260
|
+
if verbose:
|
|
261
|
+
if len(label_result['errors']) > 0:
|
|
262
|
+
print('Errors for {}:'.format(label_file))
|
|
263
|
+
for error in label_result['errors']:
|
|
264
|
+
print(error)
|
|
265
|
+
|
|
266
|
+
return label_result
|
|
267
|
+
|
|
268
|
+
# ...def validate_label_file(...)
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def validate_yolo_dataset(input_folder, class_name_file, n_workers=1, pool_type='thread', verbose=False):
|
|
272
|
+
"""
|
|
273
|
+
Verifies all the labels in a YOLO dataset folder.
|
|
274
|
+
|
|
275
|
+
Looks for:
|
|
276
|
+
|
|
277
|
+
* Image files without label files
|
|
278
|
+
* Text files without image files
|
|
279
|
+
* Illegal classes in label files
|
|
280
|
+
* Invalid boxes in label files
|
|
281
|
+
|
|
282
|
+
Args:
|
|
283
|
+
input_folder (str): the YOLO dataset folder to validate
|
|
284
|
+
class_name_file (str or list): a list of classes, a flat text file, or a yolo
|
|
285
|
+
dataset.yml/.yaml file. If it's a dataset.yml file, that file should point to
|
|
286
|
+
input_folder as the base folder, though this is not explicitly checked.
|
|
287
|
+
n_workers (int, optional): number of concurrent workers, set to <= 1 to disable
|
|
288
|
+
parallelization
|
|
289
|
+
pool_type (str, optional): 'thread' or 'process', worker type to use for parallelization;
|
|
290
|
+
not used if [n_workers] <= 1
|
|
291
|
+
verbose (bool, optional): enable additional debug console output
|
|
292
|
+
|
|
293
|
+
Returns:
|
|
294
|
+
dict: validation results, as a dict with fields:
|
|
295
|
+
|
|
296
|
+
- image_files_without_label_files (list)
|
|
297
|
+
- label_files_without_images (list)
|
|
298
|
+
- label_results (list of dicts with fields 'file', 'errors')
|
|
299
|
+
"""
|
|
300
|
+
|
|
301
|
+
# Validate arguments
|
|
302
|
+
assert os.path.isdir(input_folder), 'Could not find input folder {}'.format(input_folder)
|
|
303
|
+
if n_workers > 1:
|
|
304
|
+
assert pool_type in ('thread','process'), 'Illegal pool type {}'.format(pool_type)
|
|
305
|
+
|
|
306
|
+
category_id_to_name = load_yolo_class_list(class_name_file)
|
|
307
|
+
|
|
308
|
+
print('Enumerating files in {}'.format(input_folder))
|
|
309
|
+
|
|
310
|
+
all_files = recursive_file_list(input_folder,recursive=True,return_relative_paths=False,
|
|
311
|
+
convert_slashes=True)
|
|
312
|
+
label_files = [fn for fn in all_files if fn.endswith('.txt')]
|
|
313
|
+
image_files = find_image_strings(all_files)
|
|
314
|
+
print('Found {} images files and {} label files in {}'.format(
|
|
315
|
+
len(image_files),len(label_files),input_folder))
|
|
316
|
+
|
|
317
|
+
label_files_set = set(label_files)
|
|
318
|
+
|
|
319
|
+
image_files_without_extension = set()
|
|
320
|
+
for fn in image_files:
|
|
321
|
+
image_file_without_extension = os.path.splitext(fn)[0]
|
|
322
|
+
assert image_file_without_extension not in image_files_without_extension, \
|
|
323
|
+
'Duplicate image file, likely with different extensions: {}'.format(fn)
|
|
324
|
+
image_files_without_extension.add(image_file_without_extension)
|
|
325
|
+
|
|
326
|
+
print('Looking for missing image/label files')
|
|
327
|
+
|
|
328
|
+
image_files_without_label_files = []
|
|
329
|
+
label_files_without_images = []
|
|
330
|
+
|
|
331
|
+
for image_file in tqdm(image_files):
|
|
332
|
+
expected_label_file = os.path.splitext(image_file)[0] + '.txt'
|
|
333
|
+
if expected_label_file not in label_files_set:
|
|
334
|
+
image_files_without_label_files.append(image_file)
|
|
335
|
+
|
|
336
|
+
for label_file in tqdm(label_files):
|
|
337
|
+
expected_image_file_without_extension = os.path.splitext(label_file)[0]
|
|
338
|
+
if expected_image_file_without_extension not in image_files_without_extension:
|
|
339
|
+
label_files_without_images.append(label_file)
|
|
340
|
+
|
|
341
|
+
print('Found {} image files without labels, {} labels without images'.format(
|
|
342
|
+
len(image_files_without_label_files),len(label_files_without_images)))
|
|
343
|
+
|
|
344
|
+
print('Validating label files')
|
|
345
|
+
|
|
346
|
+
if n_workers <= 1:
|
|
347
|
+
|
|
348
|
+
label_results = []
|
|
349
|
+
for fn_abs in tqdm(label_files):
|
|
350
|
+
label_results.append(validate_label_file(fn_abs,
|
|
351
|
+
category_id_to_name=category_id_to_name,
|
|
352
|
+
verbose=verbose))
|
|
353
|
+
|
|
354
|
+
else:
|
|
355
|
+
|
|
356
|
+
assert pool_type in ('process','thread'), 'Illegal pool type {}'.format(pool_type)
|
|
357
|
+
|
|
358
|
+
if pool_type == 'thread':
|
|
359
|
+
pool = ThreadPool(n_workers)
|
|
360
|
+
else:
|
|
361
|
+
pool = Pool(n_workers)
|
|
362
|
+
|
|
363
|
+
print('Starting a {} pool of {} workers'.format(pool_type,n_workers))
|
|
364
|
+
|
|
365
|
+
p = partial(validate_label_file,
|
|
366
|
+
category_id_to_name=category_id_to_name,
|
|
367
|
+
verbose=verbose)
|
|
368
|
+
label_results = list(tqdm(pool.imap(p, label_files),
|
|
369
|
+
total=len(label_files)))
|
|
370
|
+
|
|
371
|
+
assert len(label_results) == len(label_files)
|
|
372
|
+
|
|
373
|
+
validation_results = {}
|
|
374
|
+
validation_results['image_files_without_label_files'] = image_files_without_label_files
|
|
375
|
+
validation_results['label_files_without_images'] = label_files_without_images
|
|
376
|
+
validation_results['label_results'] = label_results
|
|
377
|
+
|
|
378
|
+
return validation_results
|
|
379
|
+
|
|
380
|
+
# ...validate_yolo_dataset(...)
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
#%% Main conversion function
|
|
384
|
+
|
|
385
|
+
def yolo_to_coco(input_folder,
|
|
386
|
+
class_name_file,
|
|
387
|
+
output_file=None,
|
|
388
|
+
empty_image_handling='no_annotations',
|
|
389
|
+
empty_image_category_name='empty',
|
|
390
|
+
error_image_handling='no_annotations',
|
|
391
|
+
allow_images_without_label_files=True,
|
|
392
|
+
n_workers=1,
|
|
393
|
+
pool_type='thread',
|
|
394
|
+
recursive=True,
|
|
395
|
+
exclude_string=None,
|
|
396
|
+
include_string=None):
|
|
397
|
+
"""
|
|
398
|
+
Converts a YOLO-formatted dataset to a COCO-formatted dataset.
|
|
399
|
+
|
|
400
|
+
All images will be assigned an "error" value, usually None.
|
|
401
|
+
|
|
402
|
+
Args:
|
|
403
|
+
input_folder (str): the YOLO dataset folder to convert
|
|
404
|
+
class_name_file (str or list): a list of classes, a flat text file, or a yolo
|
|
405
|
+
dataset.yml/.yaml file. If it's a dataset.yml file, that file should point to
|
|
406
|
+
input_folder as the base folder, though this is not explicitly checked.
|
|
407
|
+
output_file (str, optional): .json file to which we should write COCO .json data
|
|
408
|
+
empty_image_handling (str, optional): how to handle images with no boxes; whether
|
|
409
|
+
this includes images with no .txt files depends on the value of
|
|
410
|
+
[allow_images_without_label_files]. Can be:
|
|
411
|
+
|
|
412
|
+
- 'no_annotations': include the image in the image list, with no annotations
|
|
413
|
+
- 'empty_annotations': include the image in the image list, and add an annotation without
|
|
414
|
+
any bounding boxes, using a category called [empty_image_category_name].
|
|
415
|
+
- 'skip': don't include the image in the image list
|
|
416
|
+
- 'error': there shouldn't be any empty images
|
|
417
|
+
error_image_handling (str, optional): how to handle images that don't load properly; can
|
|
418
|
+
be:
|
|
419
|
+
|
|
420
|
+
- 'skip': don't include the image at all
|
|
421
|
+
- 'no_annotations': include with no annotations
|
|
422
|
+
|
|
423
|
+
n_workers (int, optional): number of concurrent workers, set to <= 1 to disable
|
|
424
|
+
parallelization
|
|
425
|
+
pool_type (str, optional): 'thread' or 'process', worker type to use for parallelization;
|
|
426
|
+
not used if [n_workers] <= 1
|
|
427
|
+
recursive (bool, optional): whether to recurse into [input_folder]
|
|
428
|
+
exclude_string (str, optional): exclude any images whose filename contains a string
|
|
429
|
+
include_string (str, optional): include only images whose filename contains a string
|
|
430
|
+
|
|
431
|
+
Returns:
|
|
432
|
+
dict: COCO-formatted data, the same as what's written to [output_file]
|
|
433
|
+
"""
|
|
434
|
+
|
|
435
|
+
## Validate input
|
|
436
|
+
|
|
437
|
+
assert os.path.isdir(input_folder)
|
|
438
|
+
assert os.path.isfile(class_name_file)
|
|
439
|
+
|
|
440
|
+
assert empty_image_handling in \
|
|
441
|
+
('no_annotations','empty_annotations','skip','error'), \
|
|
442
|
+
'Unrecognized empty image handling spec: {}'.format(empty_image_handling)
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
## Read class names
|
|
446
|
+
|
|
447
|
+
category_id_to_name = load_yolo_class_list(class_name_file)
|
|
448
|
+
|
|
214
449
|
|
|
215
450
|
# Find or create the empty image category, if necessary
|
|
216
451
|
empty_category_id = None
|
|
@@ -275,7 +510,7 @@ def yolo_to_coco(input_folder,
|
|
|
275
510
|
for fn_abs in tqdm(image_files_abs):
|
|
276
511
|
|
|
277
512
|
fn_relative = os.path.relpath(fn_abs,input_folder)
|
|
278
|
-
image_id =
|
|
513
|
+
image_id = _filename_to_image_id(fn_relative)
|
|
279
514
|
assert image_id not in image_ids, \
|
|
280
515
|
'Oops, you have hit a very esoteric case where you have the same filename ' + \
|
|
281
516
|
'with both spaces and underscores, this is not currently handled.'
|
|
@@ -434,3 +669,8 @@ if False:
|
|
|
434
669
|
|
|
435
670
|
from md_utils.path_utils import open_file
|
|
436
671
|
open_file(html_output_file)
|
|
672
|
+
|
|
673
|
+
|
|
674
|
+
#%% Command-line driver
|
|
675
|
+
|
|
676
|
+
# TODO
|
detection/__init__.py
ADDED
|
File without changes
|
|
File without changes
|