megadetector 5.0.8__py3-none-any.whl → 5.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (190) hide show
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +65 -65
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
  20. api/batch_processing/postprocessing/compare_batch_results.py +113 -43
  21. api/batch_processing/postprocessing/convert_output_format.py +41 -16
  22. api/batch_processing/postprocessing/load_api_results.py +16 -17
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +52 -22
  25. api/batch_processing/postprocessing/merge_detections.py +14 -14
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
  27. api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +102 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -263
  71. data_management/coco_to_yolo.py +79 -58
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +62 -24
  76. data_management/databases/subset_json_db.py +24 -15
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -162
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -158
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +7 -7
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +65 -24
  120. data_management/labelme_to_yolo.py +8 -8
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +13 -13
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +44 -110
  128. data_management/lila/generate_lila_per_image_labels.py +55 -42
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +96 -33
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +110 -97
  135. data_management/remap_coco_categories.py +83 -83
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +30 -23
  138. data_management/wi_download_csv_to_coco.py +246 -239
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +300 -60
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +179 -113
  147. detection/run_inference_with_yolov5_val.py +108 -48
  148. detection/run_tiled_inference.py +111 -40
  149. detection/tf_detector.py +51 -29
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +228 -68
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -871
  157. md_utils/path_utils.py +460 -134
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +176 -60
  163. md_utils/write_html_image_list.py +40 -33
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +597 -291
  168. md_visualization/visualize_db.py +76 -48
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/METADATA +13 -7
  171. megadetector-5.0.10.dist-info/RECORD +224 -0
  172. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/top_level.txt +1 -0
  173. taxonomy_mapping/__init__.py +0 -0
  174. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  175. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  176. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  177. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  178. taxonomy_mapping/retrieve_sample_image.py +12 -12
  179. taxonomy_mapping/simple_image_download.py +11 -11
  180. taxonomy_mapping/species_lookup.py +10 -10
  181. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  182. taxonomy_mapping/taxonomy_graph.py +47 -47
  183. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  184. data_management/cct_json_to_filename_json.py +0 -89
  185. data_management/cct_to_csv.py +0 -140
  186. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  187. detection/detector_training/copy_checkpoints.py +0 -43
  188. megadetector-5.0.8.dist-info/RECORD +0 -205
  189. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/LICENSE +0 -0
  190. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/WHEEL +0 -0
@@ -1,43 +1,38 @@
1
- ########
2
- #
3
- # yolo_output_to_md_output.py
4
- #
5
- # Converts the output of YOLOv5's detect.py or val.py to the MD API output format.
6
- #
7
- # Command-line driver not done yet, this has only been run interactively.
8
- #
9
- ########
10
-
11
- ### Converting .txt files ###
12
-
13
- #
14
- # detect.py writes a .txt file per image, in YOLO training format. Converting from this
15
- # format does not currently support recursive results, since detect.py doesn't save filenames
16
- # in a way that allows easy inference of folder names. Requires access to the input
17
- # images, because the YOLO format uses the *absence* of a results file to indicate that
18
- # no detections are present.
19
- #
20
- # YOLOv5 output has one text file per image, like so:
21
- #
22
- # 0 0.0141693 0.469758 0.0283385 0.131552 0.761428
23
- #
24
- # That's [class, x_center, y_center, width_of_box, height_of_box, confidence]
25
- #
26
- # val.py can write in this format as well, using the --save-txt argument.
27
- #
28
- # In both cases, a confidence value is only written to each line if you include the --save-conf
29
- # argument. Confidence values are required by this conversion script.
30
- #
31
-
32
- ### Converting .json files ###
33
-
34
- #
35
- # val.py can also write a .json file in COCO-ish format. It's "COCO-ish" because it's
36
- # just the "images" portion of a COCO .json file.
37
- #
38
- # Converting from this format also requires access to the original images, since the format
39
- # written by YOLOv5 uses absolute coordinates, but MD results are in relative coordinates.
40
- #
1
+ """
2
+
3
+ yolo_output_to_md_output.py
4
+
5
+ Converts the output of YOLOv5's detect.py or val.py to the MD API output format.
6
+
7
+ **Converting .txt files**
8
+
9
+ detect.py writes a .txt file per image, in YOLO training format. Converting from this
10
+ format does not currently support recursive results, since detect.py doesn't save filenames
11
+ in a way that allows easy inference of folder names. Requires access to the input
12
+ images, because the YOLO format uses the *absence* of a results file to indicate that
13
+ no detections are present.
14
+
15
+ YOLOv5 output has one text file per image, like so:
16
+
17
+ 0 0.0141693 0.469758 0.0283385 0.131552 0.761428
18
+
19
+ That's [class, x_center, y_center, width_of_box, height_of_box, confidence]
20
+
21
+ val.py can write in this format as well, using the --save-txt argument.
22
+
23
+ In both cases, a confidence value is only written to each line if you include the --save-conf
24
+ argument. Confidence values are required by this conversion script.
25
+
26
+
27
+ **Converting .json files**
28
+
29
+ val.py can also write a .json file in COCO-ish format. It's "COCO-ish" because it's
30
+ just the "images" portion of a COCO .json file.
31
+
32
+ Converting from this format also requires access to the original images, since the format
33
+ written by YOLOv5 uses absolute coordinates, but MD results are in relative coordinates.
34
+
35
+ """
41
36
 
42
37
  #%% Imports and constants
43
38
 
@@ -51,9 +46,7 @@ from tqdm import tqdm
51
46
 
52
47
  from md_utils import path_utils
53
48
  from md_utils import ct_utils
54
-
55
49
  from md_visualization import visualization_utils as vis_utils
56
-
57
50
  from detection.run_detector import CONF_DIGITS, COORD_DIGITS
58
51
 
59
52
 
@@ -61,9 +54,16 @@ from detection.run_detector import CONF_DIGITS, COORD_DIGITS
61
54
 
62
55
  def read_classes_from_yolo_dataset_file(fn):
63
56
  """
64
- Read a dictionary mapping integer class IDs to class names from a YOLOv5/YOLOv8
57
+ Reads a dictionary mapping integer class IDs to class names from a YOLOv5/YOLOv8
65
58
  dataset.yaml file or a .json file. A .json file should contain a dictionary mapping
66
59
  integer category IDs to string category names.
60
+
61
+ Args:
62
+ fn (str): YOLOv5/YOLOv8 dataset file with a .yml or .yaml extension, or a .json file
63
+ mapping integer category IDs to category names.
64
+
65
+ Returns:
66
+ dict: a mapping from integer category IDs to category names
67
67
  """
68
68
 
69
69
  if fn.endswith('.yml') or fn.endswith('.yaml'):
@@ -92,45 +92,42 @@ def read_classes_from_yolo_dataset_file(fn):
92
92
  raise ValueError('Unrecognized category file type: {}'.format(fn))
93
93
 
94
94
  assert len(category_id_to_name) > 0, 'Failed to read class mappings from {}'.format(fn)
95
+
95
96
  return category_id_to_name
96
97
 
97
98
 
98
- def yolo_json_output_to_md_output(yolo_json_file, image_folder,
99
- output_file, yolo_category_id_to_name,
99
+ def yolo_json_output_to_md_output(yolo_json_file,
100
+ image_folder,
101
+ output_file,
102
+ yolo_category_id_to_name,
100
103
  detector_name='unknown',
101
104
  image_id_to_relative_path=None,
102
105
  offset_yolo_class_ids=True,
103
106
  truncate_to_standard_md_precision=True,
104
107
  image_id_to_error=None):
105
108
  """
106
- Convert a YOLOv5 .json file to MD .json format.
109
+ Converts a YOLOv5/YOLOv8 .json file to MD .json format.
107
110
 
108
111
  Args:
109
112
 
110
- - yolo_json_file: the .json file to convert from YOLOv5 format to MD output format.
111
-
112
- - image_folder: the .json file contains relative path names, this is the path base.
113
-
114
- - yolo_category_id_to_name: the .json file contains only numeric identifiers for
115
- categories, but we want names and numbers for the output format; this is a
116
- dict mapping numbers to names. Can also be a YOLOv5 dataset.yaml file.
117
-
118
- - detector_name: a string that gets put in the output file, not otherwise used within
119
- this function.
120
-
121
- - image_id_to_relative_path: YOLOv5 .json uses only basenames (e.g. abc1234.JPG);
122
- by default these will be appended to the input path to create pathnames, so if you
123
- have a flat folder, this is fine. If you want to map base names to relative paths, use
124
- this dict.
125
-
126
- - offset_yolo_class_ids: YOLOv5 class IDs always start at zero; if you want to make the
127
- output classes start at 1, set offset_yolo_class_ids to True.
128
-
129
- - truncate_to_standard_md_precision: YOLOv5 .json includes lots of (not-super-meaningful)
130
- precision, set this to truncate to COORD_DIGITS and CONF_DIGITS.
131
-
132
- - image_id_to_error: if you want to include image IDs in the output file for which you couldn't
133
- prepare the input file in the first place due to errors, include them here.
113
+ yolo_json_file (str): the .json file to convert from YOLOv5 format to MD output format
114
+ image_folder (str): the .json file contains relative path names, this is the path base
115
+ yolo_category_id_to_name (str or dict): the .json results file contains only numeric
116
+ identifiers for categories, but we want names and numbers for the output format;
117
+ yolo_category_id_to_name provides that mapping either as a dict or as a YOLOv5
118
+ dataset.yaml file.
119
+ detector_name (str, optional): a string that gets put in the output file, not otherwise
120
+ used within this function
121
+ image_id_to_relative_path (dict, optional): YOLOv5 .json uses only basenames (e.g.
122
+ abc1234.JPG); by default these will be appended to the input path to create pathnames.
123
+ If you have a flat folder, this is fine. If you want to map base names to relative paths in
124
+ a more complicated way, use this parameter.
125
+ offset_yolo_class_ids (bool, optional): YOLOv5 class IDs always start at zero; if you want to
126
+ make the output classes start at 1, set offset_yolo_class_ids to True.
127
+ truncate_to_standard_md_precision (bool, optional): YOLOv5 .json includes lots of
128
+ (not-super-meaningful) precision, set this to truncate to COORD_DIGITS and CONF_DIGITS.
129
+ image_id_to_error (dict, optional): if you want to include image IDs in the output file for which
130
+ you couldn't prepare the input file in the first place due to errors, include them here.
134
131
  """
135
132
 
136
133
  assert os.path.isfile(yolo_json_file), \
@@ -314,14 +311,25 @@ def yolo_json_output_to_md_output(yolo_json_file, image_folder,
314
311
  # ...def yolo_json_output_to_md_output(...)
315
312
 
316
313
 
317
- def yolo_txt_output_to_md_output(input_results_folder, image_folder,
318
- output_file, detector_tag=None):
314
+ def yolo_txt_output_to_md_output(input_results_folder,
315
+ image_folder,
316
+ output_file,
317
+ detector_tag=None):
319
318
  """
320
- Converts a folder of YOLO-outptu .txt files to MD .json format.
319
+ Converts a folder of YOLO-output .txt files to MD .json format.
321
320
 
322
321
  Less finished than the .json conversion function; this .txt conversion assumes
323
322
  a hard-coded mapping representing the standard MD categories (in MD indexing,
324
323
  1/2/3=animal/person/vehicle; in YOLO indexing, 0/1/2=animal/person/vehicle).
324
+
325
+ Args:
326
+ input_results_folder (str): the folder containing YOLO-output .txt files
327
+ image_folder (str): the folder where images live, may be the same as
328
+ [input_results_folder]
329
+ output_file (str): the MD-formatted .json file to which we should write
330
+ results
331
+ detector_tag (str, optional): string to put in the 'detector' field in the
332
+ output file
325
333
  """
326
334
 
327
335
  assert os.path.isdir(input_results_folder)
@@ -426,3 +434,8 @@ if False:
426
434
  image_folder = os.path.expanduser('~/data/KRU-test')
427
435
  output_file = os.path.expanduser('~/data/mdv5a-yolo-pt-kru.json')
428
436
  yolo_txt_output_to_md_output(input_results_folder,image_folder,output_file)
437
+
438
+
439
+ #%% Command-line driver
440
+
441
+ # TODO
@@ -1,10 +1,10 @@
1
- ########
2
- #
3
- # yolo_to_coco.py
4
- #
5
- # Converts a folder of YOLO-formatted annotation files to a COCO-formatted dataset.
6
- #
7
- ########
1
+ """
2
+
3
+ yolo_to_coco.py
4
+
5
+ Converts a folder of YOLO-formatted annotation files to a COCO-formatted dataset.
6
+
7
+ """
8
8
 
9
9
  #%% Imports and constants
10
10
 
@@ -18,6 +18,8 @@ from functools import partial
18
18
  from tqdm import tqdm
19
19
 
20
20
  from md_utils.path_utils import find_images
21
+ from md_utils.path_utils import recursive_file_list
22
+ from md_utils.path_utils import find_image_strings
21
23
  from md_utils.ct_utils import invert_dictionary
22
24
  from md_visualization.visualization_utils import open_image
23
25
  from data_management.yolo_output_to_md_output import read_classes_from_yolo_dataset_file
@@ -25,9 +27,13 @@ from data_management.yolo_output_to_md_output import read_classes_from_yolo_data
25
27
 
26
28
  #%% Support functions
27
29
 
28
- def filename_to_image_id(fn):
30
+ def _filename_to_image_id(fn):
31
+ """
32
+ Image IDs can't have spaces in them, replace spaces with underscores
33
+ """
29
34
  return fn.replace(' ','_')
30
35
 
36
+
31
37
  def _process_image(fn_abs,input_folder,category_id_to_name):
32
38
  """
33
39
  Internal support function for processing one image's labels.
@@ -35,7 +41,7 @@ def _process_image(fn_abs,input_folder,category_id_to_name):
35
41
 
36
42
  # Create the image object for this image
37
43
  fn_relative = os.path.relpath(fn_abs,input_folder)
38
- image_id = filename_to_image_id(fn_relative)
44
+ image_id = _filename_to_image_id(fn_relative)
39
45
 
40
46
  # This is done in a separate loop now
41
47
  #
@@ -127,60 +133,28 @@ def _process_image(fn_abs,input_folder,category_id_to_name):
127
133
  # ...def _process_image(...)
128
134
 
129
135
 
130
-
131
- #%% Main conversion function
132
-
133
- def yolo_to_coco(input_folder,
134
- class_name_file,
135
- output_file=None,
136
- empty_image_handling='no_annotations',
137
- empty_image_category_name='empty',
138
- error_image_handling='no_annotations',
139
- allow_images_without_label_files=True,
140
- n_workers=1,
141
- pool_type='thread',
142
- recursive=True,
143
- exclude_string=None,
144
- include_string=None):
136
+ def load_yolo_class_list(class_name_file):
145
137
  """
146
- Convert the YOLO-formatted data in [input_folder] to a COCO-formatted dictionary,
147
- reading class names from [class_name_file], which can be a flat list with a .txt
148
- extension or a YOLO dataset.yml file. Optionally writes the output dataset to [output_file].
149
-
150
- empty_image_handling can be:
151
-
152
- * 'no_annotations': include the image in the image list, with no annotations
153
-
154
- * 'empty_annotations': include the image in the image list, and add an annotation without
155
- any bounding boxes, using a category called [empty_image_category_name].
156
-
157
- * 'skip': don't include the image in the image list
158
-
159
- * 'error': there shouldn't be any empty images
160
-
161
- error_image_handling can be:
162
-
163
- * 'skip': don't include the image at all
164
-
165
- * 'no_annotations': include with no annotations
166
-
167
- All images will be assigned an "error" value, usually None.
168
-
169
- Returns a COCO-formatted dictionary.
138
+ Loads a dictionary mapping zero-indexed IDs to class names from the text/yaml file
139
+ [class_name_file].
140
+
141
+ Args:
142
+ class_name_file (str or list): this can be:
143
+ - a .yml or .yaml file in YOLO's dataset.yaml format
144
+ - a .txt or .data file containing a flat list of class names
145
+ - a list of class names
146
+
147
+ Returns:
148
+ dict: A dict mapping zero-indexed integer IDs to class names
170
149
  """
171
150
 
172
- ## Validate input
173
-
174
- assert os.path.isdir(input_folder)
175
- assert os.path.isfile(class_name_file)
176
-
177
- assert empty_image_handling in \
178
- ('no_annotations','empty_annotations','skip','error'), \
179
- 'Unrecognized empty image handling spec: {}'.format(empty_image_handling)
180
-
151
+ # class_name_file can also be a list of class names
152
+ if isinstance(class_name_file,list):
153
+ category_id_to_name = {}
154
+ for i_name,name in enumerate(class_name_file):
155
+ category_id_to_name[i_name] = name
156
+ return category_id_to_name
181
157
 
182
- ## Read class names
183
-
184
158
  ext = os.path.splitext(class_name_file)[1][1:]
185
159
  assert ext in ('yml','txt','yaml','data'), 'Unrecognized class name file type {}'.format(
186
160
  class_name_file)
@@ -211,6 +185,267 @@ def yolo_to_coco(input_folder,
211
185
 
212
186
  assert ext in ('yml','yaml')
213
187
  category_id_to_name = read_classes_from_yolo_dataset_file(class_name_file)
188
+
189
+ return category_id_to_name
190
+
191
+ # ...load_yolo_class_list(...)
192
+
193
+
194
+ def validate_label_file(label_file,category_id_to_name=None,verbose=False):
195
+ """
196
+ Verifies that [label_file] is a valid YOLO label file. Does not check the extension.
197
+
198
+ Args:
199
+ label_file (str): the .txt file to validate
200
+ category_id_to_name (dict, optional): a dict mapping integer category IDs to names;
201
+ if this is not None, this function errors if the file uses a category that's not
202
+ in this dict
203
+ verbose (bool, optional): enable additional debug console output
204
+
205
+ Returns:
206
+ dict: a dict with keys 'file' (the same as [label_file]) and 'errors' (a list of
207
+ errors (if any) that we found in this file)
208
+ """
209
+
210
+ label_result = {}
211
+ label_result['file'] = label_file
212
+ label_result['errors'] = []
213
+
214
+ try:
215
+ with open(label_file,'r') as f:
216
+ lines = f.readlines()
217
+ except Exception as e:
218
+ label_result['errors'].append('Read error: {}'.format(str(e)))
219
+ return label_result
220
+
221
+ # i_line = 0; line = lines[i_line]
222
+ for i_line,line in enumerate(lines):
223
+ s = line.strip()
224
+ if len(s) == 0 or s[0] == '#':
225
+ continue
226
+
227
+ try:
228
+
229
+ tokens = s.split()
230
+ assert len(tokens) == 5, '{} tokens'.format(len(tokens))
231
+
232
+ if category_id_to_name is not None:
233
+ category_id = int(tokens[0])
234
+ assert category_id in category_id_to_name, \
235
+ 'Unrecognized category ID {}'.format(category_id)
236
+
237
+ yolo_bbox = [float(x) for x in tokens[1:]]
238
+
239
+ except Exception as e:
240
+ label_result['errors'].append('Token error at line {}: {}'.format(i_line,str(e)))
241
+ continue
242
+
243
+ normalized_x_center = yolo_bbox[0]
244
+ normalized_y_center = yolo_bbox[1]
245
+ normalized_width = yolo_bbox[2]
246
+ normalized_height = yolo_bbox[3]
247
+
248
+ normalized_x_min = normalized_x_center - normalized_width / 2.0
249
+ normalized_x_max = normalized_x_center + normalized_width / 2.0
250
+ normalized_y_min = normalized_y_center - normalized_height / 2.0
251
+ normalized_y_max = normalized_y_center + normalized_height / 2.0
252
+
253
+ if normalized_x_min < 0 or normalized_y_min < 0 or \
254
+ normalized_x_max > 1 or normalized_y_max > 1:
255
+ label_result['errors'].append('Invalid bounding box: {} {} {} {}'.format(
256
+ normalized_x_min,normalized_y_min,normalized_x_max,normalized_y_max))
257
+
258
+ # ...for each line
259
+
260
+ if verbose:
261
+ if len(label_result['errors']) > 0:
262
+ print('Errors for {}:'.format(label_file))
263
+ for error in label_result['errors']:
264
+ print(error)
265
+
266
+ return label_result
267
+
268
+ # ...def validate_label_file(...)
269
+
270
+
271
+ def validate_yolo_dataset(input_folder, class_name_file, n_workers=1, pool_type='thread', verbose=False):
272
+ """
273
+ Verifies all the labels in a YOLO dataset folder.
274
+
275
+ Looks for:
276
+
277
+ * Image files without label files
278
+ * Text files without image files
279
+ * Illegal classes in label files
280
+ * Invalid boxes in label files
281
+
282
+ Args:
283
+ input_folder (str): the YOLO dataset folder to validate
284
+ class_name_file (str or list): a list of classes, a flat text file, or a yolo
285
+ dataset.yml/.yaml file. If it's a dataset.yml file, that file should point to
286
+ input_folder as the base folder, though this is not explicitly checked.
287
+ n_workers (int, optional): number of concurrent workers, set to <= 1 to disable
288
+ parallelization
289
+ pool_type (str, optional): 'thread' or 'process', worker type to use for parallelization;
290
+ not used if [n_workers] <= 1
291
+ verbose (bool, optional): enable additional debug console output
292
+
293
+ Returns:
294
+ dict: validation results, as a dict with fields:
295
+
296
+ - image_files_without_label_files (list)
297
+ - label_files_without_images (list)
298
+ - label_results (list of dicts with fields 'file' and 'errors')
299
+ """
300
+
301
+ # Validate arguments
302
+ assert os.path.isdir(input_folder), 'Could not find input folder {}'.format(input_folder)
303
+ if n_workers > 1:
304
+ assert pool_type in ('thread','process'), 'Illegal pool type {}'.format(pool_type)
305
+
306
+ category_id_to_name = load_yolo_class_list(class_name_file)
307
+
308
+ print('Enumerating files in {}'.format(input_folder))
309
+
310
+ all_files = recursive_file_list(input_folder,recursive=True,return_relative_paths=False,
311
+ convert_slashes=True)
312
+ label_files = [fn for fn in all_files if fn.endswith('.txt')]
313
+ image_files = find_image_strings(all_files)
314
+ print('Found {} images files and {} label files in {}'.format(
315
+ len(image_files),len(label_files),input_folder))
316
+
317
+ label_files_set = set(label_files)
318
+
319
+ image_files_without_extension = set()
320
+ for fn in image_files:
321
+ image_file_without_extension = os.path.splitext(fn)[0]
322
+ assert image_file_without_extension not in image_files_without_extension, \
323
+ 'Duplicate image file, likely with different extensions: {}'.format(fn)
324
+ image_files_without_extension.add(image_file_without_extension)
325
+
326
+ print('Looking for missing image/label files')
327
+
328
+ image_files_without_label_files = []
329
+ label_files_without_images = []
330
+
331
+ for image_file in tqdm(image_files):
332
+ expected_label_file = os.path.splitext(image_file)[0] + '.txt'
333
+ if expected_label_file not in label_files_set:
334
+ image_files_without_label_files.append(image_file)
335
+
336
+ for label_file in tqdm(label_files):
337
+ expected_image_file_without_extension = os.path.splitext(label_file)[0]
338
+ if expected_image_file_without_extension not in image_files_without_extension:
339
+ label_files_without_images.append(label_file)
340
+
341
+ print('Found {} image files without labels, {} labels without images'.format(
342
+ len(image_files_without_label_files),len(label_files_without_images)))
343
+
344
+ print('Validating label files')
345
+
346
+ if n_workers <= 1:
347
+
348
+ label_results = []
349
+ for fn_abs in tqdm(label_files):
350
+ label_results.append(validate_label_file(fn_abs,
351
+ category_id_to_name=category_id_to_name,
352
+ verbose=verbose))
353
+
354
+ else:
355
+
356
+ assert pool_type in ('process','thread'), 'Illegal pool type {}'.format(pool_type)
357
+
358
+ if pool_type == 'thread':
359
+ pool = ThreadPool(n_workers)
360
+ else:
361
+ pool = Pool(n_workers)
362
+
363
+ print('Starting a {} pool of {} workers'.format(pool_type,n_workers))
364
+
365
+ p = partial(validate_label_file,
366
+ category_id_to_name=category_id_to_name,
367
+ verbose=verbose)
368
+ label_results = list(tqdm(pool.imap(p, label_files),
369
+ total=len(label_files)))
370
+
371
+ assert len(label_results) == len(label_files)
372
+
373
+ validation_results = {}
374
+ validation_results['image_files_without_label_files'] = image_files_without_label_files
375
+ validation_results['label_files_without_images'] = label_files_without_images
376
+ validation_results['label_results'] = label_results
377
+
378
+ return validation_results
379
+
380
+ # ...validate_yolo_dataset(...)
381
+
382
+
383
+ #%% Main conversion function
384
+
385
+ def yolo_to_coco(input_folder,
386
+ class_name_file,
387
+ output_file=None,
388
+ empty_image_handling='no_annotations',
389
+ empty_image_category_name='empty',
390
+ error_image_handling='no_annotations',
391
+ allow_images_without_label_files=True,
392
+ n_workers=1,
393
+ pool_type='thread',
394
+ recursive=True,
395
+ exclude_string=None,
396
+ include_string=None):
397
+ """
398
+ Converts a YOLO-formatted dataset to a COCO-formatted dataset.
399
+
400
+ All images will be assigned an "error" value, usually None.
401
+
402
+ Args:
403
+ input_folder (str): the YOLO dataset folder to convert
404
+ class_name_file (str or list): a list of classes, a flat text file, or a yolo
405
+ dataset.yml/.yaml file. If it's a dataset.yml file, that file should point to
406
+ input_folder as the base folder, though this is not explicitly checked.
407
+ output_file (str, optional): .json file to which we should write COCO .json data
408
+ empty_image_handling (str, optional): how to handle images with no boxes; whether
409
+ this includes images with no .txt files depends on the value of
410
+ [allow_images_without_label_files]. Can be:
411
+
412
+ - 'no_annotations': include the image in the image list, with no annotations
413
+ - 'empty_annotations': include the image in the image list, and add an annotation without
414
+ any bounding boxes, using a category called [empty_image_category_name].
415
+ - 'skip': don't include the image in the image list
416
+ - 'error': there shouldn't be any empty images
417
+ error_image_handling (str, optional): how to handle images that don't load properly; can
418
+ be:
419
+
420
+ - 'skip': don't include the image at all
421
+ - 'no_annotations': include with no annotations
422
+
423
+ n_workers (int, optional): number of concurrent workers, set to <= 1 to disable
424
+ parallelization
425
+ pool_type (str, optional): 'thread' or 'process', worker type to use for parallelization;
426
+ not used if [n_workers] <= 1
427
+ recursive (bool, optional): whether to recurse into [input_folder]
428
+ exclude_string (str, optional): exclude any images whose filename contains a string
429
+ include_string (str, optional): include only images whose filename contains a string
430
+
431
+ Returns:
432
+ dict: COCO-formatted data, the same as what's written to [output_file]
433
+ """
434
+
435
+ ## Validate input
436
+
437
+ assert os.path.isdir(input_folder)
438
+ assert os.path.isfile(class_name_file)
439
+
440
+ assert empty_image_handling in \
441
+ ('no_annotations','empty_annotations','skip','error'), \
442
+ 'Unrecognized empty image handling spec: {}'.format(empty_image_handling)
443
+
444
+
445
+ ## Read class names
446
+
447
+ category_id_to_name = load_yolo_class_list(class_name_file)
448
+
214
449
 
215
450
  # Find or create the empty image category, if necessary
216
451
  empty_category_id = None
@@ -275,7 +510,7 @@ def yolo_to_coco(input_folder,
275
510
  for fn_abs in tqdm(image_files_abs):
276
511
 
277
512
  fn_relative = os.path.relpath(fn_abs,input_folder)
278
- image_id = filename_to_image_id(fn_relative)
513
+ image_id = _filename_to_image_id(fn_relative)
279
514
  assert image_id not in image_ids, \
280
515
  'Oops, you have hit a very esoteric case where you have the same filename ' + \
281
516
  'with both spaces and underscores, this is not currently handled.'
@@ -434,3 +669,8 @@ if False:
434
669
 
435
670
  from md_utils.path_utils import open_file
436
671
  open_file(html_output_file)
672
+
673
+
674
+ #%% Command-line driver
675
+
676
+ # TODO
detection/__init__.py ADDED
File without changes
File without changes