megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (191)
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +93 -79
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
  20. api/batch_processing/postprocessing/compare_batch_results.py +114 -44
  21. api/batch_processing/postprocessing/convert_output_format.py +62 -19
  22. api/batch_processing/postprocessing/load_api_results.py +17 -20
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +165 -68
  25. api/batch_processing/postprocessing/merge_detections.py +40 -15
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
  27. api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +107 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -0
  71. data_management/coco_to_yolo.py +86 -62
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +130 -83
  76. data_management/databases/subset_json_db.py +25 -16
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -144
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -160
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +8 -8
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +309 -159
  120. data_management/labelme_to_yolo.py +103 -60
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +114 -31
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +92 -90
  128. data_management/lila/generate_lila_per_image_labels.py +56 -43
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +103 -70
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +161 -99
  135. data_management/remap_coco_categories.py +84 -0
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +32 -44
  138. data_management/wi_download_csv_to_coco.py +246 -0
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +535 -95
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +189 -114
  147. detection/run_inference_with_yolov5_val.py +118 -51
  148. detection/run_tiled_inference.py +113 -42
  149. detection/tf_detector.py +51 -28
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +249 -70
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -862
  157. md_utils/path_utils.py +655 -155
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +208 -27
  163. md_utils/write_html_image_list.py +51 -35
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +908 -311
  168. md_visualization/visualize_db.py +109 -58
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
  173. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  174. taxonomy_mapping/__init__.py +0 -0
  175. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  176. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  177. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  178. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  179. taxonomy_mapping/retrieve_sample_image.py +12 -12
  180. taxonomy_mapping/simple_image_download.py +11 -11
  181. taxonomy_mapping/species_lookup.py +10 -10
  182. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  183. taxonomy_mapping/taxonomy_graph.py +47 -47
  184. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  185. data_management/cct_json_to_filename_json.py +0 -89
  186. data_management/cct_to_csv.py +0 -140
  187. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  188. detection/detector_training/copy_checkpoints.py +0 -43
  189. md_visualization/visualize_megadb.py +0 -183
  190. megadetector-5.0.7.dist-info/RECORD +0 -202
  191. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
@@ -1,10 +1,10 @@
1
- ########
2
- #
3
- # labelme_to_coco.py
4
- #
5
- # Converts a folder of labelme-formatted .json files to COCO.
6
- #
7
- ########
1
+ """
2
+
3
+ labelme_to_coco.py
4
+
5
+ Converts a folder of labelme-formatted .json files to COCO.
6
+
7
+ """
8
8
 
9
9
  #%% Constants and imports
10
10
 
@@ -15,10 +15,177 @@ import uuid
15
15
  from md_utils import path_utils
16
16
  from md_visualization.visualization_utils import open_image
17
17
 
18
+ from multiprocessing.pool import Pool, ThreadPool
19
+ from functools import partial
20
+
18
21
  from tqdm import tqdm
19
22
 
20
23
 
21
- #%% Functions
24
+ #%% Support functions
25
+
26
+ def _add_category(category_name,category_name_to_id,candidate_category_id=0):
27
+ """
28
+ Adds the category [category_name] to the dict [category_name_to_id], by default
29
+ using the next available integer index.
30
+ """
31
+
32
+ if category_name in category_name_to_id:
33
+ return category_name_to_id[category_name]
34
+ while candidate_category_id in category_name_to_id.values():
35
+ candidate_category_id += 1
36
+ category_name_to_id[category_name] = candidate_category_id
37
+ return candidate_category_id
38
+
39
+
40
+ def _process_labelme_file(image_fn_relative,input_folder,use_folders_as_labels,
41
+ no_json_handling,validate_image_sizes,
42
+ category_name_to_id,allow_new_categories=True):
43
+ """
44
+ Internal function for processing each image; this support function facilitates parallelization.
45
+ """
46
+
47
+ result = {}
48
+ result['im'] = None
49
+ result['annotations_this_image'] = None
50
+ result['status'] = None
51
+
52
+ image_fn_abs = os.path.join(input_folder,image_fn_relative)
53
+ json_fn_abs = os.path.splitext(image_fn_abs)[0] + '.json'
54
+
55
+ im = {}
56
+ im['id'] = image_fn_relative
57
+ im['file_name'] = image_fn_relative
58
+
59
+ # If there's no .json file for this image...
60
+ if not os.path.isfile(json_fn_abs):
61
+
62
+ # Either skip it...
63
+ if no_json_handling == 'skip':
64
+ print('Skipping image {} (no .json file)'.format(image_fn_relative))
65
+ result['status'] = 'skipped (no .json file)'
66
+ return result
67
+
68
+ # ...or error
69
+ elif no_json_handling == 'error':
70
+ raise ValueError('Image file {} has no corresponding .json file'.format(
71
+ image_fn_relative))
72
+
73
+ # ...or treat it as empty.
74
+ elif no_json_handling == 'empty':
75
+ try:
76
+ pil_im = open_image(image_fn_abs)
77
+ except Exception:
78
+ print('Warning: error opening image {}, skipping'.format(image_fn_abs))
79
+ result['status'] = 'image load error'
80
+ return result
81
+ im['width'] = pil_im.width
82
+ im['height'] = pil_im.height
83
+
84
+ # Just in case we need to differentiate between "no .json file" and "a .json file with no annotations"
85
+ im['no_labelme_json'] = True
86
+ shapes = []
87
+ else:
88
+ raise ValueError('Unrecognized specifier {} for handling images with no .json files'.format(
89
+ no_json_handling))
90
+
91
+ # If we found a .json file for this image...
92
+ else:
93
+
94
+ # Read the .json file
95
+ with open(json_fn_abs,'r') as f:
96
+ labelme_data = json.load(f)
97
+ im['width'] = labelme_data['imageWidth']
98
+ im['height'] = labelme_data['imageHeight']
99
+
100
+ if validate_image_sizes:
101
+ try:
102
+ pil_im = open_image(image_fn_abs)
103
+ except Exception:
104
+ print('Warning: error opening image {} for size validation, skipping'.format(image_fn_abs))
105
+ result['status'] = 'skipped (size validation error)'
106
+ return result
107
+ if not (im['width'] == pil_im.width and im['height'] == pil_im.height):
108
+ print('Warning: image size validation error for file {}'.format(image_fn_relative))
109
+ im['width'] = pil_im.width
110
+ im['height'] = pil_im.height
111
+ im['labelme_width'] = labelme_data['imageWidth']
112
+ im['labelme_height'] = labelme_data['imageHeight']
113
+
114
+ shapes = labelme_data['shapes']
115
+
116
+ if ('flags' in labelme_data) and (len(labelme_data['flags']) > 0):
117
+ im['flags'] = labelme_data['flags']
118
+
119
+ annotations_this_image = []
120
+
121
+ if len(shapes) == 0:
122
+
123
+ if allow_new_categories:
124
+ category_id = _add_category('empty',category_name_to_id)
125
+ else:
126
+ assert 'empty' in category_name_to_id
127
+ category_id = category_name_to_id['empty']
128
+
129
+ ann = {}
130
+ ann['id'] = str(uuid.uuid1())
131
+ ann['image_id'] = im['id']
132
+ ann['category_id'] = category_id
133
+ ann['sequence_level_annotation'] = False
134
+ annotations_this_image.append(ann)
135
+
136
+ else:
137
+
138
+ for shape in shapes:
139
+
140
+ if shape['shape_type'] != 'rectangle':
141
+ print('Only rectangles are supported, skipping an annotation of type {} in {}'.format(
142
+ shape['shape_type'],image_fn_relative))
143
+ continue
144
+
145
+ if use_folders_as_labels:
146
+ category_name = os.path.basename(os.path.dirname(image_fn_abs))
147
+ else:
148
+ category_name = shape['label']
149
+
150
+ if allow_new_categories:
151
+ category_id = _add_category(category_name,category_name_to_id)
152
+ else:
153
+ assert category_name in category_name_to_id
154
+ category_id = category_name_to_id[category_name]
155
+
156
+ points = shape['points']
157
+ if len(points) != 2:
158
+ print('Warning: illegal rectangle with {} points for {}'.format(
159
+ len(points),image_fn_relative))
160
+ continue
161
+
162
+ p0 = points[0]
163
+ p1 = points[1]
164
+ x0 = min(p0[0],p1[0])
165
+ x1 = max(p0[0],p1[0])
166
+ y0 = min(p0[1],p1[1])
167
+ y1 = max(p0[1],p1[1])
168
+
169
+ bbox = [x0,y0,abs(x1-x0),abs(y1-y0)]
170
+ ann = {}
171
+ ann['id'] = str(uuid.uuid1())
172
+ ann['image_id'] = im['id']
173
+ ann['category_id'] = category_id
174
+ ann['sequence_level_annotation'] = False
175
+ ann['bbox'] = bbox
176
+ annotations_this_image.append(ann)
177
+
178
+ # ...for each shape
179
+
180
+ result['im'] = im
181
+ result['annotations_this_image'] = annotations_this_image
182
+
183
+ return result
184
+
185
+ # ...def _process_labelme_file(...)
186
+
187
+
188
+ #%% Main function
22
189
 
23
190
  def labelme_to_coco(input_folder,
24
191
  output_file=None,
@@ -32,12 +199,17 @@ def labelme_to_coco(input_folder,
32
199
  recursive=True,
33
200
  no_json_handling='skip',
34
201
  validate_image_sizes=True,
35
- right_edge_quantization_threshold=None):
202
+ max_workers=1,
203
+ use_threads=True):
36
204
  """
37
- Find all images in [input_folder] that have corresponding .json files, and convert
205
+ Finds all images in [input_folder] that have corresponding .json files, and converts them
38
206
  to a COCO .json file.
39
207
 
40
- Currently only supports bounding box annotations.
208
+ Currently only supports bounding box annotations and image-level flags (i.e., does not
209
+ support point or general polygon annotations).
210
+
211
+ Labelme's image-level flags don't quite fit the COCO annotations format, so they are attached
212
+ to image objects, rather than annotation objects.
41
213
 
42
214
  If output_file is None, just returns the resulting dict, does not write to file.
43
215
 
@@ -52,42 +224,90 @@ def labelme_to_coco(input_folder,
52
224
  file. Empty images in the "lion" folder will still be given the label "empty" (or
53
225
  [empty_category_name]).
54
226
 
55
- no_json_handling can be:
227
+ Args:
228
+ input_folder (str): input folder to search for images and Labelme .json files
229
+ output_file (str, optional): output file to which we should write COCO-formatted data; if None
230
+ this function just returns the COCO-formatted dict
231
+ category_id_to_category_name (dict, optional): dict mapping category IDs to category names;
232
+ really used to map Labelme category names to COCO category IDs. IDs will be auto-generated
233
+ if this is None.
234
+ empty_category_id (int, optional): category ID to use for the not-very-COCO-like "empty" category;
235
+ also see the no_json_handling parameter.
236
+ info_struct (dict, optional): dict to stash in the "info" field of the resulting COCO dict
237
+ relative_paths_to_include (list, optional): allowlist of relative paths to include in the COCO
238
+ dict; there's no reason to specify this along with relative_paths_to_exclude.
239
+ relative_paths_to_exclude (list, optional): blocklist of relative paths to exclude from the COCO
240
+ dict; there's no reason to specify this along with relative_paths_to_include.
241
+ use_folders_as_labels (bool, optional): if this is True, class names will be pulled from folder names,
242
+ useful if you have images like a/b/cat/image001.jpg, a/b/dog/image002.jpg, etc.
243
+ recursive (bool, optional): whether to recurse into [input_folder]
244
+ no_json_handling (str, optional): how to deal with image files that have no corresponding .json files,
245
+ can be:
246
+
247
+ - 'skip': ignore image files with no corresponding .json files
248
+ - 'empty': treat image files with no corresponding .json files as empty
249
+ - 'error': throw an error when an image file has no corresponding .json file
250
+ validate_image_sizes (bool, optional): whether to load images to verify that the sizes specified
251
+ in the labelme files are correct
252
+ max_workers (int, optional): number of workers to use for parallelization, set to <=1 to disable
253
+ parallelization
254
+ use_threads (bool, optional): whether to use threads (True) or processes (False) for parallelization,
255
+ not relevant if max_workers <= 1
56
256
 
57
- * 'skip': ignore image files with no corresponding .json files
58
- * 'empty': treat image files with no corresponding .json files as empty
59
- * 'error': throw an error when an image file has no corresponding .json file
60
-
61
- right_edge_quantization_threshold is an off-by-default hack to handle cases where
62
- boxes that really should be running off the right side of the image only extend like 99%
63
- of the way there, due to what appears to be a slight bias inherent to MD. If a box extends
64
- within [right_edge_quantization_threshold] (a small number, from 0 to 1, but probably around
65
- 0.02) of the right edge of the image, it will be extended to the far right edge.
257
+ Returns:
258
+ dict: a COCO-formatted dictionary, identical to what's written to [output_file] if [output_file] is not None.
66
259
  """
67
260
 
261
+ if max_workers > 1:
262
+ assert category_id_to_category_name is not None, \
263
+ 'When parallelizing labelme --> COCO conversion, you must supply a category mapping'
264
+
68
265
  if category_id_to_category_name is None:
69
266
  category_name_to_id = {}
70
267
  else:
71
268
  category_name_to_id = {v: k for k, v in category_id_to_category_name.items()}
72
-
73
269
  for category_name in category_name_to_id:
74
270
  try:
75
271
  category_name_to_id[category_name] = int(category_name_to_id[category_name])
76
272
  except ValueError:
77
273
  raise ValueError('Category IDs must be ints or string-formatted ints')
274
+
275
+ # If the user supplied an explicit empty category ID, and the empty category
276
+ # name is already in category_name_to_id, make sure they match.
277
+ if empty_category_id is not None:
278
+ if empty_category_name in category_name_to_id:
279
+ assert category_name_to_id[empty_category_name] == empty_category_id, \
280
+ 'Ambiguous empty category specification'
281
+ if empty_category_id in category_id_to_category_name:
282
+ assert category_id_to_category_name[empty_category_id] == empty_category_name, \
283
+ 'Ambiguous empty category specification'
284
+ else:
285
+ if empty_category_name in category_name_to_id:
286
+ empty_category_id = category_name_to_id[empty_category_name]
78
287
 
288
+ del category_id_to_category_name
289
+
79
290
  # Enumerate images
291
+ print('Enumerating images in {}'.format(input_folder))
80
292
  image_filenames_relative = path_utils.find_images(input_folder,recursive=recursive,
81
- return_relative_paths=True)
82
-
83
- def add_category(category_name,candidate_category_id=0):
84
- if category_name in category_name_to_id:
85
- return category_name_to_id[category_name]
86
- while candidate_category_id in category_name_to_id.values():
87
- candidate_category_id += 1
88
- category_name_to_id[category_name] = candidate_category_id
89
- return candidate_category_id
293
+ return_relative_paths=True,
294
+ convert_slashes=True)
295
+
296
+ # Remove any images we're supposed to skip
297
+ if (relative_paths_to_include is not None) or (relative_paths_to_exclude is not None):
298
+ image_filenames_relative_to_process = []
299
+ for image_fn_relative in image_filenames_relative:
300
+ if relative_paths_to_include is not None and image_fn_relative not in relative_paths_to_include:
301
+ continue
302
+ if relative_paths_to_exclude is not None and image_fn_relative in relative_paths_to_exclude:
303
+ continue
304
+ image_filenames_relative_to_process.append(image_fn_relative)
305
+ print('Processing {} of {} images'.format(
306
+ len(image_filenames_relative_to_process),
307
+ len(image_filenames_relative)))
308
+ image_filenames_relative = image_filenames_relative_to_process
90
309
 
310
+ # If the user supplied a category ID to use for empty images...
91
311
  if empty_category_id is not None:
92
312
  try:
93
313
  empty_category_id = int(empty_category_id)
@@ -95,136 +315,52 @@ def labelme_to_coco(input_folder,
95
315
  raise ValueError('Category IDs must be ints or string-formatted ints')
96
316
 
97
317
  if empty_category_id is None:
98
- empty_category_id = add_category(empty_category_name)
99
-
100
- images = []
101
- annotations = []
102
-
103
- n_edges_quantized = 0
104
-
105
- # image_fn_relative = image_filenames_relative[0]
106
- for image_fn_relative in tqdm(image_filenames_relative):
107
-
108
- if relative_paths_to_include is not None and image_fn_relative not in relative_paths_to_include:
109
- continue
110
- if relative_paths_to_exclude is not None and image_fn_relative in relative_paths_to_exclude:
111
- continue
112
-
113
- image_fn_abs = os.path.join(input_folder,image_fn_relative)
114
- json_fn_abs = os.path.splitext(image_fn_abs)[0] + '.json'
115
-
116
- im = {}
117
- im['id'] = image_fn_relative
118
- im['file_name'] = image_fn_relative
119
-
120
- # If there's no .json file for this image...
121
- if not os.path.isfile(json_fn_abs):
318
+ empty_category_id = _add_category(empty_category_name,category_name_to_id)
122
319
 
123
- # Either skip it...
124
- if no_json_handling == 'skip':
125
- continue
126
-
127
- # ...or error
128
- elif no_json_handling == 'error':
129
- raise ValueError('Image file {} has no corresponding .json file'.format(
130
- image_fn_relative))
131
-
132
- # ...or treat it as empty.
133
- elif no_json_handling == 'empty':
134
- try:
135
- pil_im = open_image(image_fn_abs)
136
- except Exception:
137
- print('Warning: error opening image {}, skipping'.format(image_fn_abs))
138
- continue
139
- im['width'] = pil_im.width
140
- im['height'] = pil_im.height
141
- shapes = []
142
- else:
143
- raise ValueError('Unrecognized specifier {} for handling images with no .json files'.format(
144
- no_json_handling))
320
+ if max_workers <= 1:
145
321
 
146
- # If we found a .json file for this image...
147
- else:
322
+ image_results = []
323
+ for image_fn_relative in tqdm(image_filenames_relative):
148
324
 
149
- # Read the .json file
150
- with open(json_fn_abs,'r') as f:
151
- labelme_data = json.load(f)
152
- im['width'] = labelme_data['imageWidth']
153
- im['height'] = labelme_data['imageHeight']
325
+ result = _process_labelme_file(image_fn_relative,input_folder,use_folders_as_labels,
326
+ no_json_handling,validate_image_sizes,
327
+ category_name_to_id,allow_new_categories=True)
328
+ image_results.append(result)
154
329
 
155
- if validate_image_sizes:
156
- try:
157
- pil_im = open_image(image_fn_abs)
158
- except Exception:
159
- print('Warning: error opening image {}, skipping'.format(image_fn_abs))
160
- continue
161
- assert im['width'] == pil_im.width and im['height'] == pil_im.height, \
162
- 'Image size validation error for file {}'.format(image_fn_relative)
163
-
164
- shapes = labelme_data['shapes']
330
+ else:
165
331
 
166
- if len(shapes) == 0:
167
-
168
- category_id = add_category('empty')
169
- ann = {}
170
- ann['id'] = str(uuid.uuid1())
171
- ann['image_id'] = im['id']
172
- ann['category_id'] = category_id
173
- ann['sequence_level_annotation'] = False
174
- annotations.append(ann)
175
-
332
+ n_workers = min(max_workers,len(image_filenames_relative))
333
+ assert category_name_to_id is not None
334
+
335
+ if use_threads:
336
+ pool = ThreadPool(n_workers)
176
337
  else:
177
-
178
- for shape in shapes:
179
- if shape['shape_type'] != 'rectangle':
180
- print('Only rectangles are supported, skipping an annotation of type {} in {}'.format(
181
- shape['shape_type'],image_fn_relative))
182
- continue
183
-
184
- if use_folders_as_labels:
185
- category_name = os.path.basename(os.path.dirname(image_fn_abs))
186
- else:
187
- category_name = shape['label']
188
-
189
- category_id = add_category(category_name)
190
-
191
- points = shape['points']
192
- assert len(points) == 2, 'Illegal rectangle with {} points'.format(
193
- len(points))
194
-
195
- p0 = points[0]
196
- p1 = points[1]
197
- x0 = min(p0[0],p1[0])
198
- x1 = max(p0[0],p1[0])
199
- y0 = min(p0[1],p1[1])
200
- y1 = max(p0[1],p1[1])
201
-
202
- if right_edge_quantization_threshold is not None:
203
- x1_rel = x1 / (im['width'] - 1)
204
- right_edge_distance = 1.0 - x1_rel
205
- if right_edge_distance < right_edge_quantization_threshold:
206
- n_edges_quantized += 1
207
- x1 = im['width'] - 1
208
-
209
- bbox = [x0,y0,abs(x1-x0),abs(y1-y0)]
210
- ann = {}
211
- ann['id'] = str(uuid.uuid1())
212
- ann['image_id'] = im['id']
213
- ann['category_id'] = category_id
214
- ann['sequence_level_annotation'] = False
215
- ann['bbox'] = bbox
216
- annotations.append(ann)
217
-
218
- # ...for each shape
219
-
220
- images.append(im)
221
-
222
- # ..for each image
338
+ pool = Pool(n_workers)
339
+
340
+ image_results = list(tqdm(pool.imap(
341
+ partial(_process_labelme_file,
342
+ input_folder=input_folder,
343
+ use_folders_as_labels=use_folders_as_labels,
344
+ no_json_handling=no_json_handling,
345
+ validate_image_sizes=validate_image_sizes,
346
+ category_name_to_id=category_name_to_id,
347
+ allow_new_categories=False
348
+ ),image_filenames_relative), total=len(image_filenames_relative)))
349
+
350
+ images = []
351
+ annotations = []
223
352
 
224
- if n_edges_quantized > 0:
225
- print('Quantized the right edge in {} of {} images'.format(
226
- n_edges_quantized,len(image_filenames_relative)))
353
+ # Flatten the lists of images and annotations
354
+ for result in image_results:
355
+ im = result['im']
356
+ annotations_this_image = result['annotations_this_image']
227
357
 
358
+ if im is None:
359
+ assert annotations_this_image is None
360
+ else:
361
+ images.append(im)
362
+ annotations.extend(annotations_this_image)
363
+
228
364
  output_dict = {}
229
365
  output_dict['images'] = images
230
366
  output_dict['annotations'] = annotations
@@ -257,12 +393,26 @@ def find_empty_labelme_files(input_folder,recursive=True):
257
393
  Returns a list of all image files in [input_folder] associated with .json files that have
258
394
  no boxes in them. Also returns a list of images with no associated .json files. Specifically,
259
395
  returns a dict:
260
-
261
- {
262
- 'images_with_empty_json_files':[list],
263
- 'images_with_no_json_files':[list],
264
- 'images_with_non_empty_json_files':[list]
265
- }
396
+
397
+ .. code-block:: none
398
+
399
+ {
400
+ 'images_with_empty_json_files':[list],
401
+ 'images_with_no_json_files':[list],
402
+ 'images_with_non_empty_json_files':[list]
403
+ }
404
+
405
+ Args:
406
+ input_folder (str): the folder to search for empty (i.e., box-less) Labelme .json files
407
+ recursive (bool, optional): whether to recurse into [input_folder]
408
+
409
+ Returns:
410
+ dict: a dict with fields:
411
+ - images_with_empty_json_files: a list of all image files in [input_folder] associated with
412
+ .json files that have no boxes in them
413
+ - images_with_no_json_files: a list of images in [input_folder] with no associated .json files
414
+ - images_with_non_empty_json_files: a list of images in [input_folder] associated with .json
415
+ files that have at least one box
266
416
  """
267
417
  image_filenames_relative = path_utils.find_images(input_folder,recursive=True,
268
418
  return_relative_paths=True)