megadetector 5.0.11__py3-none-any.whl → 5.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (203) hide show
  1. megadetector/api/__init__.py +0 -0
  2. megadetector/api/batch_processing/__init__.py +0 -0
  3. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  4. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. megadetector/api/batch_processing/api_core/batch_service/score.py +439 -0
  6. megadetector/api/batch_processing/api_core/server.py +294 -0
  7. megadetector/api/batch_processing/api_core/server_api_config.py +97 -0
  8. megadetector/api/batch_processing/api_core/server_app_config.py +55 -0
  9. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +220 -0
  10. megadetector/api/batch_processing/api_core/server_job_status_table.py +149 -0
  11. megadetector/api/batch_processing/api_core/server_orchestration.py +360 -0
  12. megadetector/api/batch_processing/api_core/server_utils.py +88 -0
  13. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  14. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +46 -0
  15. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  16. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +152 -0
  17. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  18. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  19. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  20. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  21. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
  22. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  23. megadetector/api/synchronous/__init__.py +0 -0
  24. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  25. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +152 -0
  26. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +263 -0
  27. megadetector/api/synchronous/api_core/animal_detection_api/config.py +35 -0
  28. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  29. megadetector/api/synchronous/api_core/tests/load_test.py +110 -0
  30. megadetector/classification/__init__.py +0 -0
  31. megadetector/classification/aggregate_classifier_probs.py +108 -0
  32. megadetector/classification/analyze_failed_images.py +227 -0
  33. megadetector/classification/cache_batchapi_outputs.py +198 -0
  34. megadetector/classification/create_classification_dataset.py +627 -0
  35. megadetector/classification/crop_detections.py +516 -0
  36. megadetector/classification/csv_to_json.py +226 -0
  37. megadetector/classification/detect_and_crop.py +855 -0
  38. megadetector/classification/efficientnet/__init__.py +9 -0
  39. megadetector/classification/efficientnet/model.py +415 -0
  40. megadetector/classification/efficientnet/utils.py +607 -0
  41. megadetector/classification/evaluate_model.py +520 -0
  42. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  43. megadetector/classification/json_to_azcopy_list.py +63 -0
  44. megadetector/classification/json_validator.py +699 -0
  45. megadetector/classification/map_classification_categories.py +276 -0
  46. megadetector/classification/merge_classification_detection_output.py +506 -0
  47. megadetector/classification/prepare_classification_script.py +194 -0
  48. megadetector/classification/prepare_classification_script_mc.py +228 -0
  49. megadetector/classification/run_classifier.py +287 -0
  50. megadetector/classification/save_mislabeled.py +110 -0
  51. megadetector/classification/train_classifier.py +827 -0
  52. megadetector/classification/train_classifier_tf.py +725 -0
  53. megadetector/classification/train_utils.py +323 -0
  54. megadetector/data_management/__init__.py +0 -0
  55. megadetector/data_management/annotations/__init__.py +0 -0
  56. megadetector/data_management/annotations/annotation_constants.py +34 -0
  57. megadetector/data_management/camtrap_dp_to_coco.py +237 -0
  58. megadetector/data_management/cct_json_utils.py +404 -0
  59. megadetector/data_management/cct_to_md.py +176 -0
  60. megadetector/data_management/cct_to_wi.py +289 -0
  61. megadetector/data_management/coco_to_labelme.py +283 -0
  62. megadetector/data_management/coco_to_yolo.py +662 -0
  63. megadetector/data_management/databases/__init__.py +0 -0
  64. megadetector/data_management/databases/add_width_and_height_to_db.py +33 -0
  65. megadetector/data_management/databases/combine_coco_camera_traps_files.py +206 -0
  66. megadetector/data_management/databases/integrity_check_json_db.py +493 -0
  67. megadetector/data_management/databases/subset_json_db.py +115 -0
  68. megadetector/data_management/generate_crops_from_cct.py +149 -0
  69. megadetector/data_management/get_image_sizes.py +189 -0
  70. megadetector/data_management/importers/add_nacti_sizes.py +52 -0
  71. megadetector/data_management/importers/add_timestamps_to_icct.py +79 -0
  72. megadetector/data_management/importers/animl_results_to_md_results.py +158 -0
  73. megadetector/data_management/importers/auckland_doc_test_to_json.py +373 -0
  74. megadetector/data_management/importers/auckland_doc_to_json.py +201 -0
  75. megadetector/data_management/importers/awc_to_json.py +191 -0
  76. megadetector/data_management/importers/bellevue_to_json.py +273 -0
  77. megadetector/data_management/importers/cacophony-thermal-importer.py +793 -0
  78. megadetector/data_management/importers/carrizo_shrubfree_2018.py +269 -0
  79. megadetector/data_management/importers/carrizo_trail_cam_2017.py +289 -0
  80. megadetector/data_management/importers/cct_field_adjustments.py +58 -0
  81. megadetector/data_management/importers/channel_islands_to_cct.py +913 -0
  82. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +180 -0
  83. megadetector/data_management/importers/eMammal/eMammal_helpers.py +249 -0
  84. megadetector/data_management/importers/eMammal/make_eMammal_json.py +223 -0
  85. megadetector/data_management/importers/ena24_to_json.py +276 -0
  86. megadetector/data_management/importers/filenames_to_json.py +386 -0
  87. megadetector/data_management/importers/helena_to_cct.py +283 -0
  88. megadetector/data_management/importers/idaho-camera-traps.py +1407 -0
  89. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +294 -0
  90. megadetector/data_management/importers/jb_csv_to_json.py +150 -0
  91. megadetector/data_management/importers/mcgill_to_json.py +250 -0
  92. megadetector/data_management/importers/missouri_to_json.py +490 -0
  93. megadetector/data_management/importers/nacti_fieldname_adjustments.py +79 -0
  94. megadetector/data_management/importers/noaa_seals_2019.py +181 -0
  95. megadetector/data_management/importers/pc_to_json.py +365 -0
  96. megadetector/data_management/importers/plot_wni_giraffes.py +123 -0
  97. megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -0
  98. megadetector/data_management/importers/prepare_zsl_imerit.py +131 -0
  99. megadetector/data_management/importers/rspb_to_json.py +356 -0
  100. megadetector/data_management/importers/save_the_elephants_survey_A.py +320 -0
  101. megadetector/data_management/importers/save_the_elephants_survey_B.py +329 -0
  102. megadetector/data_management/importers/snapshot_safari_importer.py +758 -0
  103. megadetector/data_management/importers/snapshot_safari_importer_reprise.py +665 -0
  104. megadetector/data_management/importers/snapshot_serengeti_lila.py +1067 -0
  105. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +150 -0
  106. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +153 -0
  107. megadetector/data_management/importers/sulross_get_exif.py +65 -0
  108. megadetector/data_management/importers/timelapse_csv_set_to_json.py +490 -0
  109. megadetector/data_management/importers/ubc_to_json.py +399 -0
  110. megadetector/data_management/importers/umn_to_json.py +507 -0
  111. megadetector/data_management/importers/wellington_to_json.py +263 -0
  112. megadetector/data_management/importers/wi_to_json.py +442 -0
  113. megadetector/data_management/importers/zamba_results_to_md_results.py +181 -0
  114. megadetector/data_management/labelme_to_coco.py +547 -0
  115. megadetector/data_management/labelme_to_yolo.py +272 -0
  116. megadetector/data_management/lila/__init__.py +0 -0
  117. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +97 -0
  118. megadetector/data_management/lila/add_locations_to_nacti.py +147 -0
  119. megadetector/data_management/lila/create_lila_blank_set.py +558 -0
  120. megadetector/data_management/lila/create_lila_test_set.py +152 -0
  121. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  122. megadetector/data_management/lila/download_lila_subset.py +178 -0
  123. megadetector/data_management/lila/generate_lila_per_image_labels.py +516 -0
  124. megadetector/data_management/lila/get_lila_annotation_counts.py +170 -0
  125. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  126. megadetector/data_management/lila/lila_common.py +300 -0
  127. megadetector/data_management/lila/test_lila_metadata_urls.py +132 -0
  128. megadetector/data_management/ocr_tools.py +870 -0
  129. megadetector/data_management/read_exif.py +809 -0
  130. megadetector/data_management/remap_coco_categories.py +84 -0
  131. megadetector/data_management/remove_exif.py +66 -0
  132. megadetector/data_management/rename_images.py +187 -0
  133. megadetector/data_management/resize_coco_dataset.py +189 -0
  134. megadetector/data_management/wi_download_csv_to_coco.py +247 -0
  135. megadetector/data_management/yolo_output_to_md_output.py +446 -0
  136. megadetector/data_management/yolo_to_coco.py +676 -0
  137. megadetector/detection/__init__.py +0 -0
  138. megadetector/detection/detector_training/__init__.py +0 -0
  139. megadetector/detection/detector_training/model_main_tf2.py +114 -0
  140. megadetector/detection/process_video.py +846 -0
  141. megadetector/detection/pytorch_detector.py +355 -0
  142. megadetector/detection/run_detector.py +779 -0
  143. megadetector/detection/run_detector_batch.py +1219 -0
  144. megadetector/detection/run_inference_with_yolov5_val.py +1087 -0
  145. megadetector/detection/run_tiled_inference.py +934 -0
  146. megadetector/detection/tf_detector.py +192 -0
  147. megadetector/detection/video_utils.py +698 -0
  148. megadetector/postprocessing/__init__.py +0 -0
  149. megadetector/postprocessing/add_max_conf.py +64 -0
  150. megadetector/postprocessing/categorize_detections_by_size.py +165 -0
  151. megadetector/postprocessing/classification_postprocessing.py +716 -0
  152. megadetector/postprocessing/combine_api_outputs.py +249 -0
  153. megadetector/postprocessing/compare_batch_results.py +966 -0
  154. megadetector/postprocessing/convert_output_format.py +396 -0
  155. megadetector/postprocessing/load_api_results.py +195 -0
  156. megadetector/postprocessing/md_to_coco.py +310 -0
  157. megadetector/postprocessing/md_to_labelme.py +330 -0
  158. megadetector/postprocessing/merge_detections.py +412 -0
  159. megadetector/postprocessing/postprocess_batch_results.py +1908 -0
  160. megadetector/postprocessing/remap_detection_categories.py +170 -0
  161. megadetector/postprocessing/render_detection_confusion_matrix.py +660 -0
  162. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +211 -0
  163. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +83 -0
  164. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1635 -0
  165. megadetector/postprocessing/separate_detections_into_folders.py +730 -0
  166. megadetector/postprocessing/subset_json_detector_output.py +700 -0
  167. megadetector/postprocessing/top_folders_to_bottom.py +223 -0
  168. megadetector/taxonomy_mapping/__init__.py +0 -0
  169. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  170. megadetector/taxonomy_mapping/map_new_lila_datasets.py +150 -0
  171. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -0
  172. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +588 -0
  173. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  174. megadetector/taxonomy_mapping/simple_image_download.py +219 -0
  175. megadetector/taxonomy_mapping/species_lookup.py +834 -0
  176. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  177. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  178. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  179. megadetector/utils/__init__.py +0 -0
  180. megadetector/utils/azure_utils.py +178 -0
  181. megadetector/utils/ct_utils.py +613 -0
  182. megadetector/utils/directory_listing.py +246 -0
  183. megadetector/utils/md_tests.py +1164 -0
  184. megadetector/utils/path_utils.py +1045 -0
  185. megadetector/utils/process_utils.py +160 -0
  186. megadetector/utils/sas_blob_utils.py +509 -0
  187. megadetector/utils/split_locations_into_train_val.py +228 -0
  188. megadetector/utils/string_utils.py +92 -0
  189. megadetector/utils/url_utils.py +323 -0
  190. megadetector/utils/write_html_image_list.py +225 -0
  191. megadetector/visualization/__init__.py +0 -0
  192. megadetector/visualization/plot_utils.py +293 -0
  193. megadetector/visualization/render_images_with_thumbnails.py +275 -0
  194. megadetector/visualization/visualization_utils.py +1536 -0
  195. megadetector/visualization/visualize_db.py +552 -0
  196. megadetector/visualization/visualize_detector_output.py +405 -0
  197. {megadetector-5.0.11.dist-info → megadetector-5.0.13.dist-info}/LICENSE +0 -0
  198. {megadetector-5.0.11.dist-info → megadetector-5.0.13.dist-info}/METADATA +2 -2
  199. megadetector-5.0.13.dist-info/RECORD +201 -0
  200. megadetector-5.0.13.dist-info/top_level.txt +1 -0
  201. megadetector-5.0.11.dist-info/RECORD +0 -5
  202. megadetector-5.0.11.dist-info/top_level.txt +0 -1
  203. {megadetector-5.0.11.dist-info → megadetector-5.0.13.dist-info}/WHEEL +0 -0
@@ -0,0 +1,547 @@
1
+ """
2
+
3
+ labelme_to_coco.py
4
+
5
+ Converts a folder of labelme-formatted .json files to COCO.
6
+
7
+ """
8
+
9
+ #%% Constants and imports
10
+
11
+ import json
12
+ import os
13
+ import uuid
14
+
15
+ from multiprocessing.pool import Pool, ThreadPool
16
+ from functools import partial
17
+ from tqdm import tqdm
18
+
19
+ from megadetector.utils import path_utils
20
+ from megadetector.visualization.visualization_utils import open_image
21
+
22
+
23
+ #%% Support functions
24
+
25
def _add_category(category_name,category_name_to_id,candidate_category_id=0):
    """
    Adds the category [category_name] to the dict [category_name_to_id], by default
    using the next available integer index.

    If [category_name] is already present, its existing ID is returned and the dict
    is left untouched.

    Args:
        category_name (str): name of the category to add
        category_name_to_id (dict): dict mapping category names to IDs; modified in place
            when a new category is added
        candidate_category_id (int, optional): first ID to try; incremented until an ID
            that is not already a value in [category_name_to_id] is found

    Returns:
        int: the ID associated with [category_name]
    """

    if category_name in category_name_to_id:
        return category_name_to_id[category_name]

    # Snapshot the IDs in use once, rather than re-scanning the dict's values on
    # every iteration of the search loop.
    ids_in_use = set(category_name_to_id.values())
    while candidate_category_id in ids_in_use:
        candidate_category_id += 1

    category_name_to_id[category_name] = candidate_category_id
    return candidate_category_id
37
+
38
+
39
def _process_labelme_file(image_fn_relative,input_folder,use_folders_as_labels,
                          no_json_handling,validate_image_sizes,
                          category_name_to_id,allow_new_categories=True,
                          empty_category_name='empty'):
    """
    Internal function for processing each image; this support function facilitates parallelization.

    Args:
        image_fn_relative (str): image filename, relative to [input_folder]; also used as the
            image ID
        input_folder (str): folder containing the image and its Labelme .json file
        use_folders_as_labels (bool): if True, every box gets the name of the folder that
            directly contains the image as its category, regardless of the label in the .json file
        no_json_handling (str): what to do if the image has no .json file; 'skip', 'empty',
            or 'error'
        validate_image_sizes (bool): whether to open the image and compare its size to the size
            recorded in the .json file; if they differ, the size read from the image is used and
            the Labelme values are kept in 'labelme_width' and 'labelme_height'
        category_name_to_id (dict): dict mapping category names to integer IDs; modified in
            place if [allow_new_categories] is True
        allow_new_categories (bool, optional): whether categories that are not already in
            [category_name_to_id] should be added (True) or treated as an error (False)
        empty_category_name (str, optional): category name assigned to images that have no shapes

    Returns:
        dict: a dict with fields 'im' (a COCO-style image dict, or None if the image was skipped),
        'annotations_this_image' (a list of COCO-style annotation dicts, or None if the image
        was skipped), and 'status' (None unless the image was skipped, in which case it's a
        string describing why)

    Raises:
        ValueError: if the image has no .json file and [no_json_handling] is 'error', or
            if [no_json_handling] is not a recognized value
    """

    result = {}
    result['im'] = None
    result['annotations_this_image'] = None
    result['status'] = None

    image_fn_abs = os.path.join(input_folder,image_fn_relative)
    json_fn_abs = os.path.splitext(image_fn_abs)[0] + '.json'

    im = {}
    im['id'] = image_fn_relative
    im['file_name'] = image_fn_relative

    def _category_id_for(category_name):
        """
        Maps a category name to an ID, adding it to the mapping only if that's allowed.
        """
        if allow_new_categories:
            return _add_category(category_name,category_name_to_id)
        assert category_name in category_name_to_id, \
            'Category {} is not in the supplied category mapping'.format(category_name)
        return category_name_to_id[category_name]

    # If there's no .json file for this image...
    if not os.path.isfile(json_fn_abs):

        # Either skip it...
        if no_json_handling == 'skip':
            print('Skipping image {} (no .json file)'.format(image_fn_relative))
            result['status'] = 'skipped (no .json file)'
            return result

        # ...or error
        elif no_json_handling == 'error':
            raise ValueError('Image file {} has no corresponding .json file'.format(
                image_fn_relative))

        # ...or treat it as empty.
        elif no_json_handling == 'empty':
            try:
                pil_im = open_image(image_fn_abs)
            except Exception:
                print('Warning: error opening image {}, skipping'.format(image_fn_abs))
                result['status'] = 'image load error'
                return result
            im['width'] = pil_im.width
            im['height'] = pil_im.height

            # Just in case we need to differentiate between "no .json file" and "a .json file with no annotations"
            im['no_labelme_json'] = True
            shapes = []
        else:
            raise ValueError('Unrecognized specifier {} for handling images with no .json files'.format(
                no_json_handling))

    # If we found a .json file for this image...
    else:

        # Read the .json file
        with open(json_fn_abs,'r') as f:
            labelme_data = json.load(f)
        im['width'] = labelme_data['imageWidth']
        im['height'] = labelme_data['imageHeight']

        if validate_image_sizes:
            try:
                pil_im = open_image(image_fn_abs)
            except Exception:
                print('Warning: error opening image {} for size validation, skipping'.format(image_fn_abs))
                result['status'] = 'skipped (size validation error)'
                return result
            if not (im['width'] == pil_im.width and im['height'] == pil_im.height):
                # Trust the image over the .json file, but keep the Labelme values around
                print('Warning: image size validation error for file {}'.format(image_fn_relative))
                im['width'] = pil_im.width
                im['height'] = pil_im.height
                im['labelme_width'] = labelme_data['imageWidth']
                im['labelme_height'] = labelme_data['imageHeight']

        shapes = labelme_data['shapes']

        if ('flags' in labelme_data) and (len(labelme_data['flags']) > 0):
            im['flags'] = labelme_data['flags']

    annotations_this_image = []

    if len(shapes) == 0:

        # Images with no shapes get a single box-less annotation in the "empty" category
        ann = {}
        ann['id'] = str(uuid.uuid1())
        ann['image_id'] = im['id']
        ann['category_id'] = _category_id_for(empty_category_name)
        ann['sequence_level_annotation'] = False
        annotations_this_image.append(ann)

    else:

        for shape in shapes:

            if shape['shape_type'] != 'rectangle':
                print('Only rectangles are supported, skipping an annotation of type {} in {}'.format(
                    shape['shape_type'],image_fn_relative))
                continue

            if use_folders_as_labels:
                category_name = os.path.basename(os.path.dirname(image_fn_abs))
            else:
                category_name = shape['label']

            category_id = _category_id_for(category_name)

            points = shape['points']
            if len(points) != 2:
                print('Warning: illegal rectangle with {} points for {}'.format(
                    len(points),image_fn_relative))
                continue

            # The two corners can come in either order; normalize to
            # top-left/bottom-right before converting to [x,y,w,h].
            p0 = points[0]
            p1 = points[1]
            x0 = min(p0[0],p1[0])
            x1 = max(p0[0],p1[0])
            y0 = min(p0[1],p1[1])
            y1 = max(p0[1],p1[1])

            ann = {}
            ann['id'] = str(uuid.uuid1())
            ann['image_id'] = im['id']
            ann['category_id'] = category_id
            ann['sequence_level_annotation'] = False
            ann['bbox'] = [x0,y0,x1-x0,y1-y0]
            annotations_this_image.append(ann)

        # ...for each shape

    result['im'] = im
    result['annotations_this_image'] = annotations_this_image

    return result

# ...def _process_labelme_file(...)


#%% Main function

def labelme_to_coco(input_folder,
                    output_file=None,
                    category_id_to_category_name=None,
                    empty_category_name='empty',
                    empty_category_id=None,
                    info_struct=None,
                    relative_paths_to_include=None,
                    relative_paths_to_exclude=None,
                    use_folders_as_labels=False,
                    recursive=True,
                    no_json_handling='skip',
                    validate_image_sizes=True,
                    max_workers=1,
                    use_threads=True):
    """
    Finds all images in [input_folder] that have corresponding .json files, and converts
    to a COCO .json file.

    Currently only supports bounding box annotations and image-level flags (i.e., does not
    support point or general polygon annotations).

    Labelme's image-level flags don't quite fit the COCO annotations format, so they are attached
    to image objects, rather than annotation objects.

    If output_file is None, just returns the resulting dict, does not write to file.

    if use_folders_as_labels is False (default), the output labels come from the labelme
    .json files.  If use_folders_as_labels is True, the lowest-level folder name containing
    each .json file will determine the output label.  E.g., if use_folders_as_labels is True,
    and the folder contains:

    images/train/lion/image0001.json

    ...all boxes in image0001.json will be given the label "lion", regardless of the labels in the
    file.  Empty images in the "lion" folder will still be given the label "empty" (or
    [empty_category_name]).

    Args:
        input_folder (str): input folder to search for images and Labelme .json files
        output_file (str, optional): output file to which we should write COCO-formatted data; if None
            this function just returns the COCO-formatted dict
        category_id_to_category_name (dict, optional): dict mapping category IDs to category names;
            really used to map Labelme category names to COCO category IDs.  IDs will be auto-generated
            if this is None.  Required when max_workers > 1.
        empty_category_name (str, optional): category name to use for images with no boxes
        empty_category_id (int, optional): category ID to use for the not-very-COCO-like "empty" category;
            also see the no_json_handling parameter.
        info_struct (dict, optional): dict to stash in the "info" field of the resulting COCO dict;
            a shallow copy is made, so the caller's dict is not modified
        relative_paths_to_include (list, optional): allowlist of relative paths to include in the COCO
            dict; there's no reason to specify this along with relative_paths_to_exclude.
        relative_paths_to_exclude (list, optional): blocklist of relative paths to exclude from the COCO
            dict; there's no reason to specify this along with relative_paths_to_include.
        use_folders_as_labels (bool, optional): if this is True, class names will be pulled from folder names,
            useful if you have images like a/b/cat/image001.jpg, a/b/dog/image002.jpg, etc.
        recursive (bool, optional): whether to recurse into [input_folder]
        no_json_handling (str, optional): how to deal with image files that have no corresponding .json files,
            can be:

            - 'skip': ignore image files with no corresponding .json files
            - 'empty': treat image files with no corresponding .json files as empty
            - 'error': throw an error when an image file has no corresponding .json file
        validate_image_sizes (bool, optional): whether to load images to verify that the sizes specified
            in the labelme files are correct
        max_workers (int, optional): number of workers to use for parallelization, set to <=1 to disable
            parallelization
        use_threads (bool, optional): whether to use threads (True) or processes (False) for parallelization,
            not relevant if max_workers <= 1

    Returns:
        dict: a COCO-formatted dictionary, identical to what's written to [output_file] if [output_file] is not None.
    """

    if max_workers > 1:
        assert category_id_to_category_name is not None, \
            'When parallelizing labelme --> COCO conversion, you must supply a category mapping'

    # Invert the user-supplied mapping, normalizing IDs to ints along the way
    category_name_to_id = {}
    if category_id_to_category_name is not None:
        for category_id, category_name in category_id_to_category_name.items():
            try:
                category_name_to_id[category_name] = int(category_id)
            except ValueError:
                raise ValueError('Category IDs must be ints or string-formatted ints')

    del category_id_to_category_name

    # Reconcile the empty category with the mapping.  The ID is normalized to an int
    # *before* any comparison, so a string-formatted ID compares correctly against
    # the (already int-normalized) mapping.
    if empty_category_id is not None:

        try:
            empty_category_id = int(empty_category_id)
        except ValueError:
            raise ValueError('Category IDs must be ints or string-formatted ints')

        if empty_category_name in category_name_to_id:
            assert category_name_to_id[empty_category_name] == empty_category_id, \
                'Ambiguous empty category specification'
        else:
            # The requested ID can't already belong to some other category
            assert empty_category_id not in category_name_to_id.values(), \
                'Ambiguous empty category specification'
            # Register the empty category so the requested ID is actually the one
            # that gets used for empty images.
            category_name_to_id[empty_category_name] = empty_category_id

    elif empty_category_name in category_name_to_id:
        empty_category_id = category_name_to_id[empty_category_name]

    else:
        empty_category_id = _add_category(empty_category_name,category_name_to_id)

    # Enumerate images
    print('Enumerating images in {}'.format(input_folder))
    image_filenames_relative = path_utils.find_images(input_folder,recursive=recursive,
                                                      return_relative_paths=True,
                                                      convert_slashes=True)

    # Remove any images we're supposed to skip
    if (relative_paths_to_include is not None) or (relative_paths_to_exclude is not None):

        # Sets make the per-image membership tests constant-time
        paths_to_include = None
        if relative_paths_to_include is not None:
            paths_to_include = set(relative_paths_to_include)
        paths_to_exclude = None
        if relative_paths_to_exclude is not None:
            paths_to_exclude = set(relative_paths_to_exclude)

        image_filenames_relative_to_process = []
        for image_fn_relative in image_filenames_relative:
            if paths_to_include is not None and image_fn_relative not in paths_to_include:
                continue
            if paths_to_exclude is not None and image_fn_relative in paths_to_exclude:
                continue
            image_filenames_relative_to_process.append(image_fn_relative)
        print('Processing {} of {} images'.format(
            len(image_filenames_relative_to_process),
            len(image_filenames_relative)))
        image_filenames_relative = image_filenames_relative_to_process

    # New categories can only be added on the fly when parallelization is disabled;
    # when parallelization is requested, the user-supplied mapping has to be complete.
    process_image = partial(_process_labelme_file,
                            input_folder=input_folder,
                            use_folders_as_labels=use_folders_as_labels,
                            no_json_handling=no_json_handling,
                            validate_image_sizes=validate_image_sizes,
                            category_name_to_id=category_name_to_id,
                            allow_new_categories=(max_workers <= 1),
                            empty_category_name=empty_category_name)

    # Never ask for more workers than images; in particular, a pool can't be
    # created with zero workers, so an empty image list falls through to the
    # serial path.
    n_workers = min(max_workers,len(image_filenames_relative))

    if n_workers <= 1:

        image_results = []
        for image_fn_relative in tqdm(image_filenames_relative):
            image_results.append(process_image(image_fn_relative))

    else:

        if use_threads:
            pool = ThreadPool(n_workers)
        else:
            pool = Pool(n_workers)

        try:
            image_results = list(tqdm(pool.imap(process_image,image_filenames_relative),
                                      total=len(image_filenames_relative)))
        finally:
            # By now every result has been collected (or an error is propagating),
            # so it's safe to shut the workers down.
            pool.terminate()
            pool.join()

    images = []
    annotations = []

    # Flatten the lists of images and annotations
    for result in image_results:
        im = result['im']
        annotations_this_image = result['annotations_this_image']

        if im is None:
            assert annotations_this_image is None
        else:
            images.append(im)
            annotations.extend(annotations_this_image)

    output_dict = {}
    output_dict['images'] = images
    output_dict['annotations'] = annotations

    # Work on a copy so defaults aren't written into the caller's dict
    info_struct = {} if info_struct is None else dict(info_struct)
    if 'description' not in info_struct:
        info_struct['description'] = \
            'Converted to COCO from labelme annotations in folder {}'.format(input_folder)
    if 'version' not in info_struct:
        info_struct['version'] = 1.0

    output_dict['info'] = info_struct
    output_dict['categories'] = [{'name':category_name,'id':category_id}
                                 for category_name, category_id in category_name_to_id.items()]

    if output_file is not None:
        with open(output_file,'w') as f:
            json.dump(output_dict,f,indent=1)

    return output_dict

# ...def labelme_to_coco()
388
+
389
+
390
def find_empty_labelme_files(input_folder,recursive=True):
    """
    Returns a list of all image files in [input_folder] associated with .json files that have
    no boxes in them.  Also returns a list of images with no associated .json files.  Specifically,
    returns a dict:

    .. code-block:: none

        {
            'images_with_empty_json_files':[list],
            'images_with_no_json_files':[list],
            'images_with_non_empty_json_files':[list]
        }

    Args:
        input_folder (str): the folder to search for empty (i.e., box-less) Labelme .json files
        recursive (bool, optional): whether to recurse into [input_folder]

    Returns:
        dict: a dict with fields:
            - images_with_empty_json_files: a list of all image files in [input_folder] associated with
              .json files that have no boxes in them
            - images_with_no_json_files: a list of images in [input_folder] with no associated .json files
            - images_with_non_empty_json_files: a list of images in [input_folder] associated with .json
              files that have at least one box
    """

    # Honor the caller's [recursive] setting when enumerating images
    image_filenames_relative = path_utils.find_images(input_folder,recursive=recursive,
                                                      return_relative_paths=True)

    images_with_empty_json_files = []
    images_with_no_json_files = []
    images_with_non_empty_json_files = []

    # fn_relative = image_filenames_relative[0]
    for fn_relative in image_filenames_relative:

        # The .json file for an image has the same path as the image, other than the extension
        image_fn_abs = os.path.join(input_folder,fn_relative)
        json_fn_abs = os.path.splitext(image_fn_abs)[0] + '.json'

        if not os.path.isfile(json_fn_abs):
            images_with_no_json_files.append(fn_relative)
            continue

        # Read the .json file
        with open(json_fn_abs,'r') as f:
            labelme_data = json.load(f)

        if len(labelme_data['shapes']) == 0:
            images_with_empty_json_files.append(fn_relative)
        else:
            images_with_non_empty_json_files.append(fn_relative)

    # ...for every image

    return {'images_with_empty_json_files':images_with_empty_json_files,
            'images_with_no_json_files':images_with_no_json_files,
            'images_with_non_empty_json_files':images_with_non_empty_json_files}

# ...def find_empty_labelme_files(...)
450
+
451
+
452
+ #%% Interactive driver
453
+
454
# Notebook-style cells for interactive use; never executed when this module is
# imported or run as a script.
if False:

    pass

    #%% Options

    empty_category_name = 'empty'
    empty_category_id = None
    category_id_to_category_name = None
    info_struct = None

    input_folder = os.path.expanduser('~/data/md-test')
    output_file = os.path.expanduser('~/data/md-test-labelme-to-coco.json')


    #%% Programmatic execution

    # Every option defined in the "Options" cell is passed through, including info_struct
    output_dict = labelme_to_coco(input_folder,output_file,
                                  category_id_to_category_name=category_id_to_category_name,
                                  empty_category_name=empty_category_name,
                                  empty_category_id=empty_category_id,
                                  info_struct=info_struct,
                                  use_folders_as_labels=False,
                                  validate_image_sizes=False,
                                  no_json_handling='empty')


    #%% Validate

    from megadetector.data_management.databases import integrity_check_json_db

    options = integrity_check_json_db.IntegrityCheckOptions()

    options.baseDir = input_folder
    options.bCheckImageSizes = True
    options.bCheckImageExistence = True
    options.bFindUnusedImages = True
    options.bRequireLocation = False

    sortedCategories, _, errorInfo = integrity_check_json_db.integrity_check_json_db(output_file,options)


    #%% Preview

    from megadetector.visualization import visualize_db
    options = visualize_db.DbVizOptions()
    options.parallelize_rendering = True
    options.viz_size = (900, -1)
    options.num_to_visualize = 5000

    html_file,_ = visualize_db.visualize_db(output_file,os.path.expanduser('~/tmp/labelme_to_coco_preview'),
                                            input_folder,options)


    from megadetector.utils import path_utils # noqa
    path_utils.open_file(html_file)


    #%% Prepare command line

    s = 'python labelme_to_coco.py {} {}'.format(input_folder,output_file)
    print(s)
    import clipboard; clipboard.copy(s)
517
+
518
+
519
+ #%% Command-line driver
520
+
521
+ import sys,argparse
522
+
523
def main():
    """
    Command-line entry point: parses an input folder and an output filename from the
    command line, then runs labelme_to_coco() with default values for all other options.
    """

    parser = argparse.ArgumentParser(description='Convert labelme-formatted data to COCO')

    # Both arguments are positional strings
    positional_args = (
        ('input_folder','Path to images and .json annotation files'),
        ('output_file','Output filename (.json)')
    )
    for arg_name, arg_help in positional_args:
        parser.add_argument(arg_name,type=str,help=arg_help)

    # With no arguments at all, print the full help text and exit
    if not sys.argv[1:]:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    labelme_to_coco(args.input_folder,args.output_file)

if __name__ == '__main__':
    main()