megadetector 5.0.7__py3-none-any.whl → 5.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic.

Files changed (48)
  1. api/batch_processing/data_preparation/manage_local_batch.py +28 -14
  2. api/batch_processing/postprocessing/combine_api_outputs.py +2 -2
  3. api/batch_processing/postprocessing/compare_batch_results.py +1 -1
  4. api/batch_processing/postprocessing/convert_output_format.py +24 -6
  5. api/batch_processing/postprocessing/load_api_results.py +1 -3
  6. api/batch_processing/postprocessing/md_to_labelme.py +118 -51
  7. api/batch_processing/postprocessing/merge_detections.py +30 -5
  8. api/batch_processing/postprocessing/postprocess_batch_results.py +24 -12
  9. api/batch_processing/postprocessing/remap_detection_categories.py +163 -0
  10. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +15 -12
  11. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
  12. data_management/cct_json_utils.py +7 -2
  13. data_management/coco_to_labelme.py +263 -0
  14. data_management/coco_to_yolo.py +7 -4
  15. data_management/databases/integrity_check_json_db.py +68 -59
  16. data_management/databases/subset_json_db.py +1 -1
  17. data_management/get_image_sizes.py +44 -26
  18. data_management/importers/animl_results_to_md_results.py +1 -3
  19. data_management/importers/noaa_seals_2019.py +1 -1
  20. data_management/labelme_to_coco.py +252 -143
  21. data_management/labelme_to_yolo.py +95 -52
  22. data_management/lila/create_lila_blank_set.py +106 -23
  23. data_management/lila/download_lila_subset.py +133 -65
  24. data_management/lila/generate_lila_per_image_labels.py +1 -1
  25. data_management/lila/lila_common.py +8 -38
  26. data_management/read_exif.py +65 -16
  27. data_management/remap_coco_categories.py +84 -0
  28. data_management/resize_coco_dataset.py +3 -22
  29. data_management/wi_download_csv_to_coco.py +239 -0
  30. data_management/yolo_to_coco.py +283 -83
  31. detection/run_detector_batch.py +12 -3
  32. detection/run_inference_with_yolov5_val.py +10 -3
  33. detection/run_tiled_inference.py +2 -2
  34. detection/tf_detector.py +2 -1
  35. detection/video_utils.py +1 -1
  36. md_utils/ct_utils.py +22 -3
  37. md_utils/md_tests.py +11 -2
  38. md_utils/path_utils.py +206 -32
  39. md_utils/url_utils.py +66 -1
  40. md_utils/write_html_image_list.py +12 -3
  41. md_visualization/visualization_utils.py +363 -72
  42. md_visualization/visualize_db.py +33 -10
  43. {megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/METADATA +10 -12
  44. {megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/RECORD +47 -44
  45. {megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/WHEEL +1 -1
  46. md_visualization/visualize_megadb.py +0 -183
  47. {megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/LICENSE +0 -0
  48. {megadetector-5.0.7.dist-info → megadetector-5.0.8.dist-info}/top_level.txt +0 -0
api/batch_processing/postprocessing/postprocess_batch_results.py

@@ -27,7 +27,6 @@ import os
 import sys
 import time
 import uuid
-import urllib
 import warnings
 import random

@@ -53,7 +52,6 @@ from md_utils import path_utils
 from data_management.cct_json_utils import (CameraTrapJsonUtils, IndexedJsonDb)
 from api.batch_processing.postprocessing.load_api_results import load_api_results
 from md_utils.ct_utils import args_to_object
-from md_utils.ct_utils import invert_dictionary

 from detection.run_detector import get_typical_confidence_threshold_from_results

@@ -484,7 +482,14 @@ def render_bounding_boxes(

     # Optionally add links back to the original images
     if options.link_images_to_originals and (image_full_path is not None):
-        info['linkTarget'] = urllib.parse.quote(image_full_path)
+
+        # Handling special characters in links has been pushed down into
+        # write_html_image_list
+        #
+        # link_target = image_full_path.replace('\\','/')
+        # link_target = urllib.parse.quote(link_target)
+        link_target = image_full_path
+        info['linkTarget'] = link_target

     return info

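For context, the escaping that used to happen inline here now presumably lives in write_html_image_list. A minimal sketch of that kind of link sanitization (a hypothetical helper, not the actual write_html_image_list implementation):

    import urllib.parse

    def sanitize_link_target(path):
        # Backslashes don't work as separators in HTML links
        link_target = path.replace('\\', '/')
        # Keep '/' and ':' unescaped so path separators and drive letters survive
        return urllib.parse.quote(link_target, safe='/:')

    print(sanitize_link_target(r'images\camera 01\IMG_0001.JPG'))
    # -> images/camera%2001/IMG_0001.JPG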
@@ -848,7 +853,7 @@ def process_batch_results(options: PostProcessingOptions
     ground_truth_indexed_db = None

     if (options.ground_truth_json_file is not None):
-        assert (options.confidence_threshold is None) or (isinstance(confidence_threshold,float)), \
+        assert (options.confidence_threshold is None) or (isinstance(options.confidence_threshold,float)), \
             'Variable confidence thresholds are not supported when supplying ground truth'

     if (options.ground_truth_json_file is not None) and (len(options.ground_truth_json_file) > 0):
@@ -876,7 +881,7 @@ def process_batch_results(options: PostProcessingOptions
     # If the caller hasn't supplied results, load them
     if options.api_detection_results is None:
         detections_df, other_fields = load_api_results(
-            options.api_output_file, normalize_paths=True,
+            options.api_output_file, force_forward_slashes=True,
             filename_replacements=options.api_output_filename_replacements)
         ppresults.api_detection_results = detections_df
         ppresults.api_other_fields = other_fields
@@ -1087,7 +1092,7 @@ def process_batch_results(options: PostProcessingOptions
         (precision_at_confidence_threshold + recall_at_confidence_threshold)

     print('At a confidence threshold of {:.1%}, precision={:.1%}, recall={:.1%}, f1={:.1%}'.format(
-        str(options.confidence_threshold), precision_at_confidence_threshold,
+        options.confidence_threshold, precision_at_confidence_threshold,
         recall_at_confidence_threshold, f1))

     ##%% Collect classification results, if they exist
@@ -1289,7 +1294,8 @@ def process_batch_results(options: PostProcessingOptions
         for file_info in tqdm(files_to_render):
             rendering_results.append(render_image_with_gt(
                 file_info,ground_truth_indexed_db,
-                detection_categories,classification_categories))
+                detection_categories,classification_categories,
+                options=options))
     elapsed = time.time() - start_time

     # Map all the rendering results in the list rendering_results into the
@@ -1319,6 +1325,12 @@ def process_batch_results(options: PostProcessingOptions
         image_counts['tp']
     )

+    confidence_threshold_string = ''
+    if isinstance(options.confidence_threshold,float):
+        confidence_threshold_string = '{:.2%}'.format(options.confidence_threshold)
+    else:
+        confidence_threshold_string = str(options.confidence_threshold)
+
     index_page = """<html>
 {}
 <body>
@@ -1333,7 +1345,7 @@ def process_batch_results(options: PostProcessingOptions

 <h3>Sample images</h3>
 <div class="contentdiv">
-<p>A sample of {} images, annotated with detections above {:.1%} confidence.</p>
+<p>A sample of {} images, annotated with detections above confidence {}.</p>
 <a href="tp.html">True positives (TP)</a> ({}) ({:0.1%})<br/>
 CLASSIFICATION_PLACEHOLDER_1
 <a href="tn.html">True negatives (TN)</a> ({}) ({:0.1%})<br/>
@@ -1343,7 +1355,7 @@ def process_batch_results(options: PostProcessingOptions
 </div>
 """.format(
         style_header,job_name_string,model_version_string,
-        image_count, str(options.confidence_threshold),
+        image_count, confidence_threshold_string,
         all_tp_count, all_tp_count/total_count,
         image_counts['tn'], image_counts['tn']/total_count,
         image_counts['fp'], image_counts['fp']/total_count,
@@ -1353,11 +1365,11 @@ def process_batch_results(options: PostProcessingOptions
     index_page += """
 <h3>Detection results</h3>
 <div class="contentdiv">
-<p>At a confidence threshold of {:0.1%}, precision={:0.1%}, recall={:0.1%}</p>
+<p>At a confidence threshold of {}, precision={:0.1%}, recall={:0.1%}</p>
 <p><strong>Precision/recall summary for all {} images</strong></p><img src="{}"><br/>
 </div>
 """.format(
-        str(options.confidence_threshold), precision_at_confidence_threshold, recall_at_confidence_threshold,
+        confidence_threshold_string, precision_at_confidence_threshold, recall_at_confidence_threshold,
         len(detections_df), pr_figure_relative_filename
     )

@@ -1589,7 +1601,7 @@ def process_batch_results(options: PostProcessingOptions

     confidence_threshold_string = ''
     if isinstance(options.confidence_threshold,float):
-        confidence_threshold_string = '{:.1%}'.format(options.confidence_threshold)
+        confidence_threshold_string = '{:.2%}'.format(options.confidence_threshold)
     else:
         confidence_threshold_string = str(options.confidence_threshold)

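The pattern above (format a float threshold as a percentage, fall back to str() for anything else, e.g. a per-category dict of thresholds) is easy to sanity-check in isolation. A minimal sketch mirroring the logic added in this release:

    def format_confidence_threshold(confidence_threshold):
        if isinstance(confidence_threshold, float):
            return '{:.2%}'.format(confidence_threshold)
        else:
            # E.g. a dict mapping category names to thresholds
            return str(confidence_threshold)

    assert format_confidence_threshold(0.15) == '15.00%'
    assert format_confidence_threshold({'animal': 0.2}) == "{'animal': 0.2}"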
api/batch_processing/postprocessing/remap_detection_categories.py

@@ -0,0 +1,163 @@
+########
+#
+# remap_detection_categories.py
+#
+# Given a MegaDetector results file, remap the category IDs according to a specified
+# dictionary, writing the results to a new file.
+#
+# Currently only supports remapping detection categories, not classification categories.
+#
+########
+
+#%% Constants and imports
+
+import json
+import os
+
+from tqdm import tqdm
+
+from md_utils.ct_utils import invert_dictionary
+
+
+#%% Main function
+
+def remap_detection_categories(input_file,
+                               output_file,
+                               target_category_map,
+                               extra_category_handling='error',
+                               overwrite=False):
+    """
+    Given a MD results file [input_file], remap the category IDs according to the dictionary
+    [target_category_map], writing the results to [output_file]. The remapped dictionary needs
+    to have the same category names as the input file's detection_categories dictionary.
+
+    Currently only supports remapping detection categories, not classification categories.
+
+    target_category_map can also be a MD results file, in which case we'll use that file's
+    detection_categories dictionary.
+
+    [extra_category_handling] specifies what we should do if categories are present in the
+    source file that are not present in the target mapping:
+
+    'error' == Error in this case.
+    'drop_if_unused' == Don't include these in the output file's category mappings if they
+        are unused, error if they are used.
+    'remap' == Remap to unused category IDs. This is reserved for future use, not currently
+        implemented.
+    """
+
+    if os.path.exists(output_file) and (not overwrite):
+        print('File {} exists, bypassing remapping'.format(output_file))
+        return
+
+    assert os.path.isfile(input_file), \
+        'File {} does not exist'.format(input_file)
+
+    # If "target_category_map" is passed as a filename, load the "detection_categories"
+    # dict from that file.
+    if isinstance(target_category_map,str):
+        target_categories_file = target_category_map
+        with open(target_categories_file,'r') as f:
+            d = json.load(f)
+            target_category_map = d['detection_categories']
+    assert isinstance(target_category_map,dict)
+
+    with open(input_file,'r') as f:
+        input_data = json.load(f)
+
+    input_images = input_data['images']
+    input_categories = input_data['detection_categories']
+
+    # Figure out which categories are actually used
+    used_category_ids = set()
+    for im in input_images:
+
+        if 'detections' not in im or im['detections'] is None:
+            continue
+
+        for det in im['detections']:
+            used_category_ids.add(det['category'])
+    used_category_names = [input_categories[cid] for cid in used_category_ids]
+
+    input_names_set = set(input_categories.values())
+    output_names_set = set(target_category_map.values())
+
+    # category_name = list(input_names_set)[0]
+    for category_name in input_names_set:
+        if category_name in output_names_set:
+            continue
+        if extra_category_handling == 'error':
+            raise ValueError('Category {} present in source but not in target'.format(category_name))
+        elif extra_category_handling == 'drop_if_unused':
+            if category_name in used_category_names:
+                raise ValueError('Category {} present (and used) in source but not in target'.format(
+                    category_name))
+            else:
+                print('Category {} is unused and not present in the target mapping, ignoring'.format(
+                    category_name))
+                continue
+        elif extra_category_handling == 'remap':
+            raise NotImplementedError('Remapping of extra category IDs not yet implemented')
+        else:
+            raise ValueError('Unrecognized extra category handling scheme {}'.format(
+                extra_category_handling))
+
+    output_category_name_to_output_category_id = invert_dictionary(target_category_map)
+
+    input_category_id_to_output_category_id = {}
+    for input_category_id in input_categories.keys():
+        category_name = input_categories[input_category_id]
+        if category_name not in output_category_name_to_output_category_id:
+            assert category_name not in used_category_names
+        else:
+            output_category_id = output_category_name_to_output_category_id[category_name]
+            input_category_id_to_output_category_id[input_category_id] = output_category_id
+
+    # im = input_images[0]
+    for im in tqdm(input_images):
+
+        if 'detections' not in im or im['detections'] is None:
+            continue
+
+        # det = im['detections'][0]
+        for det in im['detections']:
+            det['category'] = input_category_id_to_output_category_id[det['category']]
+
+    input_data['detection_categories'] = target_category_map
+
+    with open(output_file,'w') as f:
+        json.dump(input_data,f,indent=1)
+
+    print('Saved remapped results to {}'.format(output_file))
+
+
+#%% Interactive driver
+
+if False:
+
+    pass
+
+    #%%
+
+    target_categories_file = '/home/dmorris/tmp/usgs-tegus/model-comparison/all-classes_usgs-only_yolov5x6.json'
+    target_category_map = target_categories_file
+    input_file = '/home/dmorris/tmp/usgs-tegus/model-comparison/all-classes_usgs-goannas-lilablanks_yolov5x6-20240223.json'
+
+    output_file = input_file.replace('.json','_remapped.json')
+    assert output_file != input_file
+    overwrite = True
+
+    extra_category_handling = 'drop_if_unused'
+
+    remap_detection_categories(input_file=input_file,
+                               output_file=output_file,
+                               target_category_map=target_category_map,
+                               extra_category_handling=extra_category_handling,
+                               overwrite=overwrite)
+
+
+#%% Command-line driver
+
+# TODO
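remap_detection_categories.py leans on md_utils.ct_utils.invert_dictionary, whose implementation isn't shown in this diff. Behaviorally it maps values back to keys; a sketch of the assumed behavior (not the actual md_utils implementation):

    # Assumed behavior: swap keys and values; if values repeat, later keys win
    def invert_dictionary(d):
        return {v: k for k, v in d.items()}

    target_category_map = {'1': 'animal', '2': 'person', '3': 'vehicle'}
    name_to_id = invert_dictionary(target_category_map)
    assert name_to_id['person'] == '2'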
api/batch_processing/postprocessing/render_detection_confusion_matrix.py

@@ -56,13 +56,13 @@ def render_image(im,render_image_constants):

     assert im['file'] in filename_to_ground_truth_im

-    input_file = os.path.join(image_folder,im['file'])
-    assert os.path.isfile(input_file)
-
     output_file = image_to_output_file(im,preview_images_folder)
     if os.path.isfile(output_file) and not force_render_images:
         return output_file

+    input_file = os.path.join(image_folder,im['file'])
+    assert os.path.isfile(input_file)
+
     detections_to_render = []

     for det in im['detections']:
@@ -82,8 +82,12 @@ def render_image(im,render_image_constants):

 #%% Main function

-def render_detection_confusion_matrix(ground_truth_file,results_file,image_folder,preview_folder,
-                                      force_render_images=False, confidence_thresholds=None,
+def render_detection_confusion_matrix(ground_truth_file,
+                                      results_file,
+                                      image_folder,
+                                      preview_folder,
+                                      force_render_images=False,
+                                      confidence_thresholds=None,
                                       rendering_confidence_thresholds=None,
                                       target_image_size=(1280,-1),
                                       parallelize_rendering=True,
@@ -223,7 +227,7 @@ def render_detection_confusion_matrix(ground_truth_file,results_file,image_folde
     filename_to_predicted_categories = defaultdict(set)
     predicted_category_name_to_filenames = defaultdict(set)

-    # im = md_results['images'][0]
+    # im = md_formatted_results['images'][0]
     for im in tqdm(md_formatted_results['images']):

         assert im['file'] in filename_to_ground_truth_im
@@ -247,9 +251,6 @@ def render_detection_confusion_matrix(ground_truth_file,results_file,image_folde

     category_name_to_image_lists = {}

-    # These may not be identical; currently the ground truth contains an "unknown" category
-    # results_category_names = sorted(list(results_category_id_to_name.values()))
-
     sub_page_tokens = ['fn','tn','fp','tp']

     for category_name in ground_truth_category_names:
@@ -296,7 +297,7 @@ def render_detection_confusion_matrix(ground_truth_file,results_file,image_folde
                 assignment = 'tn'

             category_name_to_image_lists[category_name][assignment].append(filename)
-            
+
         # ...for each filename


@@ -333,8 +334,8 @@ def render_detection_confusion_matrix(ground_truth_file,results_file,image_folde
         results_category_name_to_confidence = defaultdict(int)
         for det in results_im['detections']:
             category_name = results_category_id_to_name[det['category']]
-            detection_threshold = rendering_confidence_thresholds['default']
-            if category_name in rendering_confidence_thresholds:
+            detection_threshold = confidence_thresholds['default']
+            if category_name in confidence_thresholds:
                 detection_threshold = confidence_thresholds[category_name]
             if det['conf'] > detection_threshold:
                 results_category_name_to_confidence[category_name] = max(
@@ -354,6 +355,8 @@ def render_detection_confusion_matrix(ground_truth_file,results_file,image_folde

         confusion_matrix[ground_truth_category_index,predicted_category_index] += 1

+    # ...for each file
+
     plt.ioff()

     fig_h = 3 + 0.3 * n_categories
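The rendering_confidence_thresholds fix above matters because [confidence_thresholds] is a dict keyed by category name, with a 'default' entry as the fallback. A minimal sketch of the lookup pattern, assuming that structure:

    # Per-category detection thresholds with a 'default' fallback
    confidence_thresholds = {'default': 0.2, 'person': 0.5}

    def get_threshold(category_name, thresholds):
        detection_threshold = thresholds['default']
        if category_name in thresholds:
            detection_threshold = thresholds[category_name]
        return detection_threshold

    assert get_threshold('person', confidence_thresholds) == 0.5
    assert get_threshold('animal', confidence_thresholds) == 0.2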
api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py

@@ -181,7 +181,7 @@ class RepeatDetectionOptions:

     # Optionally show a grid that includes a sample image for the detection, plus
     # the top N additional detections
-    bRenderDetectionTiles = False
+    bRenderDetectionTiles = True

     # If this is None, we'll render at the width of the original image
     detectionTilesPrimaryImageWidth = None
@@ -193,7 +193,7 @@ class RepeatDetectionOptions:
     # of luck.
     detectionTilesCroppedGridWidth = 0.6
     detectionTilesPrimaryImageLocation='right'
-    detectionTilesMaxCrops = None
+    detectionTilesMaxCrops = 250

     # If bRenderOtherDetections is True, what color should we use to render the
     # (hopefully pretty subtle) non-target detections?
data_management/cct_json_utils.py

@@ -142,7 +142,8 @@ class IndexedJsonDb:
     def __init__(self, json_filename: Union[str, JSONObject],
                  b_normalize_paths: bool = False,
                  filename_replacements: Optional[Mapping[str, str]] = None,
-                 b_convert_classes_to_lower: bool = True):
+                 b_convert_classes_to_lower: bool = True,
+                 b_force_forward_slashes: bool = True):
         """
         json_filename can also be an existing json db
         """
@@ -162,11 +163,15 @@ class IndexedJsonDb:
             for c in self.db['categories']:
                 c['name'] = c['name'].lower()

+        # Normalize paths to simplify comparisons later
         if b_normalize_paths:
-            # Normalize paths to simplify comparisons later
             for im in self.db['images']:
                 im['file_name'] = os.path.normpath(im['file_name'])

+        if b_force_forward_slashes:
+            for im in self.db['images']:
+                im['file_name'] = im['file_name'].replace('\\','/')
+
         if filename_replacements is not None:
             for s in filename_replacements:
                 # Make custom replacements in filenames, typically used to
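The new b_force_forward_slashes step runs after os.path.normpath, which matters because on Windows normpath converts separators to backslashes; forcing forward slashes afterward keeps filename keys platform-independent. A small illustration using ntpath (the Windows flavor of os.path, importable on any platform):

    import ntpath

    file_name = 'camera01/./subdir/IMG_0001.JPG'
    normalized = ntpath.normpath(file_name)   # 'camera01\\subdir\\IMG_0001.JPG'
    portable = normalized.replace('\\', '/')
    assert portable == 'camera01/subdir/IMG_0001.JPG'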
data_management/coco_to_labelme.py

@@ -0,0 +1,263 @@
+########
+#
+# coco_to_labelme.py
+#
+# Converts a COCO dataset to labelme format (one .json per image file).
+#
+# If you want to convert YOLO data to labelme, use yolo_to_coco, then coco_to_labelme.
+#
+########
+
+#%% Imports and constants
+
+import os
+import json
+
+from tqdm import tqdm
+from collections import defaultdict
+
+from md_visualization.visualization_utils import open_image
+
+
+#%% Functions
+
+def get_labelme_dict_for_image_from_coco_record(im,annotations,categories,info=None):
+    """
+    For the given image struct in COCO format and associated list of annotations, reformat
+    the detections into labelme format. Returns a dict. All annotations in this list should
+    point to this image.
+
+    "categories" is in the standard COCO format.
+
+    'height' and 'width' are required in [im].
+    """
+
+    image_base_name = os.path.basename(im['file_name'])
+
+    output_dict = {}
+    if info is not None:
+        output_dict['custom_info'] = info
+    output_dict['version'] = '5.3.0a0'
+    output_dict['flags'] = {}
+    output_dict['shapes'] = []
+    output_dict['imagePath'] = image_base_name
+    output_dict['imageHeight'] = im['height']
+    output_dict['imageWidth'] = im['width']
+    output_dict['imageData'] = None
+
+    # Store COCO categories in case we want to reconstruct the original IDs later
+    output_dict['coco_categories'] = categories
+
+    category_id_to_name = {c['id']:c['name'] for c in categories}
+
+    if 'flags' in im:
+        output_dict['flags'] = im['flags']
+
+    # ann = annotations[0]
+    for ann in annotations:
+
+        if 'bbox' not in ann:
+            continue
+
+        shape = {}
+        shape['label'] = category_id_to_name[ann['category_id']]
+        shape['shape_type'] = 'rectangle'
+        shape['description'] = ''
+        shape['group_id'] = None
+
+        # COCO boxes are [x_min, y_min, width_of_box, height_of_box] (absolute)
+        #
+        # labelme boxes are [[x0,y0],[x1,y1]] (absolute)
+        x0 = ann['bbox'][0]
+        y0 = ann['bbox'][1]
+        x1 = ann['bbox'][0] + ann['bbox'][2]
+        y1 = ann['bbox'][1] + ann['bbox'][3]
+
+        shape['points'] = [[x0,y0],[x1,y1]]
+        output_dict['shapes'].append(shape)
+
+    # ...for each detection
+
+    return output_dict
+
+# ...def get_labelme_dict_for_image_from_coco_record()
+
+
+def coco_to_labelme(coco_data,image_base,overwrite=False,bypass_image_size_check=False,verbose=False):
+    """
+    For all the images in [coco_data] (a dict or a filename), write a .json file in
+    labelme format alongside the corresponding relative path within image_base.
+    """
+
+    # Load COCO data if necessary
+    if isinstance(coco_data,str):
+        with open(coco_data,'r') as f:
+            coco_data = json.load(f)
+    assert isinstance(coco_data,dict)
+
+
+    ## Read image sizes if necessary
+
+    if bypass_image_size_check:
+
+        print('Bypassing size check')
+
+    else:
+
+        # TODO: parallelize this loop
+
+        print('Reading/validating image sizes...')
+
+        # im = coco_data['images'][0]
+        for im in tqdm(coco_data['images']):
+
+            # Make sure this file exists
+            im_full_path = os.path.join(image_base,im['file_name'])
+            assert os.path.isfile(im_full_path), 'Image file {} does not exist'.format(im_full_path)
+
+            # Load w/h information if necessary
+            if 'height' not in im or 'width' not in im:
+
+                try:
+                    pil_im = open_image(im_full_path)
+                    im['width'] = pil_im.width
+                    im['height'] = pil_im.height
+                except Exception:
+                    print('Warning: cannot open image {}'.format(im_full_path))
+                    if 'failure' not in im:
+                        im['failure'] = 'Failure image access'
+
+            # ...if we need to read w/h information
+
+        # ...for each image
+
+    # ...if we need to load image sizes
+
+
+    ## Generate labelme files
+
+    print('Generating .json files...')
+
+    image_id_to_annotations = defaultdict(list)
+    for ann in coco_data['annotations']:
+        image_id_to_annotations[ann['image_id']].append(ann)
+
+    n_json_files_written = 0
+    n_json_files_error = 0
+    n_json_files_exist = 0
+
+    # Write output
+    for im in tqdm(coco_data['images']):
+
+        # Skip this image if it failed to load in whatever system generated this COCO file
+        skip_image = False
+
+        # Errors are represented differently depending on the source
+        for error_string in ('failure','error'):
+            if (error_string in im) and (im[error_string] is not None):
+                if verbose:
+                    print('Warning: skipping labelme file generation for failed image {}'.format(
+                        im['file_name']))
+                skip_image = True
+                n_json_files_error += 1
+                break
+        if skip_image:
+            continue
+
+        im_full_path = os.path.join(image_base,im['file_name'])
+        json_path = os.path.splitext(im_full_path)[0] + '.json'
+
+        if (not overwrite) and (os.path.isfile(json_path)):
+            if verbose:
+                print('Skipping existing file {}'.format(json_path))
+            n_json_files_exist += 1
+            continue
+
+        annotations_this_image = image_id_to_annotations[im['id']]
+        output_dict = get_labelme_dict_for_image_from_coco_record(im,
+                                                                  annotations_this_image,
+                                                                  coco_data['categories'],
+                                                                  info=None)
+
+        n_json_files_written += 1
+        with open(json_path,'w') as f:
+            json.dump(output_dict,f,indent=1)
+
+    # ...for each image
+
+    print('\nWrote {} .json files (skipped {} for errors, {} because they exist)'.format(
+        n_json_files_written,n_json_files_error,n_json_files_exist))
+
+# ...def coco_to_labelme()
+
+
+#%% Interactive driver
+
+if False:
+
+    pass
+
+    #%% Configure options
+
+    coco_file = \
+        r'C:\temp\snapshot-exploration\images\training-images-good\training-images-good_from_yolo.json'
+    image_folder = os.path.dirname(coco_file)
+    overwrite = True
+
+
+    #%% Programmatic execution
+
+    coco_to_labelme(coco_data=coco_file,image_base=image_folder,overwrite=overwrite)
+
+
+    #%% Command-line execution
+
+    s = 'python coco_to_labelme.py "{}" "{}"'.format(coco_file,image_folder)
+    if overwrite:
+        s += ' --overwrite'
+
+    print(s)
+    import clipboard; clipboard.copy(s)
+
+
+    #%% Opening labelme
+
+    s = 'python labelme {}'.format(image_folder)
+    print(s)
+    import clipboard; clipboard.copy(s)
+
+
+#%% Command-line driver
+
+import sys,argparse
+
+def main():
+
+    parser = argparse.ArgumentParser(
+        description='Convert a COCO database to labelme annotation format')
+
+    parser.add_argument(
+        'coco_file',
+        type=str,
+        help='Path to COCO data file (.json)')
+
+    parser.add_argument(
+        'image_base',
+        type=str,
+        help='Path to images (also the output folder)')
+
+    parser.add_argument(
+        '--overwrite',
+        action='store_true',
+        help='Overwrite existing labelme .json files')
+
+    if len(sys.argv[1:]) == 0:
+        parser.print_help()
+        parser.exit()
+
+    args = parser.parse_args()
+
+    coco_to_labelme(coco_data=args.coco_file,image_base=args.image_base,overwrite=args.overwrite)
+
+
+if __name__ == '__main__':
+    main()
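For reference, the labelme .json that coco_to_labelme writes next to each image looks roughly like this (values are illustrative, for a hypothetical image with one 'animal' box; field names match the code above):

    example_labelme_dict = {
        'version': '5.3.0a0',
        'flags': {},
        'shapes': [
            {
                'label': 'animal',
                'shape_type': 'rectangle',
                'description': '',
                'group_id': None,
                # [[x0,y0],[x1,y1]] in absolute pixel coordinates
                'points': [[100.0, 200.0], [400.0, 350.0]],
            }
        ],
        'imagePath': 'IMG_0001.JPG',
        'imageHeight': 1080,
        'imageWidth': 1920,
        'imageData': None,
        'coco_categories': [{'id': 1, 'name': 'animal'}],
    }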