megadetector 5.0.25__py3-none-any.whl → 5.0.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (34) hide show
  1. megadetector/data_management/cct_json_utils.py +15 -2
  2. megadetector/data_management/coco_to_yolo.py +53 -31
  3. megadetector/data_management/databases/combine_coco_camera_traps_files.py +7 -3
  4. megadetector/data_management/databases/integrity_check_json_db.py +2 -2
  5. megadetector/data_management/lila/generate_lila_per_image_labels.py +2 -2
  6. megadetector/data_management/lila/test_lila_metadata_urls.py +21 -10
  7. megadetector/data_management/remap_coco_categories.py +60 -11
  8. megadetector/data_management/yolo_to_coco.py +45 -15
  9. megadetector/postprocessing/classification_postprocessing.py +788 -524
  10. megadetector/postprocessing/create_crop_folder.py +95 -33
  11. megadetector/postprocessing/load_api_results.py +4 -1
  12. megadetector/postprocessing/md_to_coco.py +1 -1
  13. megadetector/postprocessing/postprocess_batch_results.py +156 -42
  14. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +3 -8
  15. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
  16. megadetector/postprocessing/separate_detections_into_folders.py +20 -4
  17. megadetector/postprocessing/subset_json_detector_output.py +180 -15
  18. megadetector/postprocessing/validate_batch_results.py +13 -5
  19. megadetector/taxonomy_mapping/map_new_lila_datasets.py +6 -6
  20. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +3 -58
  21. megadetector/taxonomy_mapping/species_lookup.py +45 -2
  22. megadetector/utils/ct_utils.py +4 -2
  23. megadetector/utils/directory_listing.py +1 -1
  24. megadetector/utils/md_tests.py +2 -1
  25. megadetector/utils/path_utils.py +308 -19
  26. megadetector/utils/wi_utils.py +363 -186
  27. megadetector/visualization/visualization_utils.py +2 -1
  28. megadetector/visualization/visualize_db.py +1 -1
  29. megadetector/visualization/visualize_detector_output.py +1 -4
  30. {megadetector-5.0.25.dist-info → megadetector-5.0.26.dist-info}/METADATA +4 -3
  31. {megadetector-5.0.25.dist-info → megadetector-5.0.26.dist-info}/RECORD +34 -34
  32. {megadetector-5.0.25.dist-info → megadetector-5.0.26.dist-info}/WHEEL +1 -1
  33. {megadetector-5.0.25.dist-info → megadetector-5.0.26.dist-info/licenses}/LICENSE +0 -0
  34. {megadetector-5.0.25.dist-info → megadetector-5.0.26.dist-info}/top_level.txt +0 -0
@@ -300,7 +300,10 @@ class SequenceOptions:
300
300
  def __init__(self):
301
301
  #: Images separated by <= this duration will be grouped into the same sequence.
302
302
  self.episode_interval_seconds = 60.0
303
-
303
+
304
+ #: How to handle invalid datetimes: 'error' or 'none'
305
+ self.datetime_conversion_failure_behavior = 'none'
306
+
304
307
 
305
308
  #%% Functions
306
309
 
@@ -445,7 +448,17 @@ def create_sequences(image_info,options=None):
445
448
  raise ValueError('Unrecognized type for [image_info]')
446
449
 
447
450
  # Modifies the images in place
448
- _ = parse_datetimes_from_cct_image_list(image_info)
451
+ _ = parse_datetimes_from_cct_image_list(image_info,
452
+ conversion_failure_behavior=options.datetime_conversion_failure_behavior)
453
+
454
+ n_invalid_datetimes = 0
455
+ for im in image_info:
456
+ if not isinstance(im['datetime'],datetime.datetime):
457
+ assert im['datetime'] is None, 'At this point, datetimes should be valid or None'
458
+ n_invalid_datetimes += 1
459
+ if n_invalid_datetimes > 0:
460
+ print('Warning: {} of {} images have invalid datetimes'.format(
461
+ n_invalid_datetimes,len(image_info)))
449
462
 
450
463
  # Find all unique locations
451
464
  locations = set()
@@ -47,6 +47,9 @@ def write_yolo_dataset_file(yolo_dataset_file,
47
47
  class_list (list or str): an ordered list of class names (the first item will be class 0,
48
48
  etc.), or the name of a text file containing an ordered list of class names (one per
49
49
  line, starting from class zero).
50
+ train_folder_relative (str, optional): train folder name, used only to populate dataset.yaml
51
+ val_folder_relative (str, optional): val folder name, used only to populate dataset.yaml
52
+ test_folder_relative (str, optional): test folder name, used only to populate dataset.yaml
50
53
  """
51
54
 
52
55
  # Read class names
@@ -97,7 +100,7 @@ def coco_to_yolo(input_image_folder,
97
100
  category_names_to_exclude=None,
98
101
  category_names_to_include=None,
99
102
  write_output=True,
100
- flatten_paths=True):
103
+ flatten_paths=False):
101
104
  """
102
105
  Converts a COCO-formatted dataset to a YOLO-formatted dataset, optionally flattening the
103
106
  dataset to a single folder in the process.
@@ -116,17 +119,21 @@ def coco_to_yolo(input_image_folder,
116
119
  images are left alone.
117
120
  source_format (str, optional): can be 'coco' (default) or 'coco_camera_traps'. The only difference
118
121
  is that when source_format is 'coco_camera_traps', we treat an image with a non-bbox
119
- annotation with a category id of 0 as a special case, i.e. that's how an empty image
120
- is indicated. The original COCO standard is a little ambiguous on this issue. If
121
- source_format is 'coco', we either treat images as empty or error, depending on the value
122
- of [allow_empty_annotations]. [allow_empty_annotations] has no effect if source_format is
123
- 'coco_camera_traps'.
122
+ annotation as a special case, i.e. that's how an empty image is indicated. The original
123
+ COCO standard is a little ambiguous on this issue. If source_format is 'coco', we
124
+ either treat images as empty or error, depending on the value of [allow_empty_annotations].
125
+ [allow_empty_annotations] has no effect if source_format is 'coco_camera_traps'.
126
+ overwrite_images (bool, optional): over-write images in the output folder if they exist
124
127
  create_image_and_label_folder (bool, optional): whether to create separate folders called 'images' and
125
128
  'labels' in the YOLO output folder. If create_image_and_label_folders is False,
126
129
  a/b/c/image001.jpg will become a#b#c#image001.jpg, and the corresponding text file will
127
130
  be a#b#c#image001.txt. If create_image_and_label_folders is True, a/b/c/image001.jpg will become
128
131
  images/a#b#c#image001.jpg, and the corresponding text file will be
129
132
  labels/a#b#c#image001.txt.
133
+ class_file_name (str, optional): .txt file (relative to the output folder) that we should
134
+ populate with a list of classes (or None to omit)
135
+ allow_empty_annotations (bool, optional): if this is False and [source_format] is 'coco',
136
+ we'll error on annotations that have no 'bbox' field
130
137
  clip_boxes (bool, optional): whether to clip bounding box coordinates to the range [0,1] before
131
138
  converting to YOLO xywh format
132
139
  image_id_to_output_image_json_file (str, optional): an optional *output* file, to which we will write
@@ -139,12 +146,14 @@ def coco_to_yolo(input_image_folder,
139
146
  category_names_to_exclude (str, optional): category names that should not be represented in the
140
147
  YOLO output; only impacts annotations, does not prevent copying images. There's almost no reason
141
148
  you would want to specify this and [category_names_to_include].
142
- category_names_to_include (str, optional): allow-list of category names that should be represented in the
143
- YOLO output; only impacts annotations, does not prevent copying images. There's almost no reason
144
- you would want to specify this and [category_names_to_exclude].
149
+ category_names_to_include (str, optional): allow-list of category names that should be represented
150
+ in the YOLO output; only impacts annotations, does not prevent copying images. There's almost
151
+ no reason you would want to specify this and [category_names_to_exclude].
145
152
  write_output (bool, optional): determines whether we actually copy images and write annotations;
146
153
  setting this to False mostly puts this function in "dry run" "mode. The class list
147
154
  file is written regardless of the value of write_output.
155
+ flatten_paths (bool, optional): replace /'s in image filenames with [path_replacement_char],
156
+ which ensures that the output folder is a single flat folder.
148
157
 
149
158
  Returns:
150
159
  dict: information about the coco --> yolo mapping, containing at least the fields:
@@ -313,9 +322,9 @@ def coco_to_yolo(input_image_folder,
313
322
 
314
323
  elif source_format == 'coco_camera_traps':
315
324
 
316
- # We allow empty bbox lists in COCO camera traps; this is typically a negative
317
- # example in a dataset that has bounding boxes, and 0 is typically the empty
318
- # category.
325
+ # We allow empty bbox lists in COCO camera traps files; this is typically a
326
+ # negative example in a dataset that has bounding boxes, and 0 is typically
327
+ # the empty category.
319
328
  if ann['category_id'] != 0:
320
329
  if not printed_empty_annotation_warning:
321
330
  printed_empty_annotation_warning = True
@@ -429,13 +438,14 @@ def coco_to_yolo(input_image_folder,
429
438
 
430
439
  print('Generating class list')
431
440
 
432
- class_list_filename = os.path.join(output_folder,class_file_name)
433
- with open(class_list_filename, 'w') as f:
434
- print('Writing class list to {}'.format(class_list_filename))
435
- for i_class in range(0,len(yolo_id_to_name)):
436
- # Category IDs should range from 0..N-1
437
- assert i_class in yolo_id_to_name
438
- f.write(yolo_id_to_name[i_class] + '\n')
441
+ if class_file_name is not None:
442
+ class_list_filename = os.path.join(output_folder,class_file_name)
443
+ with open(class_list_filename, 'w') as f:
444
+ print('Writing class list to {}'.format(class_list_filename))
445
+ for i_class in range(0,len(yolo_id_to_name)):
446
+ # Category IDs should range from 0..N-1
447
+ assert i_class in yolo_id_to_name
448
+ f.write(yolo_id_to_name[i_class] + '\n')
439
449
 
440
450
  if image_id_to_output_image_json_file is not None:
441
451
  print('Writing image ID mapping to {}'.format(image_id_to_output_image_json_file))
@@ -457,6 +467,9 @@ def coco_to_yolo(input_image_folder,
457
467
 
458
468
  source_image_to_dest_image = {}
459
469
 
470
+ label_files_written = []
471
+ n_boxes_written = 0
472
+
460
473
  # TODO: parallelize this loop
461
474
  #
462
475
  # output_info = images_to_copy[0]
@@ -471,6 +484,7 @@ def coco_to_yolo(input_image_folder,
471
484
 
472
485
  source_image_to_dest_image[source_image] = dest_image
473
486
 
487
+ # Copy the image if necessary
474
488
  if write_output:
475
489
 
476
490
  os.makedirs(os.path.dirname(dest_image),exist_ok=True)
@@ -482,17 +496,24 @@ def coco_to_yolo(input_image_folder,
482
496
  if (not os.path.isfile(dest_image)) or (overwrite_images):
483
497
  shutil.copyfile(source_image,dest_image)
484
498
 
485
- bboxes = output_info['bboxes']
499
+ bboxes = output_info['bboxes']
500
+
501
+ # Write the annotation file if necessary
502
+ #
503
+ # Only write an annotation file if there are bounding boxes. Images with
504
+ # no .txt files are treated as hard negatives, at least by YOLOv5:
505
+ #
506
+ # https://github.com/ultralytics/yolov5/issues/3218
507
+ #
508
+ # I think this is also true for images with empty .txt files, but
509
+ # I'm using the convention suggested on that issue, i.e. hard
510
+ # negatives are expressed as images without .txt files.
511
+ if len(bboxes) > 0:
486
512
 
487
- # Only write an annotation file if there are bounding boxes. Images with
488
- # no .txt files are treated as hard negatives, at least by YOLOv5:
489
- #
490
- # https://github.com/ultralytics/yolov5/issues/3218
491
- #
492
- # I think this is also true for images with empty .txt files, but
493
- # I'm using the convention suggested on that issue, i.e. hard
494
- # negatives are expressed as images without .txt files.
495
- if len(bboxes) > 0:
513
+ n_boxes_written += len(bboxes)
514
+ label_files_written.append(dest_txt)
515
+
516
+ if write_output:
496
517
 
497
518
  with open(dest_txt,'w') as f:
498
519
 
@@ -501,8 +522,7 @@ def coco_to_yolo(input_image_folder,
501
522
  assert len(bbox) == 5
502
523
  s = '{} {} {} {} {}'.format(bbox[0],bbox[1],bbox[2],bbox[3],bbox[4])
503
524
  f.write(s + '\n')
504
-
505
- # ...if we're actually writing output
525
+
506
526
 
507
527
  # ...for each image
508
528
 
@@ -510,6 +530,8 @@ def coco_to_yolo(input_image_folder,
510
530
  coco_to_yolo_info['class_list_filename'] = class_list_filename
511
531
  coco_to_yolo_info['source_image_to_dest_image'] = source_image_to_dest_image
512
532
  coco_to_yolo_info['coco_id_to_yolo_id'] = coco_id_to_yolo_id
533
+ coco_to_yolo_info['label_files_written'] = label_files_written
534
+ coco_to_yolo_info['n_boxes_written'] = n_boxes_written
513
535
 
514
536
  return coco_to_yolo_info
515
537
 
@@ -24,8 +24,10 @@ import sys
24
24
 
25
25
  #%% Merge functions
26
26
 
27
- def combine_cct_files(input_files, output_file=None, require_uniqueness=True,
28
- filename_prefixes=None):
27
+ def combine_cct_files(input_files,
28
+ output_file=None,
29
+ require_uniqueness=True,
30
+ filename_prefixes=None):
29
31
  """
30
32
  Merges the list of COCO Camera Traps files [input_files] into a single
31
33
  dictionary, optionally writing the result to [output_file].
@@ -33,8 +35,10 @@ def combine_cct_files(input_files, output_file=None, require_uniqueness=True,
33
35
  Args:
34
36
  input_files (list): paths to CCT .json files
35
37
  output_file (str, optional): path to write merged .json file
36
- require_uniqueness (bool): whether to require that the images in
38
+ require_uniqueness (bool, optional): whether to require that the images in
37
39
  each input_dict be unique
40
+ filename_prefixes (dict, optional): dict mapping input filenames to strings
41
+ that should be prepended to image filenames from that source
38
42
 
39
43
  Returns:
40
44
  dict: the merged COCO-formatted .json dict
@@ -327,7 +327,7 @@ def integrity_check_json_db(jsonFile, options=None):
327
327
 
328
328
  for i_image,result in enumerate(results):
329
329
  if result is not None:
330
- validation_errors.append(images[i_image]['file_name'],result)
330
+ validation_errors.append((images[i_image]['file_name'],result))
331
331
 
332
332
  # ...for each image
333
333
 
@@ -393,7 +393,7 @@ def integrity_check_json_db(jsonFile, options=None):
393
393
  elif image['_count'] > 1:
394
394
  nMultiAnnotated += 1
395
395
 
396
- print('Found {} unannotated images, {} images with multiple annotations'.format(
396
+ print('\nFound {} unannotated images, {} images with multiple annotations'.format(
397
397
  nUnannotated,nMultiAnnotated))
398
398
 
399
399
  if (len(base_dir) > 0) and options.bFindUnusedImages:
@@ -349,7 +349,7 @@ with open(output_file,'w',encoding='utf-8',newline='') as f:
349
349
 
350
350
  # ...with open()
351
351
 
352
- print('Processed {} datasets'.format(len(metadata_table)))
352
+ print('\nProcessed {} datasets'.format(len(metadata_table)))
353
353
 
354
354
 
355
355
  #%% Read the .csv back
@@ -393,7 +393,7 @@ def check_row(row):
393
393
  dataset_name_to_locations[ds_name].add(row['location_id'])
394
394
 
395
395
  # Faster, but more annoying to debug
396
- if False:
396
+ if True:
397
397
 
398
398
  df.progress_apply(check_row, axis=1)
399
399
 
@@ -31,9 +31,10 @@ os.makedirs(metadata_dir,exist_ok=True)
31
31
  md_results_dir = os.path.join(lila_local_base,'md_results')
32
32
  os.makedirs(md_results_dir,exist_ok=True)
33
33
 
34
- md_results_keys = ['mdv4_results_raw','mdv5a_results_raw','mdv5b_results_raw','md_results_with_rde']
34
+ md_results_keys = ['mdv4_results_raw','mdv5a_results_raw','mdv5b_results_raw',
35
+ 'md1000-redwood_results_raw','md_results_with_rde']
35
36
 
36
- preferred_cloud = 'gcp' # 'azure', 'aws'
37
+ preferred_cloud = None # 'gcp' # 'azure', 'aws'
37
38
 
38
39
  force_download = True
39
40
 
@@ -52,7 +53,7 @@ print('Loaded metadata URLs for {} datasets'.format(len(metadata_table)))
52
53
 
53
54
  #%% Download and extract metadata and MD results for each dataset
54
55
 
55
- # Takes ~60 seconds if everything needs to beo downloaded and unzipped
56
+ # Takes ~60 seconds if everything needs to be downloaded and unzipped
56
57
 
57
58
  for ds_name in metadata_table.keys():
58
59
 
@@ -88,6 +89,8 @@ url_to_source = {}
88
89
  # so we pick a semi-arbitrary image that isn't the first. How about the 2000th?
89
90
  image_index = 2000
90
91
 
92
+ # TODO: parallelize this loop
93
+ #
91
94
  # ds_name = list(metadata_table.keys())[0]
92
95
  for ds_name in metadata_table.keys():
93
96
 
@@ -101,13 +104,21 @@ for ds_name in metadata_table.keys():
101
104
  with open(json_filename, 'r') as f:
102
105
  data = json.load(f)
103
106
 
104
- image_base_url = metadata_table[ds_name]['image_base_url_' + preferred_cloud]
105
- assert not image_base_url.endswith('/')
106
- # Download a test image
107
- test_image_relative_path = data['images'][image_index]['file_name']
108
- test_image_url = image_base_url + '/' + test_image_relative_path
109
-
110
- url_to_source[test_image_url] = ds_name + ' metadata'
107
+ if preferred_cloud is not None:
108
+ clouds = [preferred_cloud]
109
+ else:
110
+ clouds = ['gcp','aws','azure']
111
+
112
+ for cloud in clouds:
113
+
114
+ image_base_url = metadata_table[ds_name]['image_base_url_' + cloud]
115
+ assert not image_base_url.endswith('/')
116
+
117
+ # Download a test image
118
+ test_image_relative_path = data['images'][image_index]['file_name']
119
+ test_image_url = image_base_url + '/' + test_image_relative_path
120
+
121
+ url_to_source[test_image_url] = ds_name + ' metadata ({})'.format(cloud)
111
122
 
112
123
  # Grab an image from the MegaDetector results
113
124
 
@@ -12,6 +12,7 @@ import os
12
12
  import json
13
13
 
14
14
  from copy import deepcopy
15
+ from megadetector.utils.ct_utils import invert_dictionary
15
16
 
16
17
 
17
18
  #%% Main function
@@ -19,17 +20,27 @@ from copy import deepcopy
19
20
  def remap_coco_categories(input_data,
20
21
  output_category_name_to_id,
21
22
  input_category_name_to_output_category_name,
22
- output_file=None):
23
+ output_file=None,
24
+ allow_unused_categories=False):
23
25
  """
24
26
  Given a COCO-formatted dataset, remap the categories to a new categories mapping, optionally
25
27
  writing the results to a new file.
26
28
 
27
- output_category_name_to_id is a dict mapping strings to ints.
29
+ Args:
30
+ input_data (str or dict): a COCO-formatted dict or a filename. If it's a dict, it will
31
+ be copied, not modified in place.
32
+ output_category_name_to_id (dict): a dict mapping strings to ints. Categories not in
33
+ this dict will be ignored or will result in errors, depending on allow_unused_categories.
34
+ input_category_name_to_output_category_name (dict): a dict mapping strings to strings.
35
+ Annotations using categories not in this dict will be omitted or will result in
36
+ errors, depending on allow_unused_categories.
37
+ output_file (str, optional): output file to which we should write remapped COCO data
38
+ allow_unused_categories (bool, optional): should we ignore categories not present in the
39
+ input/output mappings? If this is False and we encounter an unmapped category, we'll
40
+ error.
28
41
 
29
- input_category_name_to_output_category_name is a dict mapping strings to strings.
30
-
31
- [input_data] can be a COCO-formatted dict or a filename. If it's a dict, it will be copied,
32
- not modified in place.
42
+ Returns:
43
+ dict: COCO-formatted dict
33
44
  """
34
45
 
35
46
  if isinstance(input_data,str):
@@ -48,23 +59,59 @@ def remap_coco_categories(input_data,
48
59
  input_category_name_to_input_category_id = {}
49
60
  for c in input_data['categories']:
50
61
  input_category_name_to_input_category_id[c['name']] = c['id']
51
-
62
+ input_category_id_to_input_category_name = \
63
+ invert_dictionary(input_category_name_to_input_category_id)
64
+
52
65
  # Map input IDs --> output IDs
53
66
  input_category_id_to_output_category_id = {}
54
- for input_name in input_category_name_to_output_category_name.keys():
67
+ input_category_names = list(input_category_name_to_output_category_name.keys())
68
+
69
+ # input_name = input_category_names[0]
70
+ for input_name in input_category_names:
71
+
55
72
  output_name = input_category_name_to_output_category_name[input_name]
56
73
  assert output_name in output_category_name_to_id, \
57
74
  'No output ID for {} --> {}'.format(input_name,output_name)
58
75
  input_id = input_category_name_to_input_category_id[input_name]
59
76
  output_id = output_category_name_to_id[output_name]
60
77
  input_category_id_to_output_category_id[input_id] = output_id
78
+
79
+ # ...for each category we want to keep
61
80
 
81
+ printed_unused_category_warnings = set()
82
+
83
+ valid_annotations = []
84
+
62
85
  # Map annotations
63
86
  for ann in output_data['annotations']:
64
- assert ann['category_id'] in input_category_id_to_output_category_id, \
65
- 'Unrecognized category ID {}'.format(ann['category_id'])
66
- ann['category_id'] = input_category_id_to_output_category_id[ann['category_id']]
67
87
 
88
+ input_category_id = ann['category_id']
89
+ if input_category_id not in input_category_id_to_output_category_id:
90
+ if allow_unused_categories:
91
+ if input_category_id not in printed_unused_category_warnings:
92
+ printed_unused_category_warnings.add(input_category_id)
93
+ input_category_name = \
94
+ input_category_id_to_input_category_name[input_category_id]
95
+ s = 'Skipping unmapped category ID {} ({})'.format(
96
+ input_category_id,input_category_name)
97
+ print(s)
98
+ continue
99
+ else:
100
+ s = 'Unmapped category ID {}'.format(input_category_id)
101
+ raise ValueError(s)
102
+ output_category_id = input_category_id_to_output_category_id[input_category_id]
103
+ ann['category_id'] = output_category_id
104
+ valid_annotations.append(ann)
105
+
106
+ # ...for each annotation
107
+
108
+ # The only reason annotations should get excluded is the case where we allow
109
+ # unused categories
110
+ if not allow_unused_categories:
111
+ assert len(valid_annotations) == len(output_data['annotations'])
112
+
113
+ output_data['annotations'] = valid_annotations
114
+
68
115
  # Update the category list
69
116
  output_categories = []
70
117
  for output_name in output_category_name_to_id:
@@ -78,6 +125,8 @@ def remap_coco_categories(input_data,
78
125
 
79
126
  return input_data
80
127
 
128
+ # ...def remap_coco_categories(...)
129
+
81
130
 
82
131
  #%% Command-line driver
83
132
 
@@ -34,7 +34,7 @@ def _filename_to_image_id(fn):
34
34
  return fn.replace(' ','_').replace('\\','/')
35
35
 
36
36
 
37
- def _process_image(fn_abs,input_folder,category_id_to_name):
37
+ def _process_image(fn_abs,input_folder,category_id_to_name,label_folder):
38
38
  """
39
39
  Internal support function for processing one image's labels.
40
40
  """
@@ -42,8 +42,8 @@ def _process_image(fn_abs,input_folder,category_id_to_name):
42
42
  # Create the image object for this image
43
43
  #
44
44
  # Always use forward slashes in image filenames and IDs
45
- fn_relative = os.path.relpath(fn_abs,input_folder).replace('\\','/')
46
- image_id = _filename_to_image_id(fn_relative)
45
+ image_fn_relative = os.path.relpath(fn_abs,input_folder).replace('\\','/')
46
+ image_id = _filename_to_image_id(image_fn_relative)
47
47
 
48
48
  # This is done in a separate loop now
49
49
  #
@@ -53,7 +53,7 @@ def _process_image(fn_abs,input_folder,category_id_to_name):
53
53
  # image_ids.add(image_id)
54
54
 
55
55
  im = {}
56
- im['file_name'] = fn_relative
56
+ im['file_name'] = image_fn_relative
57
57
  im['id'] = image_id
58
58
 
59
59
  annotations_this_image = []
@@ -65,14 +65,20 @@ def _process_image(fn_abs,input_folder,category_id_to_name):
65
65
  im['height'] = im_height
66
66
  im['error'] = None
67
67
  except Exception as e:
68
- print('Warning: error reading {}:\n{}'.format(fn_relative,str(e)))
68
+ print('Warning: error reading {}:\n{}'.format(image_fn_relative,str(e)))
69
69
  im['width'] = -1
70
70
  im['height'] = -1
71
71
  im['error'] = str(e)
72
72
  return (im,annotations_this_image)
73
73
 
74
74
  # Is there an annotation file for this image?
75
- annotation_file = os.path.splitext(fn_abs)[0] + '.txt'
75
+ if label_folder is not None:
76
+ assert input_folder in fn_abs
77
+ label_file_abs_base = fn_abs.replace(input_folder,label_folder)
78
+ else:
79
+ label_file_abs_base = fn_abs
80
+
81
+ annotation_file = os.path.splitext(label_file_abs_base)[0] + '.txt'
76
82
  if not os.path.isfile(annotation_file):
77
83
  annotation_file = os.path.splitext(fn_abs)[0] + '.TXT'
78
84
 
@@ -270,9 +276,14 @@ def validate_label_file(label_file,category_id_to_name=None,verbose=False):
270
276
  # ...def validate_label_file(...)
271
277
 
272
278
 
273
- def validate_yolo_dataset(input_folder, class_name_file, n_workers=1, pool_type='thread', verbose=False):
279
+ def validate_yolo_dataset(input_folder,
280
+ class_name_file,
281
+ n_workers=1,
282
+ pool_type='thread',
283
+ verbose=False):
274
284
  """
275
- Verifies all the labels in a YOLO dataset folder.
285
+ Verifies all the labels in a YOLO dataset folder. Does not yet support the case where the
286
+ labels and images are in different folders (yolo_to_coco() supports this).
276
287
 
277
288
  Looks for:
278
289
 
@@ -396,14 +407,17 @@ def yolo_to_coco(input_folder,
396
407
  recursive=True,
397
408
  exclude_string=None,
398
409
  include_string=None,
399
- overwrite_handling='overwrite'):
410
+ overwrite_handling='overwrite',
411
+ label_folder=None):
400
412
  """
401
413
  Converts a YOLO-formatted dataset to a COCO-formatted dataset.
402
414
 
403
415
  All images will be assigned an "error" value, usually None.
404
416
 
405
417
  Args:
406
- input_folder (str): the YOLO dataset folder to validate
418
+ input_folder (str): the YOLO dataset folder to convert. If the image and label
419
+ folders are different, this is the image folder, and [label_folder] is the
420
+ label folder.
407
421
  class_name_file (str or list): a list of classes, a flat text file, or a yolo
408
422
  dataset.yml/.yaml file. If it's a dataset.yml file, that file should point to
409
423
  input_folder as the base folder, though this is not explicitly checked.
@@ -432,6 +446,7 @@ def yolo_to_coco(input_folder,
432
446
  include_string (str, optional): include only images whose filename contains a string
433
447
  overwrite_handling (str, optional): behavior if output_file exists ('load', 'overwrite', or
434
448
  'error')
449
+ label_folder (str, optional): label folder, if different from the image folder
435
450
 
436
451
  Returns:
437
452
  dict: COCO-formatted data, the same as what's written to [output_file]
@@ -439,6 +454,8 @@ def yolo_to_coco(input_folder,
439
454
 
440
455
  ## Validate input
441
456
 
457
+ input_folder = input_folder.replace('\\','/')
458
+
442
459
  assert os.path.isdir(input_folder)
443
460
  assert os.path.isfile(class_name_file)
444
461
 
@@ -487,6 +504,7 @@ def yolo_to_coco(input_folder,
487
504
  print('Enumerating images...')
488
505
 
489
506
  image_files_abs = find_images(input_folder,recursive=recursive,convert_slashes=True)
507
+ assert not any(['\\' in fn for fn in image_files_abs])
490
508
 
491
509
  n_files_original = len(image_files_abs)
492
510
 
@@ -516,8 +534,14 @@ def yolo_to_coco(input_folder,
516
534
 
517
535
  if not allow_images_without_label_files:
518
536
  print('Verifying that label files exist')
537
+ # image_file_abs = image_files_abs[0]
519
538
  for image_file_abs in tqdm(image_files_abs):
520
- label_file_abs = os.path.splitext(image_file_abs)[0] + '.txt'
539
+ if label_folder is not None:
540
+ assert input_folder in image_file_abs
541
+ label_file_abs_base = image_file_abs.replace(input_folder,label_folder)
542
+ else:
543
+ label_file_abs_base = image_file_abs
544
+ label_file_abs = os.path.splitext(label_file_abs_base)[0] + '.txt'
521
545
  assert os.path.isfile(label_file_abs), \
522
546
  'No annotation file for {}'.format(image_file_abs)
523
547
 
@@ -528,7 +552,7 @@ def yolo_to_coco(input_folder,
528
552
 
529
553
  for fn_abs in tqdm(image_files_abs):
530
554
 
531
- fn_relative = os.path.relpath(fn_abs,input_folder)
555
+ fn_relative = os.path.relpath(fn_abs,input_folder).replace('\\','/')
532
556
  image_id = _filename_to_image_id(fn_relative)
533
557
  assert image_id not in image_ids, \
534
558
  'Oops, you have hit a very esoteric case where you have the same filename ' + \
@@ -543,8 +567,12 @@ def yolo_to_coco(input_folder,
543
567
  if n_workers <= 1:
544
568
 
545
569
  image_results = []
570
+ # fn_abs = image_files_abs[0]
546
571
  for fn_abs in tqdm(image_files_abs):
547
- image_results.append(_process_image(fn_abs,input_folder,category_id_to_name))
572
+ image_results.append(_process_image(fn_abs,
573
+ input_folder,
574
+ category_id_to_name,
575
+ label_folder))
548
576
 
549
577
  else:
550
578
 
@@ -557,8 +585,10 @@ def yolo_to_coco(input_folder,
557
585
 
558
586
  print('Starting a {} pool of {} workers'.format(pool_type,n_workers))
559
587
 
560
- p = partial(_process_image,input_folder=input_folder,
561
- category_id_to_name=category_id_to_name)
588
+ p = partial(_process_image,
589
+ input_folder=input_folder,
590
+ category_id_to_name=category_id_to_name,
591
+ label_folder=label_folder)
562
592
  image_results = list(tqdm(pool.imap(p, image_files_abs),
563
593
  total=len(image_files_abs)))
564
594