megadetector 5.0.6__py3-none-any.whl → 5.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (75)
  1. api/batch_processing/data_preparation/manage_local_batch.py +297 -202
  2. api/batch_processing/data_preparation/manage_video_batch.py +7 -2
  3. api/batch_processing/postprocessing/add_max_conf.py +1 -0
  4. api/batch_processing/postprocessing/combine_api_outputs.py +2 -2
  5. api/batch_processing/postprocessing/compare_batch_results.py +111 -61
  6. api/batch_processing/postprocessing/convert_output_format.py +24 -6
  7. api/batch_processing/postprocessing/load_api_results.py +56 -72
  8. api/batch_processing/postprocessing/md_to_labelme.py +119 -51
  9. api/batch_processing/postprocessing/merge_detections.py +30 -5
  10. api/batch_processing/postprocessing/postprocess_batch_results.py +175 -55
  11. api/batch_processing/postprocessing/remap_detection_categories.py +163 -0
  12. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +628 -0
  13. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
  14. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
  15. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +224 -76
  16. api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
  17. api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
  18. classification/prepare_classification_script.py +191 -191
  19. data_management/cct_json_utils.py +7 -2
  20. data_management/coco_to_labelme.py +263 -0
  21. data_management/coco_to_yolo.py +72 -48
  22. data_management/databases/integrity_check_json_db.py +75 -64
  23. data_management/databases/subset_json_db.py +1 -1
  24. data_management/generate_crops_from_cct.py +1 -1
  25. data_management/get_image_sizes.py +44 -26
  26. data_management/importers/animl_results_to_md_results.py +3 -5
  27. data_management/importers/noaa_seals_2019.py +2 -2
  28. data_management/importers/zamba_results_to_md_results.py +2 -2
  29. data_management/labelme_to_coco.py +264 -127
  30. data_management/labelme_to_yolo.py +96 -53
  31. data_management/lila/create_lila_blank_set.py +557 -0
  32. data_management/lila/create_lila_test_set.py +2 -1
  33. data_management/lila/create_links_to_md_results_files.py +1 -1
  34. data_management/lila/download_lila_subset.py +138 -45
  35. data_management/lila/generate_lila_per_image_labels.py +23 -14
  36. data_management/lila/get_lila_annotation_counts.py +16 -10
  37. data_management/lila/lila_common.py +15 -42
  38. data_management/lila/test_lila_metadata_urls.py +116 -0
  39. data_management/read_exif.py +65 -16
  40. data_management/remap_coco_categories.py +84 -0
  41. data_management/resize_coco_dataset.py +14 -31
  42. data_management/wi_download_csv_to_coco.py +239 -0
  43. data_management/yolo_output_to_md_output.py +40 -13
  44. data_management/yolo_to_coco.py +313 -100
  45. detection/process_video.py +36 -14
  46. detection/pytorch_detector.py +1 -1
  47. detection/run_detector.py +73 -18
  48. detection/run_detector_batch.py +116 -27
  49. detection/run_inference_with_yolov5_val.py +135 -27
  50. detection/run_tiled_inference.py +153 -43
  51. detection/tf_detector.py +2 -1
  52. detection/video_utils.py +4 -2
  53. md_utils/ct_utils.py +101 -6
  54. md_utils/md_tests.py +264 -17
  55. md_utils/path_utils.py +326 -47
  56. md_utils/process_utils.py +26 -7
  57. md_utils/split_locations_into_train_val.py +215 -0
  58. md_utils/string_utils.py +10 -0
  59. md_utils/url_utils.py +66 -3
  60. md_utils/write_html_image_list.py +12 -2
  61. md_visualization/visualization_utils.py +380 -74
  62. md_visualization/visualize_db.py +41 -10
  63. md_visualization/visualize_detector_output.py +185 -104
  64. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/METADATA +11 -13
  65. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/RECORD +74 -67
  66. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/WHEEL +1 -1
  67. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
  68. taxonomy_mapping/map_new_lila_datasets.py +43 -39
  69. taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
  70. taxonomy_mapping/preview_lila_taxonomy.py +27 -27
  71. taxonomy_mapping/species_lookup.py +33 -13
  72. taxonomy_mapping/taxonomy_csv_checker.py +7 -5
  73. md_visualization/visualize_megadb.py +0 -183
  74. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/LICENSE +0 -0
  75. {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/top_level.txt +0 -0
data_management/read_exif.py

@@ -48,9 +48,18 @@ class ReadExifOptions:
     #
     # Not relevant if n_workers is 1.
     use_threads = True
-
+
+    # "File" and "ExifTool" are tag types used by ExifTool to report data that
+    # doesn't come from EXIF, rather from the file (e.g. file size).
     tag_types_to_ignore = set(['File','ExifTool'])
 
+    # Include/exclude specific tags (mutually incompatible)
+    tags_to_include = None
+    tags_to_exclude = None
+
+    # A useful set of tags one might want to limit queries for
+    # options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight','DateTime','DateTimeOriginal','Orientation']
+
     exiftool_command_name = 'exiftool'
 
     # How should we handle byte-formatted EXIF tags?
@@ -62,16 +71,17 @@ class ReadExifOptions:
 
     # Should we use exiftool or pil?
     processing_library = 'pil' # 'exiftool','pil'
-
+
+
 
 #%% Functions
 
-def enumerate_files(input_folder):
+def enumerate_files(input_folder,recursive=True):
     """
     Enumerates all image files in input_folder, returning relative paths
     """
 
-    image_files = find_images(input_folder,recursive=True)
+    image_files = find_images(input_folder,recursive=recursive)
     image_files = [os.path.relpath(s,input_folder) for s in image_files]
     image_files = [s.replace('\\','/') for s in image_files]
     print('Enumerated {} files'.format(len(image_files)))
@@ -99,7 +109,7 @@ def get_exif_ifd(exif):
 def read_pil_exif(im,options=None):
     """
     Read all the EXIF data we know how to read from [im] (path or PIL Image), whether it's
-    in the PIL default EXIF data or not.
+    in the PIL default EXIF data or not. Returns a dict.
     """
 
     if options is None:
@@ -192,6 +202,32 @@ def parse_exif_datetime_string(s,verbose=False):
     return dt
 
 
+def _filter_tags(tags,options):
+    """
+    Internal function used to include/exclude specific tags from the exif_tags
+    dict.
+    """
+
+    if options is None:
+        return tags
+    if options.tags_to_include is None and options.tags_to_exclude is None:
+        return tags
+    if options.tags_to_include is not None:
+        assert options.tags_to_exclude is None, "tags_to_include and tags_to_exclude are incompatible"
+        tags_to_return = {}
+        for tag_name in tags.keys():
+            if tag_name in options.tags_to_include:
+                tags_to_return[tag_name] = tags[tag_name]
+        return tags_to_return
+    if options.tags_to_exclude is not None:
+        assert options.tags_to_include is None, "tags_to_include and tags_to_exclude are incompatible"
+        tags_to_return = {}
+        for tag_name in tags.keys():
+            if tag_name not in options.tags_to_exclude:
+                tags_to_return[tag_name] = tags[tag_name]
+        return tags_to_return
+
+
 def read_exif_tags_for_image(file_path,options=None):
     """
     Get relevant fields from EXIF data for an image
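The include/exclude semantics above are worth pinning down; here is a minimal sketch of how the new _filter_tags helper behaves (note that it is an internal function, and the tag dict in this sketch is hypothetical):

from data_management.read_exif import ReadExifOptions, _filter_tags

tags = {'Make':'ACME','Model':'TrailCam X','MakerNote':'<binary>'}

# Keep only the named tags...
options = ReadExifOptions()
options.tags_to_include = ['Make','Model']
print(_filter_tags(tags,options))
# {'Make': 'ACME', 'Model': 'TrailCam X'}

# ...or drop the named tags; setting both options triggers an assert
options = ReadExifOptions()
options.tags_to_exclude = ['MakerNote']
print(_filter_tags(tags,options))
# {'Make': 'ACME', 'Model': 'TrailCam X'}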
@@ -227,8 +263,8 @@ def read_exif_tags_for_image(file_path,options=None):
             result['status'] = 'empty_read'
         else:
             result['status'] = 'success'
-            result['tags'] = exif_tags
-
+            result['tags'] = _filter_tags(exif_tags,options)
+
         return result
 
     elif options.processing_library == 'exiftool':
@@ -283,9 +319,12 @@ def read_exif_tags_for_image(file_path,options=None):
                     print('Ignoring tag with type {}'.format(field_type))
                 continue
 
-            field_tag = field_name_type_tokens[1].strip()
-
-            tag = [field_type,field_tag,field_value]
+            field_name = field_name_type_tokens[1].strip()
+            if options.tags_to_exclude is not None and field_name in options.tags_to_exclude:
+                continue
+            if options.tags_to_include is not None and field_name not in options.tags_to_include:
+                continue
+            tag = [field_type,field_name,field_value]
 
             exif_tags.append(tag)
 
@@ -350,20 +389,22 @@ def populate_exif_data(im, image_base, options=None):
 # ...populate_exif_data()
 
 
-def create_image_objects(image_files):
+def create_image_objects(image_files,recursive=True):
     """
     Create empty image objects for every image in [image_files], which can be a
     list of relative paths (which will get stored without processing, so the base
     path doesn't matter here), or a folder name.
 
     Returns a list of dicts with field 'file_name' (a relative path).
+
+    "recursive" is ignored if "image_files" is a list.
     """
 
     # Enumerate *relative* paths
     if isinstance(image_files,str):
         print('Enumerating image files in {}'.format(image_files))
         assert os.path.isdir(image_files), 'Invalid image folder {}'.format(image_files)
-        image_files = enumerate_files(image_files)
+        image_files = enumerate_files(image_files,recursive=recursive)
 
     images = []
     for fn in image_files:
@@ -499,7 +540,7 @@ def is_executable(name):
     return which(name) is not None
 
 
-def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None):
+def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None,recursive=True):
     """
     Read EXIF data for all images in input_folder.
 
@@ -516,6 +557,12 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None):
     if options is None:
         options = ReadExifOptions()
 
+    # Validate options
+    if options.tags_to_include is not None:
+        assert options.tags_to_exclude is None, "tags_to_include and tags_to_exclude are incompatible"
+    if options.tags_to_exclude is not None:
+        assert options.tags_to_include is None, "tags_to_include and tags_to_exclude are incompatible"
+
     if input_folder is None:
         input_folder = ''
     if len(input_folder) > 0:
@@ -542,7 +589,7 @@ def read_exif_from_folder(input_folder,output_file=None,options=None,filenames=None):
         assert is_executable(options.exiftool_command_name), 'exiftool not available'
 
     if filenames is None:
-        images = create_image_objects(input_folder)
+        images = create_image_objects(input_folder,recursive=recursive)
     else:
         assert isinstance(filenames,list)
         images = create_image_objects(filenames)
@@ -567,14 +614,16 @@ if False:
 
     #%%
 
-    input_folder = os.path.expanduser('~/data/KRU-test')
-    output_file = os.path.expanduser('~/data/test-exif.json')
+    input_folder = r'C:\temp\md-name-testing'
+    output_file = None # r'C:\temp\md-name-testing\exif.json'
     options = ReadExifOptions()
     options.verbose = False
     options.n_workers = 10
     options.use_threads = False
     options.processing_library = 'pil'
     # options.processing_library = 'exiftool'
+    options.tags_to_include = ['DateTime','Model','Make','ExifImageWidth','ExifImageHeight','DateTime','DateTimeOriginal','Orientation']
+    # options.tags_to_exclude = ['MakerNote']
 
     results = read_exif_from_folder(input_folder,output_file,options)
 
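Putting the new options together, a minimal usage sketch for the updated read_exif.py API (the folder and output paths here are hypothetical):

from data_management.read_exif import ReadExifOptions, read_exif_from_folder

options = ReadExifOptions()
options.processing_library = 'pil'
# tags_to_include and tags_to_exclude are mutually incompatible; set at most one
options.tags_to_include = ['DateTime','DateTimeOriginal','Make','Model','Orientation']

results = read_exif_from_folder('/data/camera-trap-images',
                                output_file='/data/camera-trap-exif.json',
                                options=options,
                                recursive=True)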
data_management/remap_coco_categories.py (new file)

@@ -0,0 +1,84 @@
+########
+#
+# remap_coco_categories.py
+#
+# Given a COCO-formatted dataset, remap the categories to a new mapping.
+#
+########
+
+#%% Imports and constants
+
+import os
+import json
+
+from copy import deepcopy
+
+
+#%% Main function
+
+def remap_coco_categories(input_data,
+                          output_category_name_to_id,
+                          input_category_name_to_output_category_name,
+                          output_file=None):
+    """
+    Given a COCO-formatted dataset, remap the categories to a new categories mapping, optionally
+    writing the results to a new file.
+
+    output_category_name_to_id is a dict mapping strings to ints.
+
+    input_category_name_to_output_category_name is a dict mapping strings to strings.
+
+    [input_data] can be a COCO-formatted dict or a filename. If it's a dict, it will be copied,
+    not modified in place.
+    """
+
+    if isinstance(input_data,str):
+        assert os.path.isfile(input_data), "Can't find file {}".format(input_data)
+        with open(input_data,'r') as f:
+            input_data = json.load(f)
+        assert isinstance(input_data,dict), 'Illegal COCO input data'
+    else:
+        assert isinstance(input_data,dict), 'Illegal COCO input data'
+        input_data = deepcopy(input_data)
+
+    # It's safe to modify in-place now
+    output_data = input_data
+
+    # Read input name --> ID mapping
+    input_category_name_to_input_category_id = {}
+    for c in input_data['categories']:
+        input_category_name_to_input_category_id[c['name']] = c['id']
+
+    # Map input IDs --> output IDs
+    input_category_id_to_output_category_id = {}
+    for input_name in input_category_name_to_output_category_name.keys():
+        output_name = input_category_name_to_output_category_name[input_name]
+        assert output_name in output_category_name_to_id, \
+            'No output ID for {} --> {}'.format(input_name,output_name)
+        input_id = input_category_name_to_input_category_id[input_name]
+        output_id = output_category_name_to_id[output_name]
+        input_category_id_to_output_category_id[input_id] = output_id
+
+    # Map annotations
+    for ann in output_data['annotations']:
+        assert ann['category_id'] in input_category_id_to_output_category_id, \
+            'Unrecognized category ID {}'.format(ann['category_id'])
+        ann['category_id'] = input_category_id_to_output_category_id[ann['category_id']]
+
+    # Update the category list
+    output_categories = []
+    for output_name in output_category_name_to_id:
+        category = {'name':output_name,'id':output_category_name_to_id[output_name]}
+        output_categories.append(category)
+    output_data['categories'] = output_categories
+
+    if output_file is not None:
+        with open(output_file,'w') as f:
+            json.dump(output_data,f,indent=1)
+
+    return input_data
+
+
+#%% Command-line driver
+
+# TODO
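A minimal usage sketch for the new remap_coco_categories module (the category names, IDs, and file names below are hypothetical):

from data_management.remap_coco_categories import remap_coco_categories

# Every input category used by an annotation must appear in the name mapping,
# and every output name must have an ID, or the asserts above will fire
output_category_name_to_id = {'animal':0,'person':1}
input_name_to_output_name = {'deer':'animal','raccoon':'animal','hiker':'person'}

remapped = remap_coco_categories('input_coco.json',
                                 output_category_name_to_id,
                                 input_name_to_output_name,
                                 output_file='remapped_coco.json')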
data_management/resize_coco_dataset.py

@@ -26,8 +26,7 @@ from md_visualization.visualization_utils import \
 def resize_coco_dataset(input_folder,input_filename,
                         output_folder,output_filename,
                         target_size=(-1,-1),
-                        correct_size_image_handling='copy',
-                        right_edge_quantization_threshold=None):
+                        correct_size_image_handling='copy'):
     """
     Given a COCO-formatted dataset (images in input_folder, data in input_filename), resize
     all the images to a target size (in output_folder) and scale bounding boxes accordingly
@@ -36,7 +35,7 @@ def resize_coco_dataset(input_folder,input_filename,
     target_size should be a tuple/list of ints, length 2. If either dimension is -1, aspect ratio
     will be preserved. If both dimensions are -1, this means "keep the original size". If
     both dimensions are -1 and correct_size_image_handling is copy, this function is basically
-    a no-op, although you might still use it for right_edge_quantization_threshold.
+    a no-op.
 
     correct_size_image_handling can be 'copy' (in which case the original image is just copied
     to the output folder) or 'rewrite' (in which case the image is opened via PIL and re-written,
@@ -44,11 +43,7 @@ def resize_coco_dataset(input_folder,input_filename,
     you're superstitious about biases coming from images in a training set being written
     by different image encoders.
 
-    right_edge_quantization_threshold is an off-by-default hack to adjust large datasets where
-    boxes that really should be running off the right side of the image only extend like 99%
-    of the way there, due to what appears to be a slight bias inherent to MD. If a box extends
-    within [right_edge_quantization_threshold] (a small number, from 0 to 1, but probably around
-    0.02) of the right edge of the image, it will be extended to the far right edge.
+    Returns the COCO database with resized images.
     """
 
     # Read input data
@@ -62,7 +57,9 @@ def resize_coco_dataset(input_folder,input_filename,
 
     # For each image
 
-    # im = d['images'][1]
+    # TODO: this is trivially parallelizable
+    #
+    # im = d['images'][0]
     for im in tqdm(d['images']):
 
         input_fn_relative = im['file_name']
@@ -122,15 +119,6 @@ def resize_coco_dataset(input_folder,input_filename,
                         bbox[2] * width_scale,
                         bbox[3] * height_scale]
 
-                # Do we need to quantize this box?
-                if right_edge_quantization_threshold is not None and \
-                    right_edge_quantization_threshold > 0:
-                    bbox_right_edge_abs = bbox[0] + bbox[2]
-                    bbox_right_edge_norm = bbox_right_edge_abs / output_w
-                    bbox_right_edge_distance = (1.0 - bbox_right_edge_norm)
-                    if bbox_right_edge_distance < right_edge_quantization_threshold:
-                        bbox[2] = output_w - bbox[0]
-
                 ann['bbox'] = bbox
 
             # ...if this annotation has a box
@@ -143,6 +131,8 @@ def resize_coco_dataset(input_folder,input_filename,
     with open(output_filename,'w') as f:
         json.dump(d,f,indent=1)
 
+    return d
+
 # ...def resize_coco_dataset(...)
 
 
@@ -153,27 +143,20 @@ if False:
     pass
 
     #%% Test resizing
-
-    # input_filename = os.path.expanduser('~/tmp/labelme_to_coco_test.json')
-    # input_folder = os.path.expanduser('~/data/labelme-json-test')
-    # target_size = (600,-1)
-
-    input_folder = os.path.expanduser('~/data/usgs-kissel-training')
-    input_filename = os.path.expanduser('~/data/usgs-tegus.json')
+
+    input_folder = os.path.expanduser('~/data/usgs-tegus/usgs-kissel-training')
+    input_filename = os.path.expanduser('~/data/usgs-tegus/usgs-kissel-training.json')
     target_size = (1600,-1)
 
-    output_filename = insert_before_extension(input_filename,'resized')
-    output_folder = input_folder + '-resized'
+    output_filename = insert_before_extension(input_filename,'resized-test')
+    output_folder = input_folder + '-resized-test'
 
     correct_size_image_handling = 'rewrite'
 
-    right_edge_quantization_threshold = 0.015
-
     resize_coco_dataset(input_folder,input_filename,
                         output_folder,output_filename,
                         target_size=target_size,
-                        correct_size_image_handling=correct_size_image_handling,
-                        right_edge_quantization_threshold=right_edge_quantization_threshold)
+                        correct_size_image_handling=correct_size_image_handling)
 
 
     #%% Preview
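A minimal usage sketch for the updated resize_coco_dataset signature, including the new return value (the paths here are hypothetical):

import os
from data_management.resize_coco_dataset import resize_coco_dataset

input_folder = os.path.expanduser('~/data/images')
input_filename = os.path.expanduser('~/data/dataset.json')

# Resize to 1600 pixels wide, preserving aspect ratio
d = resize_coco_dataset(input_folder,input_filename,
                        input_folder + '-resized',
                        os.path.expanduser('~/data/dataset-resized.json'),
                        target_size=(1600,-1),
                        correct_size_image_handling='rewrite')
print('Resized dataset contains {} images'.format(len(d['images'])))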
data_management/wi_download_csv_to_coco.py (new file)

@@ -0,0 +1,239 @@
+########
+#
+# wi_download_csv_to_coco.py
+#
+# Convert a .csv file from a Wildlife Insights project export to a COCO camera traps .json file.
+#
+# Currently assumes that common names are unique identifiers, which is convenient but unreliable.
+#
+########
+
+#%% Imports and constants
+
+import os
+import json
+import pandas as pd
+import numpy as np
+
+from tqdm import tqdm
+from collections import defaultdict
+
+from md_visualization import visualization_utils as vis_utils
+
+wi_extra_annotation_columns = \
+    ('is_blank','identified_by','wi_taxon_id','class','order','family','genus','species','uncertainty',
+     'number_of_objects','age','sex','animal_recognizable','individual_id','individual_animal_notes',
+     'behavior','highlighted','markings')
+
+wi_extra_image_columns = ('project_id','deployment_id')
+
+def make_location_id(project_id,deployment_id):
+    return 'project_' + str(project_id) + '_deployment_' + deployment_id
+
+def isnan(v):
+    try:
+        return np.isnan(v)
+    except Exception:
+        return False
+
+default_category_remappings = {
+    'Homo Species':'Human',
+    'Human-Camera Trapper':'Human',
+    'No CV Result':'Unknown'
+}
+
+
+#%%
+
+def wi_download_csv_to_coco(csv_file_in,
+                            coco_file_out=None,
+                            image_folder=None,
+                            validate_images=False,
+                            gs_prefix=None,
+                            verbose=True,
+                            category_remappings=default_category_remappings):
+    """
+    Convert a .csv file from a Wildlife Insights project export to a COCO
+    camera traps .json file.
+
+    If [coco_file_out] is None, uses [csv_file_in].json
+
+    gs_prefix is a string to remove from GS URLs to convert to path names... for example, if
+    your gs:// URLs look like:
+
+    gs://11234134_xyz/deployment/55554/dfadfasdfs.jpg
+
+    ...and you specify gs_prefix='11234134_xyz/deployment/', the filenames in
+    the .json file will look like:
+
+    55554/dfadfasdfs.jpg
+
+    exclude_re discards matching images; typically use to omit thumbnail images.
+    """
+
+    #%% Create COCO dictionaries
+
+    category_name_to_id = {}
+    category_name_to_id['empty'] = 0
+
+    df = pd.read_csv(csv_file_in)
+
+    print('Read {} rows from {}'.format(len(df),csv_file_in))
+
+    image_id_to_image = {}
+    image_id_to_annotations = defaultdict(list)
+
+    # i_row = 0; row = df.iloc[i_row]
+    for i_row,row in df.iterrows():
+
+        image_id = row['image_id']
+
+        if image_id not in image_id_to_image:
+
+            im = {}
+            image_id_to_image[image_id] = im
+
+            im['id'] = image_id
+
+            gs_url = row['location']
+            assert gs_url.startswith('gs://')
+
+            file_name = gs_url.replace('gs://','')
+            if gs_prefix is not None:
+                file_name = file_name.replace(gs_prefix,'')
+
+            location_id = make_location_id(row['project_id'],row['deployment_id'])
+            im['file_name'] = file_name
+            im['location'] = location_id
+            im['datetime'] = row['timestamp']
+
+            im['wi_image_info'] = {}
+            for s in wi_extra_image_columns:
+                im['wi_image_info'][s] = str(row[s])
+
+        else:
+
+            im = image_id_to_image[image_id]
+            assert im['datetime'] == row['timestamp']
+            location_id = make_location_id(row['project_id'],row['deployment_id'])
+            assert im['location'] == location_id
+
+        category_name = row['common_name']
+        if category_remappings is not None and category_name in category_remappings:
+            category_name = category_remappings[category_name]
+
+        if category_name == 'Blank':
+            category_name = 'empty'
+            assert row['is_blank'] == 1
+        else:
+            assert row['is_blank'] == 0
+        assert isinstance(category_name,str)
+        if category_name in category_name_to_id:
+            category_id = category_name_to_id[category_name]
+        else:
+            category_id = len(category_name_to_id)
+            category_name_to_id[category_name] = category_id
+
+        ann = {}
+        ann['image_id'] = image_id
+        annotations_this_image = image_id_to_annotations[image_id]
+        annotation_number = len(annotations_this_image)
+        ann['id'] = image_id + '_' + str(annotation_number).zfill(2)
+        ann['category_id'] = category_id
+        annotations_this_image.append(ann)
+
+        extra_info = {}
+        for s in wi_extra_annotation_columns:
+            v = row[s]
+            if not isnan(v):
+                extra_info[s] = v
+        ann['wi_extra_info'] = extra_info
+
+    # ...for each row
+
+    images = list(image_id_to_image.values())
+    categories = []
+    for category_name in category_name_to_id:
+        category_id = category_name_to_id[category_name]
+        categories.append({'id':category_id,'name':category_name})
+    annotations = []
+    for image_id in image_id_to_annotations:
+        annotations_this_image = image_id_to_annotations[image_id]
+        for ann in annotations_this_image:
+            annotations.append(ann)
+    info = {'version':'1.00','description':'converted from WI export'}
+    info['source_file'] = csv_file_in
+    coco_data = {}
+    coco_data['info'] = info
+    coco_data['images'] = images
+    coco_data['annotations'] = annotations
+    coco_data['categories'] = categories
+
+
+    ##%% Validate images, add sizes
+
+    if validate_images:
+
+        print('Validating images')
+        # TODO: trivially parallelizable
+
+        assert os.path.isdir(image_folder), \
+            'Must specify a valid image folder if you specify validate_images=True'
+
+        # im = images[0]
+        for im in tqdm(images):
+            file_name_relative = im['file_name']
+            file_name_abs = os.path.join(image_folder,file_name_relative)
+            assert os.path.isfile(file_name_abs)
+
+            im['corrupt'] = False
+            try:
+                pil_im = vis_utils.load_image(file_name_abs)
+            except Exception:
+                im['corrupt'] = True
+            if not im['corrupt']:
+                im['width'] = pil_im.width
+                im['height'] = pil_im.height
+
+
+    ##%% Write output json
+
+    if coco_file_out is None:
+
+        coco_file_out = csv_file_in + '.json'
+
+    with open(coco_file_out,'w') as f:
+        json.dump(coco_data,f,indent=1)
+
+
+    ##%% Validate output
+
+    from data_management.databases.integrity_check_json_db import \
+        IntegrityCheckOptions,integrity_check_json_db
+    options = IntegrityCheckOptions()
+    options.baseDir = image_folder
+    options.bCheckImageExistence = True
+    options.verbose = verbose
+    _ = integrity_check_json_db(coco_file_out,options)
+
+
+
+#%% Interactive driver
+
+if False:
+
+    #%%
+
+    base_folder = r'a/b/c'
+    csv_file_in = os.path.join(base_folder,'images.csv')
+    coco_file_out = None
+    gs_prefix = 'a_b_c_main/'
+    image_folder = os.path.join(base_folder,'images')
+    validate_images = False
+    verbose = True
+    category_remappings = default_category_remappings
+
+
+#%% Command-line driver
+
+# TODO
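A minimal usage sketch for the new converter (the export paths and the gs:// prefix below are hypothetical; validate_images=True requires that image_folder exists locally):

import os
from data_management.wi_download_csv_to_coco import wi_download_csv_to_coco

base_folder = os.path.expanduser('~/data/wi-export')
wi_download_csv_to_coco(os.path.join(base_folder,'images.csv'),
                        coco_file_out=os.path.join(base_folder,'images.json'),
                        image_folder=os.path.join(base_folder,'images'),
                        validate_images=True,
                        gs_prefix='12345_project/deployment/')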
data_management/yolo_output_to_md_output.py

@@ -61,21 +61,37 @@ from detection.run_detector import CONF_DIGITS, COORD_DIGITS
 
 def read_classes_from_yolo_dataset_file(fn):
     """
-    Read a dictionary mapping integer class IDs to class names from a YOLOv5 dataset.yaml
-    file.
+    Read a dictionary mapping integer class IDs to class names from a YOLOv5/YOLOv8
+    dataset.yaml file or a .json file. A .json file should contain a dictionary mapping
+    integer category IDs to string category names.
     """
 
-    with open(fn,'r') as f:
-        lines = f.readlines()
-
-    category_id_to_name = {}
-    pat = '\d+:.+'
-    for s in lines:
-        if re.search(pat,s) is not None:
-            tokens = s.split(':')
-            assert len(tokens) == 2, 'Invalid token in category file {}'.format(fn)
-            category_id_to_name[int(tokens[0].strip())] = tokens[1].strip()
+    if fn.endswith('.yml') or fn.endswith('.yaml'):
+
+        with open(fn,'r') as f:
+            lines = f.readlines()
+
+        category_id_to_name = {}
+        pat = '\d+:.+'
+        for s in lines:
+            if re.search(pat,s) is not None:
+                tokens = s.split(':')
+                assert len(tokens) == 2, 'Invalid token in category file {}'.format(fn)
+                category_id_to_name[int(tokens[0].strip())] = tokens[1].strip()
+
+    elif fn.endswith('.json'):
+
+        with open(fn,'r') as f:
+            d_in = json.load(f)
+        category_id_to_name = {}
+        for k in d_in.keys():
+            category_id_to_name[int(k)] = d_in[k]
 
+    else:
+
+        raise ValueError('Unrecognized category file type: {}'.format(fn))
+
+    assert len(category_id_to_name) > 0, 'Failed to read class mappings from {}'.format(fn)
     return category_id_to_name
 
 
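For reference, both input formats now accepted by read_classes_from_yolo_dataset_file; a minimal sketch (the file contents and paths are hypothetical):

import json
from data_management.yolo_output_to_md_output import read_classes_from_yolo_dataset_file

# dataset.yaml style: any line matching '\d+:.+' is treated as a class mapping, e.g.
#
#   names:
#     0: animal
#     1: person
#     2: vehicle

# Equivalent .json file: integer-valued string keys mapping to class names
with open('classes.json','w') as f:
    json.dump({'0':'animal','1':'person','2':'vehicle'},f)

category_id_to_name = read_classes_from_yolo_dataset_file('classes.json')
assert category_id_to_name[0] == 'animal'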
@@ -125,7 +141,8 @@ def yolo_json_output_to_md_output(yolo_json_file, image_folder,
     if image_id_to_error is None:
         image_id_to_error = {}
 
-    print('Converting {} to MD format'.format(yolo_json_file))
+    print('Converting {} to MD format and writing results to {}'.format(
+        yolo_json_file,output_file))
 
     if isinstance(yolo_category_id_to_name,str):
         assert os.path.isfile(yolo_category_id_to_name), \
@@ -194,6 +211,16 @@
 
     # ...if image IDs are formatted as integers in YOLO output
 
+    # In a modified version of val.py, we use negative category IDs to indicate an error
+    # that happened during inference (typically truncated images with valid headers,
+    # so corruption was not detected during val.py's initial corruption check pass.
+    for det in detections:
+        if det['category_id'] < 0:
+            assert 'error' in det, 'Negative category ID present with no error string'
+            error_string = det['error']
+            print('Caught inference-time failure {} for image {}'.format(error_string,det['image_id']))
+            image_id_to_error[det['image_id']] = error_string
+
     output_images = []
 
     # image_file_relative = image_files_relative[10]
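For reference, the shape of record the new error-handling loop consumes; a hypothetical detection entry emitted by the modified val.py (only image_id, the negative category_id, and error are load-bearing here; the other fields are illustrative):

failed_det = {
    'image_id': 'loc0001/IMG_0042',
    'category_id': -1,
    'bbox': [0.0, 0.0, 0.0, 0.0],
    'score': 0.0,
    'error': 'image file is truncated'
}
# yolo_json_output_to_md_output() copies this into image_id_to_error and reports
# the image as a failure instead of emitting detections for it.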