megadetector 5.0.5__py3-none-any.whl → 5.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (132)
  1. api/batch_processing/data_preparation/manage_local_batch.py +302 -263
  2. api/batch_processing/data_preparation/manage_video_batch.py +81 -2
  3. api/batch_processing/postprocessing/add_max_conf.py +1 -0
  4. api/batch_processing/postprocessing/categorize_detections_by_size.py +50 -19
  5. api/batch_processing/postprocessing/compare_batch_results.py +110 -60
  6. api/batch_processing/postprocessing/load_api_results.py +56 -70
  7. api/batch_processing/postprocessing/md_to_coco.py +1 -1
  8. api/batch_processing/postprocessing/md_to_labelme.py +2 -1
  9. api/batch_processing/postprocessing/postprocess_batch_results.py +240 -81
  10. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +625 -0
  11. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
  12. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
  13. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +227 -75
  14. api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
  15. api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
  16. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +2 -2
  17. classification/prepare_classification_script.py +191 -191
  18. data_management/coco_to_yolo.py +68 -45
  19. data_management/databases/integrity_check_json_db.py +7 -5
  20. data_management/generate_crops_from_cct.py +3 -3
  21. data_management/get_image_sizes.py +8 -6
  22. data_management/importers/add_timestamps_to_icct.py +79 -0
  23. data_management/importers/animl_results_to_md_results.py +160 -0
  24. data_management/importers/auckland_doc_test_to_json.py +4 -4
  25. data_management/importers/auckland_doc_to_json.py +1 -1
  26. data_management/importers/awc_to_json.py +5 -5
  27. data_management/importers/bellevue_to_json.py +5 -5
  28. data_management/importers/carrizo_shrubfree_2018.py +5 -5
  29. data_management/importers/carrizo_trail_cam_2017.py +5 -5
  30. data_management/importers/cct_field_adjustments.py +2 -3
  31. data_management/importers/channel_islands_to_cct.py +4 -4
  32. data_management/importers/ena24_to_json.py +5 -5
  33. data_management/importers/helena_to_cct.py +10 -10
  34. data_management/importers/idaho-camera-traps.py +12 -12
  35. data_management/importers/idfg_iwildcam_lila_prep.py +8 -8
  36. data_management/importers/jb_csv_to_json.py +4 -4
  37. data_management/importers/missouri_to_json.py +1 -1
  38. data_management/importers/noaa_seals_2019.py +1 -1
  39. data_management/importers/pc_to_json.py +5 -5
  40. data_management/importers/prepare-noaa-fish-data-for-lila.py +4 -4
  41. data_management/importers/prepare_zsl_imerit.py +5 -5
  42. data_management/importers/rspb_to_json.py +4 -4
  43. data_management/importers/save_the_elephants_survey_A.py +5 -5
  44. data_management/importers/save_the_elephants_survey_B.py +6 -6
  45. data_management/importers/snapshot_safari_importer.py +9 -9
  46. data_management/importers/snapshot_serengeti_lila.py +9 -9
  47. data_management/importers/timelapse_csv_set_to_json.py +5 -7
  48. data_management/importers/ubc_to_json.py +4 -4
  49. data_management/importers/umn_to_json.py +4 -4
  50. data_management/importers/wellington_to_json.py +1 -1
  51. data_management/importers/wi_to_json.py +2 -2
  52. data_management/importers/zamba_results_to_md_results.py +181 -0
  53. data_management/labelme_to_coco.py +35 -7
  54. data_management/labelme_to_yolo.py +229 -0
  55. data_management/lila/add_locations_to_island_camera_traps.py +1 -1
  56. data_management/lila/add_locations_to_nacti.py +147 -0
  57. data_management/lila/create_lila_blank_set.py +474 -0
  58. data_management/lila/create_lila_test_set.py +2 -1
  59. data_management/lila/create_links_to_md_results_files.py +106 -0
  60. data_management/lila/download_lila_subset.py +46 -21
  61. data_management/lila/generate_lila_per_image_labels.py +23 -14
  62. data_management/lila/get_lila_annotation_counts.py +17 -11
  63. data_management/lila/lila_common.py +14 -11
  64. data_management/lila/test_lila_metadata_urls.py +116 -0
  65. data_management/ocr_tools.py +829 -0
  66. data_management/resize_coco_dataset.py +13 -11
  67. data_management/yolo_output_to_md_output.py +84 -12
  68. data_management/yolo_to_coco.py +38 -20
  69. detection/process_video.py +36 -14
  70. detection/pytorch_detector.py +23 -8
  71. detection/run_detector.py +76 -19
  72. detection/run_detector_batch.py +178 -63
  73. detection/run_inference_with_yolov5_val.py +326 -57
  74. detection/run_tiled_inference.py +153 -43
  75. detection/video_utils.py +34 -8
  76. md_utils/ct_utils.py +172 -1
  77. md_utils/md_tests.py +372 -51
  78. md_utils/path_utils.py +167 -39
  79. md_utils/process_utils.py +26 -7
  80. md_utils/split_locations_into_train_val.py +215 -0
  81. md_utils/string_utils.py +10 -0
  82. md_utils/url_utils.py +0 -2
  83. md_utils/write_html_image_list.py +9 -26
  84. md_visualization/plot_utils.py +12 -8
  85. md_visualization/visualization_utils.py +106 -7
  86. md_visualization/visualize_db.py +16 -8
  87. md_visualization/visualize_detector_output.py +208 -97
  88. {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/METADATA +3 -6
  89. {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/RECORD +98 -121
  90. {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/WHEEL +1 -1
  91. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
  92. taxonomy_mapping/map_new_lila_datasets.py +43 -39
  93. taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
  94. taxonomy_mapping/preview_lila_taxonomy.py +27 -27
  95. taxonomy_mapping/species_lookup.py +33 -13
  96. taxonomy_mapping/taxonomy_csv_checker.py +7 -5
  97. api/synchronous/api_core/yolov5/detect.py +0 -252
  98. api/synchronous/api_core/yolov5/export.py +0 -607
  99. api/synchronous/api_core/yolov5/hubconf.py +0 -146
  100. api/synchronous/api_core/yolov5/models/__init__.py +0 -0
  101. api/synchronous/api_core/yolov5/models/common.py +0 -738
  102. api/synchronous/api_core/yolov5/models/experimental.py +0 -104
  103. api/synchronous/api_core/yolov5/models/tf.py +0 -574
  104. api/synchronous/api_core/yolov5/models/yolo.py +0 -338
  105. api/synchronous/api_core/yolov5/train.py +0 -670
  106. api/synchronous/api_core/yolov5/utils/__init__.py +0 -36
  107. api/synchronous/api_core/yolov5/utils/activations.py +0 -103
  108. api/synchronous/api_core/yolov5/utils/augmentations.py +0 -284
  109. api/synchronous/api_core/yolov5/utils/autoanchor.py +0 -170
  110. api/synchronous/api_core/yolov5/utils/autobatch.py +0 -66
  111. api/synchronous/api_core/yolov5/utils/aws/__init__.py +0 -0
  112. api/synchronous/api_core/yolov5/utils/aws/resume.py +0 -40
  113. api/synchronous/api_core/yolov5/utils/benchmarks.py +0 -148
  114. api/synchronous/api_core/yolov5/utils/callbacks.py +0 -71
  115. api/synchronous/api_core/yolov5/utils/dataloaders.py +0 -1087
  116. api/synchronous/api_core/yolov5/utils/downloads.py +0 -178
  117. api/synchronous/api_core/yolov5/utils/flask_rest_api/example_request.py +0 -19
  118. api/synchronous/api_core/yolov5/utils/flask_rest_api/restapi.py +0 -46
  119. api/synchronous/api_core/yolov5/utils/general.py +0 -1018
  120. api/synchronous/api_core/yolov5/utils/loggers/__init__.py +0 -187
  121. api/synchronous/api_core/yolov5/utils/loggers/wandb/__init__.py +0 -0
  122. api/synchronous/api_core/yolov5/utils/loggers/wandb/log_dataset.py +0 -27
  123. api/synchronous/api_core/yolov5/utils/loggers/wandb/sweep.py +0 -41
  124. api/synchronous/api_core/yolov5/utils/loggers/wandb/wandb_utils.py +0 -577
  125. api/synchronous/api_core/yolov5/utils/loss.py +0 -234
  126. api/synchronous/api_core/yolov5/utils/metrics.py +0 -355
  127. api/synchronous/api_core/yolov5/utils/plots.py +0 -489
  128. api/synchronous/api_core/yolov5/utils/torch_utils.py +0 -314
  129. api/synchronous/api_core/yolov5/val.py +0 -394
  130. md_utils/matlab_porting_tools.py +0 -97
  131. {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/LICENSE +0 -0
  132. {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/top_level.txt +0 -0
@@ -37,10 +37,12 @@ def write_yolo_dataset_file(yolo_dataset_file,
37
37
  val_folder_relative=None,
38
38
  test_folder_relative=None):
39
39
  """
40
- Write a YOLOv5 dataset.yaml file to the file yolo_dataset_file (should
40
+ Write a YOLOv5 dataset.yaml file to the absolute path yolo_dataset_file (should
41
41
  have a .yaml extension, though it's only a warning if it doesn't).
42
42
 
43
43
  [dataset_base_dir] should be the absolute path of the dataset root.
44
+
45
+ yolo_dataset_file does not have to be within dataset_base_dir.
44
46
 
45
47
  [class_list] can be an ordered list of class names (the first item will be class 0,
46
48
  etc.), or the name of a text file containing an ordered list of class names (one per
@@ -87,10 +89,12 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
87
89
  images_to_exclude=None,
88
90
  path_replacement_char='#',
89
91
  category_names_to_exclude=None,
90
- write_output=True):
92
+ category_names_to_include=None,
93
+ write_output=True,
94
+ flatten_paths=True):
91
95
  """
92
- Convert a COCO-formatted dataset to a YOLO-formatted dataset, flattening the dataset
93
- (to a single folder) in the process.
96
+ Convert a COCO-formatted dataset to a YOLO-formatted dataset, optionally flattening the
97
+ dataset to a single folder in the process.
94
98
 
95
99
  If the input and output folders are the same, writes .txt files to the input folder,
96
100
  and neither moves nor modifies images.
@@ -128,6 +132,9 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
128
132
 
129
133
  ## Validate input
130
134
 
135
+ if category_names_to_include is not None and category_names_to_exclude is not None:
136
+ raise ValueError('category_names_to_include and category_names_to_exclude are mutually exclusive')
137
+
131
138
  if output_folder is None:
132
139
  output_folder = input_image_folder
133
140
 
@@ -136,12 +143,16 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
136
143
 
137
144
  if category_names_to_exclude is None:
138
145
  category_names_to_exclude = {}
139
-
146
+
140
147
  assert os.path.isdir(input_image_folder)
141
148
  assert os.path.isfile(input_file)
142
149
  os.makedirs(output_folder,exist_ok=True)
143
150
 
144
-
151
+ if (output_folder == input_image_folder) and (overwrite_images) and \
152
+ (not create_image_and_label_folders) and (not flatten_paths):
153
+ print('Warning: output folder and input folder are the same, disabling overwrite_images')
154
+ overwrite_images = False
155
+
145
156
  ## Read input data
146
157
 
147
158
  with open(input_file,'r') as f:
@@ -182,7 +193,11 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
182
193
 
183
194
  for category in data['categories']:
184
195
  coco_id_to_name[category['id']] = category['name']
185
- if (category['name'] in category_names_to_exclude):
196
+ if (category_names_to_include is not None) and \
197
+ (category['name'] not in category_names_to_include):
198
+ coco_category_ids_to_exclude.add(category['id'])
199
+ continue
200
+ elif (category['name'] in category_names_to_exclude):
186
201
  coco_category_ids_to_exclude.add(category['id'])
187
202
  continue
188
203
  assert category['id'] not in coco_id_to_yolo_id
@@ -226,9 +241,13 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
226
241
  tokens = os.path.splitext(im['file_name'])
227
242
  if tokens[1].lower() not in typical_image_extensions:
228
243
  print('Warning: unusual image file name {}'.format(im['file_name']))
229
-
230
- image_name = tokens[0].replace('\\','/').replace('/',path_replacement_char) + \
231
- '_' + str(i_image).zfill(6)
244
+
245
+ if flatten_paths:
246
+ image_name = tokens[0].replace('\\','/').replace('/',path_replacement_char) + \
247
+ '_' + str(i_image).zfill(6)
248
+ else:
249
+ image_name = tokens[0]
250
+
232
251
  assert image_name not in image_names, 'Image name collision for {}'.format(image_name)
233
252
  image_names.add(image_name)
234
253
 
@@ -291,12 +310,6 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
291
310
  # This category isn't in our category list. This typically corresponds to whole sets
292
311
  # of images that were excluded from the YOLO set.
293
312
  if ann['category_id'] in coco_category_ids_to_exclude:
294
- category_name = coco_id_to_name[ann['category_id']]
295
- if category_name not in category_exclusion_warnings_printed:
296
- category_exclusion_warnings_printed.add(category_name)
297
- print('Warning: ignoring category {} in image {}'.format(
298
- category_name,image_id),end='')
299
- print('...are you sure you didn\'t mean to exclude this image?')
300
313
  continue
301
314
 
302
315
  yolo_category_id = coco_id_to_yolo_id[ann['category_id']]
@@ -405,30 +418,38 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
405
418
  with open(image_id_to_output_image_json_file,'w') as f:
406
419
  json.dump(image_id_to_output_image_name,f,indent=1)
407
420
 
408
- if (write_output):
409
-
421
+
422
+ if (output_folder == input_image_folder) and (not create_image_and_label_folders):
423
+ print('Creating annotation files (not copying images, input and output folder are the same)')
424
+ else:
410
425
  print('Copying images and creating annotation files')
426
+
427
+ if create_image_and_label_folders:
428
+ dest_image_folder = os.path.join(output_folder,'images')
429
+ dest_txt_folder = os.path.join(output_folder,'labels')
430
+ else:
431
+ dest_image_folder = output_folder
432
+ dest_txt_folder = output_folder
433
+
434
+ source_image_to_dest_image = {}
411
435
 
412
- if create_image_and_label_folders:
413
- dest_image_folder = os.path.join(output_folder,'images')
414
- dest_txt_folder = os.path.join(output_folder,'labels')
415
- else:
416
- dest_image_folder = output_folder
417
- dest_txt_folder = output_folder
418
-
419
- # TODO: parallelize this loop
420
- #
421
- # output_info = images_to_copy[0]
422
- for output_info in tqdm(images_to_copy):
423
-
424
- source_image = output_info['source_image']
425
- dest_image_relative = output_info['dest_image_relative']
426
- dest_txt_relative = output_info['dest_txt_relative']
427
-
428
- dest_image = os.path.join(dest_image_folder,dest_image_relative)
429
- os.makedirs(os.path.dirname(dest_image),exist_ok=True)
436
+ # TODO: parallelize this loop
437
+ #
438
+ # output_info = images_to_copy[0]
439
+ for output_info in tqdm(images_to_copy):
440
+
441
+ source_image = output_info['source_image']
442
+ dest_image_relative = output_info['dest_image_relative']
443
+ dest_txt_relative = output_info['dest_txt_relative']
444
+
445
+ dest_image = os.path.join(dest_image_folder,dest_image_relative)
446
+ dest_txt = os.path.join(dest_txt_folder,dest_txt_relative)
447
+
448
+ source_image_to_dest_image[source_image] = dest_image
449
+
450
+ if write_output:
430
451
 
431
- dest_txt = os.path.join(dest_txt_folder,dest_txt_relative)
452
+ os.makedirs(os.path.dirname(dest_image),exist_ok=True)
432
453
  os.makedirs(os.path.dirname(dest_txt),exist_ok=True)
433
454
 
434
455
  if not create_image_and_label_folders:
@@ -436,7 +457,7 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
436
457
 
437
458
  if (not os.path.isfile(dest_image)) or (overwrite_images):
438
459
  shutil.copyfile(source_image,dest_image)
439
-
460
+
440
461
  bboxes = output_info['bboxes']
441
462
 
442
463
  # Only write an annotation file if there are bounding boxes. Images with
@@ -456,15 +477,17 @@ def coco_to_yolo(input_image_folder,output_folder,input_file,
456
477
  assert len(bbox) == 5
457
478
  s = '{} {} {} {} {}'.format(bbox[0],bbox[1],bbox[2],bbox[3],bbox[4])
458
479
  f.write(s + '\n')
459
-
460
- # ...for each image
480
+
481
+ # ...if we're actually writing output
461
482
 
462
- # ...if we're actually writing output
463
-
464
- return_info = {}
465
- return_info['class_list_filename'] = class_list_filename
483
+ # ...for each image
484
+
485
+ coco_to_yolo_info = {}
486
+ coco_to_yolo_info['class_list_filename'] = class_list_filename
487
+ coco_to_yolo_info['source_image_to_dest_image'] = source_image_to_dest_image
488
+ coco_to_yolo_info['coco_id_to_yolo_id'] = coco_id_to_yolo_id
466
489
 
467
- return return_info
490
+ return coco_to_yolo_info
468
491
 
469
492
  # ...def coco_to_yolo(...)
470
493
 
@@ -89,6 +89,9 @@ def integrity_check_json_db(jsonFile, options=None):
89
89
 
90
90
  print(options.__dict__)
91
91
 
92
+ if options.baseDir is None:
93
+ options.baseDir = ''
94
+
92
95
  baseDir = options.baseDir
93
96
 
94
97
 
@@ -116,10 +119,9 @@ def integrity_check_json_db(jsonFile, options=None):
116
119
  annotations = data['annotations']
117
120
  categories = data['categories']
118
121
  # info = data['info']
119
- assert 'info' in data
122
+ assert 'info' in data, 'No info struct in database'
120
123
 
121
- if len(baseDir) > 0:
122
-
124
+ if len(baseDir) > 0:
123
125
  assert os.path.isdir(baseDir), 'Base directory {} does not exist'.format(baseDir)
124
126
 
125
127
 
@@ -139,8 +141,8 @@ def integrity_check_json_db(jsonFile, options=None):
139
141
  assert 'name' in cat
140
142
  assert 'id' in cat
141
143
 
142
- assert isinstance(cat['id'],int), 'Illegal category ID type'
143
- assert isinstance(cat['name'],str), 'Illegal category name type'
144
+ assert isinstance(cat['id'],int), 'Illegal category ID type: [{}]'.format(str(cat['id']))
145
+ assert isinstance(cat['name'],str), 'Illegal category name type [{}]'.format(str(cat['name']))
144
146
 
145
147
  catId = cat['id']
146
148
  catName = cat['name']
@@ -145,7 +145,7 @@ if False:
145
145
 
146
146
  #%%
147
147
 
148
- from md_visualization.visualize_db import DbVizOptions,process_images
148
+ from md_visualization.visualize_db import DbVizOptions,visualize_db
149
149
 
150
150
  db_path = cct_file
151
151
  output_dir = os.path.expanduser('~/tmp/noaa-fish-preview')
@@ -163,5 +163,5 @@ if False:
163
163
  options.box_thickness = 4
164
164
  options.box_expansion = 25
165
165
 
166
- htmlOutputFile,db = process_images(db_path,output_dir,image_base_dir,options)
167
-
166
+ htmlOutputFile,db = visualize_db(db_path,output_dir,image_base_dir,options)
167
+
@@ -2,7 +2,8 @@
2
2
  #
3
3
  # get_image_sizes.py
4
4
  #
5
- # Given a json-formatted list of image filenames, retrieve the width and height of every image.
5
+ # Given a json-formatted list of image filenames, retrieve the width and height of
6
+ # every image, optionally writing the results to a new .json file.
6
7
  #
7
8
  ########
8
9
 
@@ -70,7 +71,7 @@ def process_images(filenames,image_prefix=None,n_threads=default_n_threads):
70
71
  return all_results
71
72
 
72
73
 
73
- def process_list_file(input_file,output_file,image_prefix=None,n_threads=default_n_threads):
74
+ def process_list_file(input_file,output_file=None,image_prefix=None,n_threads=default_n_threads):
74
75
 
75
76
  assert os.path.isdir(os.path.dirname(output_file))
76
77
  assert os.path.isfile(input_file)
@@ -81,8 +82,9 @@ def process_list_file(input_file,output_file,image_prefix=None,n_threads=default
81
82
 
82
83
  all_results = process_images(filenames,image_prefix=image_prefix,n_threads=n_threads)
83
84
 
84
- with open(output_file,'w') as f:
85
- json.dump(all_results,f,indent=2)
85
+ if output_file is not None:
86
+ with open(output_file,'w') as f:
87
+ json.dump(all_results,f,indent=1)
86
88
 
87
89
 
88
90
  #%% Interactive driver
@@ -102,14 +104,14 @@ if False:
102
104
  image_names = path_utils.find_images(base_dir,recursive=True)
103
105
 
104
106
  with open(image_list_file,'w') as f:
105
- json.dump(image_names,f,indent=2)
107
+ json.dump(image_names,f,indent=1)
106
108
 
107
109
  relative_image_names = []
108
110
  for s in image_names:
109
111
  relative_image_names.append(os.path.relpath(s,base_dir))
110
112
 
111
113
  with open(relative_image_list_file,'w') as f:
112
- json.dump(relative_image_names,f,indent=2)
114
+ json.dump(relative_image_names,f,indent=1)
113
115
 
114
116
 
115
117
  #%%
@@ -0,0 +1,79 @@
1
+ ########
2
+ #
3
+ # add_timestamps_to_icct.py
4
+ #
5
+ # The Island Conservation Camera Traps dataset was originally posted without timestamps
6
+ # in either .json metadata or EXIF metadata. We pulled timestamps out using ocr_tools.py,
7
+ # this script adds those timestamps into the .json metadata.
8
+ #
9
+ ########
10
+
11
+ #%% Imports and constants
12
+
13
+ import json
14
+
15
+ ocr_results_file = r'g:\temp\ocr_results.2023.10.31.07.37.54.json'
16
+ input_metadata_file = r'd:\lila\islandconservationcameratraps\island_conservation.json'
17
+ output_metadata_file = r'g:\temp\island_conservation_camera_traps_1.02.json'
18
+ ocr_results_file_base = 'g:/temp/island_conservation_camera_traps/'
19
+ assert ocr_results_file_base.endswith('/')
20
+
21
+
22
+ #%% Read input metadata
23
+
24
+ with open(input_metadata_file,'r') as f:
25
+ input_metadata = json.load(f)
26
+
27
+ assert input_metadata['info']['version'] == '1.01'
28
+
29
+ # im = input_metadata['images'][0]
30
+ for im in input_metadata['images']:
31
+ assert 'datetime' not in im
32
+
33
+
34
+ #%% Read OCR results
35
+
36
+ with open(ocr_results_file,'r') as f:
37
+ abs_filename_to_ocr_results = json.load(f)
38
+
39
+ relative_filename_to_ocr_results = {}
40
+
41
+ for fn_abs in abs_filename_to_ocr_results:
42
+ assert ocr_results_file_base in fn_abs
43
+ fn_relative = fn_abs.replace(ocr_results_file_base,'')
44
+ relative_filename_to_ocr_results[fn_relative] = abs_filename_to_ocr_results[fn_abs]
45
+
46
+
47
+ #%% Add datetimes to metadata
48
+
49
+ images_not_in_datetime_results = []
50
+ images_with_failed_datetimes = []
51
+
52
+ for i_image,im in enumerate(input_metadata['images']):
53
+ if im['file_name'] not in relative_filename_to_ocr_results:
54
+ images_not_in_datetime_results.append(im)
55
+ im['datetime'] = None
56
+ continue
57
+ ocr_results = relative_filename_to_ocr_results[im['file_name']]
58
+ if ocr_results['datetime'] is None:
59
+ images_with_failed_datetimes.append(im)
60
+ im['datetime'] = None
61
+ continue
62
+ im['datetime'] = ocr_results['datetime']
63
+
64
+ print('{} of {} images were not in datetime results'.format(
65
+ len(images_not_in_datetime_results),len(input_metadata['images'])))
66
+
67
+ print('{} of {} images were had failed datetime results'.format(
68
+ len(images_with_failed_datetimes),len(input_metadata['images'])))
69
+
70
+ for im in input_metadata['images']:
71
+ assert 'datetime' in im
72
+
73
+
74
+ #%% Write output
75
+
76
+ input_metadata['info']['version'] = '1.02'
77
+
78
+ with open(output_metadata_file,'w') as f:
79
+ json.dump(input_metadata,f,indent=1)
@@ -0,0 +1,160 @@
1
+ ########
2
+ #
3
+ # animl_results_to_md_results.py
4
+ #
5
+ # Convert a .csv file produced by the Animl package:
6
+ #
7
+ # https://github.com/conservationtechlab/animl-py
8
+ #
9
+ # ...to a MD results file suitable for import into Timelapse.
10
+ #
11
+ # Columns are expected to be:
12
+ #
13
+ # file
14
+ # category (MD category identifies: 1==animal, 2==person, 3==vehicle)
15
+ # detection_conf
16
+ # bbox1,bbox2,bbox3,bbox4
17
+ # class
18
+ # classification_conf
19
+ #
20
+ ########
21
+
22
+ #%% Imports and constants
23
+
24
+ import pandas as pd
25
+ import json
26
+
27
+ # It's a little icky to hard-code this here rather than importing from elsewhere
28
+ # in the MD repo, but it seemed silly to take a dependency on lots of MD code
29
+ # just for this, so, hard-coding.
30
+ detection_category_id_to_name = {'1':'animal','2':'person','3':'vehicle'}
31
+
32
+
33
+ #%% Main function
34
+
35
+ def animl_results_to_md_results(input_file,output_file=None):
36
+ """
37
+ Converts the .csv file [input_file] to the MD-formatted .json file [output_file].
38
+
39
+ If [output_file] is None, '.json' will be appended to the input file.
40
+ """
41
+
42
+ #%%
43
+
44
+ if output_file is None:
45
+ output_file = input_file + '.json'
46
+
47
+ df = pd.read_csv(input_file)
48
+
49
+ expected_columns = ('file','category','detection_conf',
50
+ 'bbox1','bbox2','bbox3','bbox4','class','classification_conf')
51
+
52
+ for s in expected_columns:
53
+ assert s in df.columns,\
54
+ 'Expected column {} not found'.format(s)
55
+
56
+ classification_category_name_to_id = {}
57
+ filename_to_results = {}
58
+
59
+ # i_row = 0; row = df.iloc[i_row]
60
+ for i_row,row in df.iterrows():
61
+
62
+ # Is this the first detection we've seen for this file?
63
+ if row['file'] in filename_to_results:
64
+ im = filename_to_results[row['file']]
65
+ else:
66
+ im = {}
67
+ im['detections'] = []
68
+ im['file'] = row['file']
69
+ filename_to_results[im['file']] = im
70
+
71
+ assert isinstance(row['category'],int),'Invalid category identifier in row {}'.format(im['file'])
72
+ detection_category_id = str(row['category'])
73
+ assert detection_category_id in detection_category_id_to_name,\
74
+ 'Unrecognized detection category ID {}'.format(detection_category_id)
75
+
76
+ detection = {}
77
+ detection['category'] = detection_category_id
78
+ detection['conf'] = row['detection_conf']
79
+ bbox = [row['bbox1'],row['bbox2'],row['bbox3'],row['bbox4']]
80
+ detection['bbox'] = bbox
81
+ classification_category_name = row['class']
82
+
83
+ # Have we seen this classification category before?
84
+ if classification_category_name in classification_category_name_to_id:
85
+ classification_category_id = \
86
+ classification_category_name_to_id[classification_category_name]
87
+ else:
88
+ classification_category_id = str(len(classification_category_name_to_id))
89
+ classification_category_name_to_id[classification_category_name] = \
90
+ classification_category_id
91
+
92
+ classifications = [[classification_category_id,row['classification_conf']]]
93
+ detection['classifications'] = classifications
94
+
95
+ im['detections'].append(detection)
96
+
97
+ # ...for each row
98
+
99
+ info = {}
100
+ info['format_version'] = '1.3'
101
+ info['detector'] = 'Animl'
102
+ info['classifier'] = 'Animl'
103
+
104
+ results = {}
105
+ results['info'] = info
106
+ results['detection_categories'] = detection_category_id_to_name
107
+ results['classification_categories'] = \
108
+ {v: k for k, v in classification_category_name_to_id.items()}
109
+ results['images'] = list(filename_to_results.values())
110
+
111
+ with open(output_file,'w') as f:
112
+ json.dump(results,f,indent=1)
113
+
114
+ # ...zamba_results_to_md_results(...)
115
+
116
+
117
+ #%% Interactive driver
118
+
119
+ if False:
120
+
121
+ pass
122
+
123
+ #%%
124
+
125
+ input_file = r"G:\temp\animl-runs\animl-runs\Coati_v2\manifest.csv"
126
+ output_file = None
127
+ animl_results_to_md_results(input_file,output_file)
128
+
129
+
130
+ #%% Command-line driver
131
+
132
+ import sys,argparse
133
+
134
+ def main():
135
+
136
+ parser = argparse.ArgumentParser(
137
+ description='Convert an Animl-formatted .csv results file to MD-formatted .json results file')
138
+
139
+ parser.add_argument(
140
+ 'input_file',
141
+ type=str,
142
+ help='input .csv file')
143
+
144
+ parser.add_argument(
145
+ '--output_file',
146
+ type=str,
147
+ default=None,
148
+ help='output .json file (defaults to input file appended with ".json")')
149
+
150
+ if len(sys.argv[1:]) == 0:
151
+ parser.print_help()
152
+ parser.exit()
153
+
154
+ args = parser.parse_args()
155
+
156
+ animl_results_to_md_results(args.input_file,args.output_file)
157
+
158
+ if __name__ == '__main__':
159
+ main()
160
+
@@ -21,7 +21,7 @@ import numpy as np
21
21
  from tqdm import tqdm
22
22
 
23
23
  from md_visualization import visualize_db
24
- from data_management.databases import sanity_check_json_db
24
+ from data_management.databases import integrity_check_json_db
25
25
  from md_utils.path_utils import find_images
26
26
 
27
27
  input_base_dir = r'e:\auckland-test\2_Testing'
@@ -314,13 +314,13 @@ print('Finished writing json to {}'.format(output_json_filename))
314
314
 
315
315
  #%% Validate .json file
316
316
 
317
- options = sanity_check_json_db.SanityCheckOptions()
317
+ options = integrity_check_json_db.IntegrityCheckOptions()
318
318
  options.baseDir = input_base_dir
319
319
  options.bCheckImageSizes = False
320
320
  options.bCheckImageExistence = False
321
321
  options.bFindUnusedImages = False
322
322
 
323
- sortedCategories, data, _ = sanity_check_json_db.sanity_check_json_db(output_json_filename, options)
323
+ sortedCategories, data, _ = integrity_check_json_db.integrity_check_json_db(output_json_filename, options)
324
324
 
325
325
 
326
326
  #%% Preview labels
@@ -332,7 +332,7 @@ viz_options.add_search_links = False
332
332
  viz_options.sort_by_filename = False
333
333
  viz_options.parallelize_rendering = True
334
334
  viz_options.classes_to_exclude = ['empty']
335
- html_output_file, image_db = visualize_db.process_images(db_path=output_json_filename,
335
+ html_output_file, image_db = visualize_db.visualize_db(db_path=output_json_filename,
336
336
  output_dir=os.path.join(
337
337
  output_base_dir, 'preview'),
338
338
  image_base_dir=input_base_dir,
@@ -192,7 +192,7 @@ viz_options.add_search_links = False
192
192
  viz_options.sort_by_filename = False
193
193
  viz_options.parallelize_rendering = True
194
194
  viz_options.classes_to_exclude = ['test']
195
- html_output_file, image_db = visualize_db.process_images(db_path=output_json_filename,
195
+ html_output_file, image_db = visualize_db.visualize_db(db_path=output_json_filename,
196
196
  output_dir=os.path.join(
197
197
  output_base_dir, 'preview'),
198
198
  image_base_dir=input_base_dir,
@@ -168,12 +168,12 @@ print('Finished writing .json file with {} images, {} annotations, and {} catego
168
168
  len(images),len(annotations),len(categories)))
169
169
 
170
170
 
171
- #%% Sanity-check the database's integrity
171
+ #%% Validate the database's integrity
172
172
 
173
- from data_management.databases import sanity_check_json_db
173
+ from data_management.databases import integrity_check_json_db
174
174
 
175
- options = sanity_check_json_db.SanityCheckOptions()
176
- sortedCategories,data = sanity_check_json_db.sanity_check_json_db(output_file, options)
175
+ options = integrity_check_json_db.IntegrityCheckOptions()
176
+ sortedCategories,data = integrity_check_json_db.integrity_check_json_db(output_file, options)
177
177
 
178
178
 
179
179
  #%% Render a bunch of images to make sure the labels got carried along correctly
@@ -185,5 +185,5 @@ options = visualize_db.BboxDbVizOptions()
185
185
  options.num_to_visualize = 1000
186
186
  options.sort_by_filename = False
187
187
 
188
- htmlOutputFile = visualize_db.process_images(bbox_db_path,output_dir,image_base,options)
188
+ htmlOutputFile = visualize_db.visualize_db(bbox_db_path,output_dir,image_base,options)
189
189
 
@@ -242,17 +242,17 @@ json.dump(data, open(output_filename,'w'), indent=1)
242
242
  print('Finished writing json to {}'.format(output_filename))
243
243
 
244
244
 
245
- #%% Sanity-check data
245
+ #%% Validate data
246
246
 
247
- from data_management.databases import sanity_check_json_db
247
+ from data_management.databases import integrity_check_json_db
248
248
 
249
- options = sanity_check_json_db.SanityCheckOptions()
249
+ options = integrity_check_json_db.IntegrityCheckOptions()
250
250
  options.baseDir = base_dir
251
251
  options.bCheckImageSizes = False
252
252
  options.bCheckImageExistence = True
253
253
  options.bFindUnusedImages = False
254
254
 
255
- sorted_categories = sanity_check_json_db.sanity_check_json_db(output_filename,options)
255
+ sorted_categories = integrity_check_json_db.integrity_check_json_db(output_filename,options)
256
256
 
257
257
 
258
258
  #%% Label previews
@@ -266,7 +266,7 @@ viz_options.parallelize_rendering = True
266
266
  viz_options.trim_to_images_with_bboxes = False
267
267
  viz_options.add_search_links = True
268
268
  viz_options.sort_by_filename = False
269
- html_output_file,image_db = visualize_db.process_images(output_filename,
269
+ html_output_file,image_db = visualize_db.visualize_db(output_filename,
270
270
  os.path.join(output_base,'preview'),
271
271
  base_dir,viz_options)
272
272
  os.startfile(html_output_file)
@@ -237,20 +237,20 @@ print('Finished writing .json file with {} images, {} annotations, and {} catego
237
237
 
238
238
  #%% Validate output
239
239
 
240
- from data_management.databases import sanity_check_json_db
240
+ from data_management.databases import integrity_check_json_db
241
241
 
242
- options = sanity_check_json_db.SanityCheckOptions()
242
+ options = integrity_check_json_db.IntegrityCheckOptions()
243
243
  options.baseDir = image_directory
244
244
  options.bCheckImageSizes = False
245
245
  options.bCheckImageExistence = False
246
246
  options.bFindUnusedImages = False
247
- data = sanity_check_json_db.sanity_check_json_db(output_json_file,options)
247
+ data = integrity_check_json_db.integrity_check_json_db(output_json_file,options)
248
248
 
249
249
 
250
250
  #%% Preview labels
251
251
 
252
252
  from md_visualization import visualize_db
253
- from data_management.databases import sanity_check_json_db
253
+ from data_management.databases import integrity_check_json_db
254
254
 
255
255
  viz_options = visualize_db.DbVizOptions()
256
256
  viz_options.num_to_visualize = None
@@ -259,7 +259,7 @@ viz_options.add_search_links = True
259
259
  viz_options.sort_by_filename = False
260
260
  viz_options.parallelize_rendering = True
261
261
  viz_options.classes_to_exclude = ['empty']
262
- html_output_file,image_db = visualize_db.process_images(db_path=output_json_file,
262
+ html_output_file,image_db = visualize_db.visualize_db(db_path=output_json_file,
263
263
  output_dir=os.path.join(
264
264
  output_base, 'carrizo shrub-free 2018/preview'),
265
265
  image_base_dir=image_directory,