megadetector 10.0.10__py3-none-any.whl → 10.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (80)
  1. megadetector/data_management/animl_to_md.py +5 -2
  2. megadetector/data_management/cct_json_utils.py +4 -2
  3. megadetector/data_management/cct_to_md.py +5 -4
  4. megadetector/data_management/cct_to_wi.py +5 -1
  5. megadetector/data_management/coco_to_yolo.py +3 -2
  6. megadetector/data_management/databases/combine_coco_camera_traps_files.py +4 -4
  7. megadetector/data_management/databases/integrity_check_json_db.py +2 -2
  8. megadetector/data_management/databases/subset_json_db.py +0 -3
  9. megadetector/data_management/generate_crops_from_cct.py +6 -4
  10. megadetector/data_management/get_image_sizes.py +5 -35
  11. megadetector/data_management/labelme_to_coco.py +10 -6
  12. megadetector/data_management/labelme_to_yolo.py +19 -28
  13. megadetector/data_management/lila/create_lila_test_set.py +22 -2
  14. megadetector/data_management/lila/generate_lila_per_image_labels.py +7 -5
  15. megadetector/data_management/lila/lila_common.py +2 -2
  16. megadetector/data_management/lila/test_lila_metadata_urls.py +0 -1
  17. megadetector/data_management/ocr_tools.py +6 -10
  18. megadetector/data_management/read_exif.py +59 -16
  19. megadetector/data_management/remap_coco_categories.py +1 -1
  20. megadetector/data_management/remove_exif.py +10 -5
  21. megadetector/data_management/rename_images.py +20 -13
  22. megadetector/data_management/resize_coco_dataset.py +10 -4
  23. megadetector/data_management/speciesnet_to_md.py +3 -3
  24. megadetector/data_management/yolo_output_to_md_output.py +3 -1
  25. megadetector/data_management/yolo_to_coco.py +28 -19
  26. megadetector/detection/change_detection.py +26 -18
  27. megadetector/detection/process_video.py +1 -1
  28. megadetector/detection/pytorch_detector.py +5 -5
  29. megadetector/detection/run_detector.py +34 -10
  30. megadetector/detection/run_detector_batch.py +2 -1
  31. megadetector/detection/run_inference_with_yolov5_val.py +3 -1
  32. megadetector/detection/run_md_and_speciesnet.py +215 -101
  33. megadetector/detection/run_tiled_inference.py +7 -7
  34. megadetector/detection/tf_detector.py +1 -1
  35. megadetector/detection/video_utils.py +9 -6
  36. megadetector/postprocessing/add_max_conf.py +4 -4
  37. megadetector/postprocessing/categorize_detections_by_size.py +3 -2
  38. megadetector/postprocessing/classification_postprocessing.py +7 -8
  39. megadetector/postprocessing/combine_batch_outputs.py +3 -2
  40. megadetector/postprocessing/compare_batch_results.py +49 -27
  41. megadetector/postprocessing/convert_output_format.py +8 -6
  42. megadetector/postprocessing/create_crop_folder.py +13 -4
  43. megadetector/postprocessing/generate_csv_report.py +22 -8
  44. megadetector/postprocessing/load_api_results.py +8 -4
  45. megadetector/postprocessing/md_to_coco.py +2 -3
  46. megadetector/postprocessing/md_to_labelme.py +12 -8
  47. megadetector/postprocessing/md_to_wi.py +2 -1
  48. megadetector/postprocessing/merge_detections.py +4 -6
  49. megadetector/postprocessing/postprocess_batch_results.py +4 -3
  50. megadetector/postprocessing/remap_detection_categories.py +6 -3
  51. megadetector/postprocessing/render_detection_confusion_matrix.py +18 -10
  52. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
  53. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +5 -3
  54. megadetector/postprocessing/separate_detections_into_folders.py +10 -4
  55. megadetector/postprocessing/subset_json_detector_output.py +1 -1
  56. megadetector/postprocessing/top_folders_to_bottom.py +22 -7
  57. megadetector/postprocessing/validate_batch_results.py +1 -1
  58. megadetector/taxonomy_mapping/map_new_lila_datasets.py +59 -3
  59. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +1 -1
  60. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +26 -17
  61. megadetector/taxonomy_mapping/species_lookup.py +51 -2
  62. megadetector/utils/ct_utils.py +9 -4
  63. megadetector/utils/extract_frames_from_video.py +4 -0
  64. megadetector/utils/gpu_test.py +6 -6
  65. megadetector/utils/md_tests.py +21 -21
  66. megadetector/utils/path_utils.py +112 -44
  67. megadetector/utils/split_locations_into_train_val.py +0 -4
  68. megadetector/utils/url_utils.py +5 -3
  69. megadetector/utils/wi_taxonomy_utils.py +37 -8
  70. megadetector/utils/write_html_image_list.py +1 -2
  71. megadetector/visualization/plot_utils.py +31 -19
  72. megadetector/visualization/render_images_with_thumbnails.py +3 -0
  73. megadetector/visualization/visualization_utils.py +18 -7
  74. megadetector/visualization/visualize_db.py +9 -26
  75. megadetector/visualization/visualize_video_output.py +14 -2
  76. {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/METADATA +1 -1
  77. {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/RECORD +80 -80
  78. {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/WHEEL +0 -0
  79. {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/licenses/LICENSE +0 -0
  80. {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/top_level.txt +0 -0
@@ -252,9 +252,10 @@ def render_detection_confusion_matrix(ground_truth_file,
  md_formatted_results['images']),
  total=len(md_formatted_results['images'])))
  finally:
- pool.close()
- pool.join()
- print("Pool closed and joined for confusion matrix rendering")
+ if pool is not None:
+ pool.close()
+ pool.join()
+ print("Pool closed and joined for confusion matrix rendering")

  else:

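The guard added above follows the usual try/finally cleanup pattern for multiprocessing.pool.ThreadPool, where the pool variable can still be None if pool creation itself failed. A minimal sketch of that pattern, with a hypothetical worker function and item list (not code from this package):

    from multiprocessing.pool import ThreadPool
    from tqdm import tqdm

    def run_in_pool(worker, items, n_threads=4):
        # Run [worker] over [items] on a thread pool, always releasing the pool
        pool = None
        try:
            pool = ThreadPool(n_threads)
            results = list(tqdm(pool.imap(worker, items), total=len(items)))
        finally:
            # If ThreadPool() itself raised, pool is still None
            if pool is not None:
                pool.close()
                pool.join()
        return results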
@@ -369,11 +370,15 @@ def render_detection_confusion_matrix(ground_truth_file,

  # If there were no detections at all, call this image empty
  if len(results_im['detections']) == 0:
+
  predicted_category_name = empty_category_name
+
  # Otherwise look for above-threshold detections
  else:
+
  results_category_name_to_confidence = defaultdict(int)
  for det in results_im['detections']:
+
  category_name = results_category_id_to_name[det['category']]
  detection_threshold = confidence_thresholds['default']
  if category_name in confidence_thresholds:
@@ -381,12 +386,15 @@ def render_detection_confusion_matrix(ground_truth_file,
  if det['conf'] > detection_threshold:
  results_category_name_to_confidence[category_name] = max(
  results_category_name_to_confidence[category_name],det['conf'])
- # If there were no detections above threshold
- if len(results_category_name_to_confidence) == 0:
- predicted_category_name = empty_category_name
- else:
- predicted_category_name = max(results_category_name_to_confidence,
- key=results_category_name_to_confidence.get)
+
+ # ...for each detection
+
+ # If there were no detections above threshold
+ if len(results_category_name_to_confidence) == 0:
+ predicted_category_name = empty_category_name
+ else:
+ predicted_category_name = max(results_category_name_to_confidence,
+ key=results_category_name_to_confidence.get)

  ground_truth_category_index = gt_category_name_to_category_index[ground_truth_category_name]
  predicted_category_index = gt_category_name_to_category_index[predicted_category_name]
@@ -396,7 +404,7 @@ def render_detection_confusion_matrix(ground_truth_file,

  confusion_matrix[ground_truth_category_index,predicted_category_index] += 1

- # ...for each file
+ # ...for each ground truth file

  plt.ioff()

@@ -37,7 +37,7 @@ def remove_repeat_detections(input_file,output_file,filtering_dir):
  """

  assert os.path.isfile(input_file), "Can't find file {}".format(input_file)
- assert os.path.isdir(filtering_dir), "Can't find folder {}".format(filtering_dir)
+ assert os.path.exists(filtering_dir), "Can't find input file/folder {}".format(filtering_dir)
  options = repeat_detections_core.RepeatDetectionOptions()
  if os.path.isfile(filtering_dir):
  options.filterFileToLoad = filtering_dir
@@ -869,7 +869,7 @@ def _update_detection_table(repeat_detection_results, options, output_file_name=
  detection_to_modify = row_detections[instance.i_detection]

  # Make sure the bounding box matches
- assert (instance_bbox[0:3] == detection_to_modify['bbox'][0:3])
+ assert (instance_bbox[0:4] == detection_to_modify['bbox'][0:4])

  # Make the probability negative, if it hasn't been switched by
  # another bounding box
@@ -1149,7 +1149,8 @@ def find_repeat_detections(input_filename, output_file_name=None, options=None):

  # Load the filtering file
  detection_index_file_name = options.filterFileToLoad
- s_in = open(detection_index_file_name, 'r').read()
+ with open(detection_index_file_name, 'r') as f:
+ s_in = f.read()
  detection_info = jsonpickle.decode(s_in)
  filtering_base_dir = os.path.dirname(options.filterFileToLoad)
  suspicious_detections = detection_info['suspicious_detections']
@@ -1382,7 +1383,8 @@ def find_repeat_detections(input_filename, output_file_name=None, options=None):

  # candidate_detection_file = all_candidate_detection_files[0]
  for candidate_detection_file in all_candidate_detection_files:
- s = open(candidate_detection_file, 'r').read()
+ with open(candidate_detection_file, 'r') as f:
+ s = f.read()
  candidate_detections_this_file = jsonpickle.decode(s)
  all_candidate_detections.append(candidate_detections_this_file)

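Several hunks in this release (here and in separate_detections_into_folders below) replace bare open(...).read() calls with context managers, so file handles are closed even when decoding raises. A small illustration of the pattern, using a hypothetical file name:

    import json

    # 'results.json' is a hypothetical file name
    # Old pattern: the handle is only closed whenever the object is garbage-collected
    # data = json.load(open('results.json'))

    # New pattern: the handle is closed deterministically, even if json.load() raises
    with open('results.json', 'r') as f:
        data = json.load(f)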
@@ -494,7 +494,8 @@ def separate_detections_into_folders(options):

  # Load detection results
  print('Loading detection results')
- results = json.load(open(options.results_file))
+ with open(options.results_file,'r') as f:
+ results = json.load(f)
  images = results['images']

  for im in images:
@@ -618,8 +619,13 @@ def separate_detections_into_folders(options):

  print('Starting a pool with {} threads'.format(options.n_threads))
  pool = ThreadPool(options.n_threads)
- process_detections_with_options = partial(_process_detections, options=options)
- _ = list(tqdm(pool.imap(process_detections_with_options, images), total=len(images)))
+ try:
+ process_detections_with_options = partial(_process_detections, options=options)
+ _ = list(tqdm(pool.imap(process_detections_with_options, images), total=len(images)))
+ finally:
+ pool.close()
+ pool.join()
+ print('Pool closed and joined for folder separation')

  if options.remove_empty_folders:
  print('Removing empty folders from {}'.format(options.base_output_folder))
@@ -736,7 +742,7 @@ def main(): # noqa
  help='Line thickness (in pixels) for rendering, only meaningful if ' + \
  'using render_boxes (defaults to {})'.format(
  default_line_thickness))
- parser.add_argument('--box_expansion', type=int, default=default_line_thickness,
+ parser.add_argument('--box_expansion', type=int, default=default_box_expansion,
  help='Box expansion (in pixels) for rendering, only meaningful if ' + \
  'using render_boxes (defaults to {})'.format(
  default_box_expansion))
@@ -433,7 +433,7 @@ def subset_json_detector_output_by_list(data, options):
  """

  if options.keep_files_in_list is None:
- return
+ return data

  files_to_keep = None

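Returning data instead of None matters because callers typically assign the function's result back to their results object. A minimal stand-in (not the real function) showing the failure mode the fix avoids:

    def subset_by_list(data, keep_files_in_list=None):
        # Minimal stand-in for subset_json_detector_output_by_list (not the real function)
        if keep_files_in_list is None:
            return data  # a bare "return" here would hand the caller None
        data['images'] = [im for im in data['images'] if im['file'] in keep_files_in_list]
        return data

    results = {'images': [{'file': 'a.jpg'}, {'file': 'b.jpg'}]}
    results = subset_by_list(results)   # no-op subset
    print(len(results['images']))       # 2; would raise TypeError if None came back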
@@ -45,7 +45,12 @@ class TopFoldersToBottomOptions:
  Options used to parameterize top_folders_to_bottom()
  """

- def __init__(self,input_folder,output_folder,copy=True,n_threads=1):
+ def __init__(self,
+ input_folder,
+ output_folder,
+ copy=True,
+ n_threads=1,
+ overwrite=False):

  #: Whether to copy (True) vs. move (False) false when re-organizing
  self.copy = copy
@@ -60,7 +65,7 @@ class TopFoldersToBottomOptions:
  self.output_folder = output_folder

  #: If this is False and an output file exists, throw an error
- self.overwrite = False
+ self.overwrite = overwrite


  #%% Main functions
@@ -130,6 +135,7 @@ def top_folders_to_bottom(options):
  options (TopFoldersToBottomOptions): See TopFoldersToBottomOptions for parameter details.

  """
+
  os.makedirs(options.output_folder,exist_ok=True)

  # Enumerate input folder
@@ -167,10 +173,15 @@

  print('Starting a pool with {} threads'.format(options.n_threads))
  pool = ThreadPool(options.n_threads)
- process_file_with_options = partial(_process_file, options=options)
- _ = list(tqdm(pool.imap(process_file_with_options, relative_files), total=len(relative_files)))
+ try:
+ process_file_with_options = partial(_process_file, options=options)
+ _ = list(tqdm(pool.imap(process_file_with_options, relative_files), total=len(relative_files)))
+ finally:
+ pool.close()
+ pool.join()
+ print('Pool closed and join for folder inversion')

- # ...def top_folders_to_bottom()
+ # ...def top_folders_to_bottom(...)


  #%% Interactive driver
@@ -192,7 +203,7 @@ if False:

  #%% Command-line driver

- # python top_folders_to_bottom.py "g:\temp\separated_images" "g:\temp\separated_images_inverted" --n_threads 100
+ # python top_folders_to_bottom.py "g:\temp\separated_images" "g:\temp\separated_images_inverted" --n_threads 10

  def main(): # noqa

@@ -215,7 +226,11 @@ def main(): # noqa

  # Convert to an options object
  options = TopFoldersToBottomOptions(
- args.input_folder,args.output_folder,copy=args.copy,n_threads=args.n_threads)
+ args.input_folder,
+ args.output_folder,
+ copy=args.copy,
+ n_threads=args.n_threads,
+ overwrite=args.overwrite)

  top_folders_to_bottom(options)

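With the widened constructor above, overwrite can now be passed at construction time rather than mutated afterwards. A hedged usage sketch based only on the signature shown in this diff; the folder paths are hypothetical:

    from megadetector.postprocessing.top_folders_to_bottom import \
        TopFoldersToBottomOptions, top_folders_to_bottom

    # Folder paths here are hypothetical
    options = TopFoldersToBottomOptions('/data/separated_images',
                                        '/data/separated_images_inverted',
                                        copy=True,
                                        n_threads=10,
                                        overwrite=True)
    top_folders_to_bottom(options)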
@@ -39,7 +39,7 @@ typical_keys = ['classification_categories',

  class ValidateBatchResultsOptions:
  """
- Options controlling the behavior of validate_bach_results()
+ Options controlling the behavior of validate_batch_results()
  """

  def __init__(self):
@@ -15,10 +15,10 @@ import json
  # Created by get_lila_category_list.py
  input_lila_category_list_file = os.path.expanduser('~/lila/lila_categories_list/lila_dataset_to_categories.json')

- output_file = os.path.expanduser('~/lila/lila_additions_2025.06.23.csv')
+ output_file = os.path.expanduser('~/lila/lila_additions_2025.10.07.csv')

  datasets_to_map = [
- 'Nkhotakota Camera Traps'
+ 'California Small Animals'
  ]


@@ -128,6 +128,52 @@ output_df.to_csv(output_file, index=None, header=True)
  # from megadetector.utils.path_utils import open_file; open_file(output_file)


+ #%% Remap missing entries in the .csv file
+
+ # ...typically because I made a change to the mapping code.
+
+ from megadetector.utils.path_utils import insert_before_extension
+ from megadetector.utils.ct_utils import is_empty
+
+ remapped_file = insert_before_extension(output_file,'remapped')
+
+ df = pd.read_csv(output_file)
+
+ for i_row,row in df.iterrows():
+
+ # Do we need to map this row?
+ if is_empty(row['source']):
+
+ query = row['query']
+ print('Mapping {}'.format(query))
+
+ taxonomic_match = get_preferred_taxonomic_match(query,taxonomy_preference=taxonomy_preference)
+
+ if (taxonomic_match.source == taxonomy_preference):
+
+ source = taxonomic_match.source
+ taxonomy_level = taxonomic_match.taxonomic_level
+ scientific_name = taxonomic_match.scientific_name
+ common_name = taxonomic_match.common_name
+ taxonomy_string = taxonomic_match.taxonomy_string
+
+ # Write source, taxonomy_level, scientific_name, common_name, and taxonomy_string
+ # to the corresponding columns in the current row in df
+ df.loc[i_row, 'source'] = source
+ df.loc[i_row, 'taxonomy_level'] = taxonomy_level
+ df.loc[i_row, 'scientific_name'] = scientific_name
+ df.loc[i_row, 'common_name'] = common_name
+ df.loc[i_row, 'taxonomy_string'] = taxonomy_string
+
+ # ...if we found a match
+
+ # ...do we need to map this row?
+
+ # ...for each row
+
+ df.to_csv(remapped_file, index=None, header=True)
+
+
  #%% Manual lookup

  if False:
@@ -140,11 +186,19 @@ if False:

  #%%

- q = 'animalia'
+ from megadetector.taxonomy_mapping.species_lookup import pop_levels
+
+ # Use this when an iNat match includes an empty subgenus with the same name as the genus
+ n_levels_to_pop = 0
+ q = 'sus scrofa'

  taxonomy_preference = 'inat'
  m = get_preferred_taxonomic_match(q,taxonomy_preference)
+ if n_levels_to_pop > 0:
+ m = pop_levels(m,n_levels_to_pop)
+
  # print(m.scientific_name); import clipboard; clipboard.copy(m.scientific_name)
+ # common_name = eval(m.__dict__['taxonomy_string'])[0][-1][0]; print(common_name); clipboard.copy(common_name)

  if (m is None) or (len(m.taxonomy_string) == 0):
  print('No match')
@@ -155,3 +209,5 @@ if False:
  print(m.source)
  print(m.taxonomy_string)
  import clipboard; clipboard.copy(m.taxonomy_string)
+
+
@@ -162,4 +162,4 @@ if False:

  print('Wrote final output to {}'.format(release_taxonomy_file))

- # ...if False
+
@@ -16,7 +16,7 @@ import os
  import pandas as pd

  # lila_taxonomy_file = r"c:\git\agentmorrisprivate\lila-taxonomy\lila-taxonomy-mapping.csv"
- lila_taxonomy_file = os.path.expanduser('~/lila/lila_additions_2025.06.23.csv')
+ lila_taxonomy_file = os.path.expanduser('~/lila/lila_additions_2025.10.07.csv')

  preview_base = os.path.expanduser('~/lila/lila_taxonomy_preview')
  os.makedirs(preview_base,exist_ok=True)
@@ -56,11 +56,6 @@ def taxonomy_string_to_level(taxonomy_string):
  return level


- #%% Read the taxonomy mapping file
-
- df = pd.read_csv(lila_taxonomy_file)
-
-
  #%% Prepare taxonomy lookup

  from megadetector.taxonomy_mapping.species_lookup import \
@@ -95,20 +90,29 @@ taxonomy_preference = 'inat'
  # i_row = 0; row = df.iloc[i_row]
  for i_row,row in tqdm(df.iterrows(),total=len(df)):

- sn = row['scientific_name']
- if not isinstance(sn,str):
- continue
+ try:
+
+ sn = row['scientific_name']
+ if not isinstance(sn,str):
+ continue

- m = get_preferred_taxonomic_match(sn,taxonomy_preference)
- assert m.scientific_name == sn
+ m = get_preferred_taxonomic_match(sn,taxonomy_preference)
+ assert m.scientific_name == sn

- ts = row['taxonomy_string']
- assert m.taxonomy_string[0:50] == ts[0:50], 'Mismatch for {}:\n\n{}\n\n{}\n'.format(
- row['dataset_name'],ts,m.taxonomy_string)
+ ts = row['taxonomy_string']
+ assert m.taxonomy_string[0:50] == ts[0:50], 'Mismatch for {}:\n\n{}\n\n{}\n'.format(
+ row['dataset_name'],ts,m.taxonomy_string)
+
+ if ts != m.taxonomy_string:
+ n_taxonomy_changes += 1
+ df.loc[i_row,'taxonomy_string'] = m.taxonomy_string
+
+ except Exception as e:

- if ts != m.taxonomy_string:
- n_taxonomy_changes += 1
- df.loc[i_row,'taxonomy_string'] = m.taxonomy_string
+ print('Error at row {}: {}'.format(i_row,str(e)))
+ raise
+
+ # ...for each row

  print('\nMade {} taxonomy changes'.format(n_taxonomy_changes))

@@ -325,6 +329,11 @@ for i_row,row in df.iterrows():

  #%% Download sample images for all scientific names

+ # You might have to do this:
+ #
+ # pip install python-magic
+ # pip install python-magic-bin
+
  # Takes ~1 minute per 10 rows

  remapped_queries = {'papio':'papio+baboon',
@@ -560,6 +560,7 @@ def get_taxonomic_info(query: str) -> List[Dict[str, Any]]:
  Main entry point: get taxonomic matches from both taxonomies for [query],
  which may be a scientific or common name.
  """
+
  query = query.strip().lower()
  # print("Finding taxonomy information for: {0}".format(query))

@@ -682,6 +683,35 @@ hyphenated_terms = ['crowned', 'backed', 'throated', 'tailed', 'headed', 'cheeke
  'fronted', 'bellied', 'spotted', 'eared', 'collared', 'breasted',
  'necked']

+ def pop_levels(m, n_levels=1):
+ """
+ Remove [n_levels] levels from the bottom of the TaxonomicMatch object m, typically used to remove
+ silly subgenera.
+ """
+
+ v = eval(m.taxonomy_string)
+ assert v[0][1] == m.taxonomic_level
+ assert v[0][2] == m.scientific_name
+ popped_v = v[n_levels:]
+ taxonomic_level = popped_v[0][1]
+ scientific_name = popped_v[0][2]
+ common_name = popped_v[0][3]
+ if len(common_name) == 0:
+ common_name = ''
+ else:
+ common_name = common_name[0]
+ taxonomy_string = str(popped_v)
+ source = m.source
+ return TaxonomicMatch(scientific_name=scientific_name,
+ common_name=common_name,
+ taxonomic_level=taxonomic_level,
+ source=source,
+ taxonomy_string=taxonomy_string,
+ match=None)
+
+ # ...def pop_levels(...)
+
+
  def get_preferred_taxonomic_match(query: str, taxonomy_preference = 'inat', retry=True) -> TaxonomicMatch:
  """
  Wrapper for _get_preferred_taxonomic_match, but expressing a variety of heuristics
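pop_levels() trims levels off the bottom of a TaxonomicMatch, and the interactive driver earlier in this diff uses it to drop an empty subgenus. A usage sketch based only on the signatures shown here, assuming the taxonomy lookup has already been initialized the way the surrounding scripts do:

    from megadetector.taxonomy_mapping.species_lookup import \
        get_preferred_taxonomic_match, pop_levels

    m = get_preferred_taxonomic_match('sus scrofa', taxonomy_preference='inat')

    # If the match bottoms out at a subgenus that just repeats the genus,
    # drop that level (this is what the new automatic check below also does)
    if m.taxonomic_level == 'subgenus':
        m = pop_levels(m, 1)

    print(m.taxonomic_level, m.scientific_name)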
@@ -704,6 +734,17 @@ def get_preferred_taxonomic_match(query: str, taxonomy_preference = 'inat', retr
  for s in hyphenated_terms:
  query = query.replace(' ' + s,'-' + s)
  m,query = _get_preferred_taxonomic_match(query=query,taxonomy_preference=taxonomy_preference)
+
+ if (len(m.scientific_name) > 0) or (not retry):
+ return m
+
+ query = query.replace(' species','')
+ query = query.replace(' order','')
+ query = query.replace(' genus','')
+ query = query.replace(' family','')
+ query = query.replace(' subfamily','')
+ m,query = _get_preferred_taxonomic_match(query=query,taxonomy_preference=taxonomy_preference)
+
  return m


@@ -887,8 +928,16 @@ def _get_preferred_taxonomic_match(query: str, taxonomy_preference = 'inat') ->

  taxonomy_string = str(match)

- return TaxonomicMatch(scientific_name, common_name, taxonomic_level, source,
- taxonomy_string, match),query
+ m = TaxonomicMatch(scientific_name, common_name, taxonomic_level, source,
+ taxonomy_string, match)
+
+ if (m.taxonomic_level == 'subgenus' and \
+ match[1][1] == 'genus' and \
+ match[1][2] == m.scientific_name):
+ print('Removing redundant subgenus {}'.format(scientific_name))
+ m = pop_levels(m,1)
+
+ return m,query

  # ...def _get_preferred_taxonomic_match()

@@ -241,7 +241,10 @@ def write_json(path,
  elif force_str:
  default_handler = str

- os.makedirs(os.path.dirname(path), exist_ok=True)
+ # Create the parent directory if necessary
+ parent_dir = os.path.dirname(path)
+ if len(parent_dir) > 0:
+ os.makedirs(parent_dir, exist_ok=True)

  with open(path, 'w', newline='\n', encoding=encoding) as f:
  json.dump(content, f, indent=indent, default=default_handler, ensure_ascii=ensure_ascii)
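The new guard in write_json() exists because os.path.dirname() returns an empty string for a bare filename, and os.makedirs('') raises FileNotFoundError. A quick illustration of what the guard avoids (file names are hypothetical):

    import os

    # For a bare filename, dirname() is '', and os.makedirs('') raises FileNotFoundError
    print(repr(os.path.dirname('results.json')))      # ''
    print(repr(os.path.dirname('out/results.json')))  # 'out'

    parent_dir = os.path.dirname('results.json')
    if len(parent_dir) > 0:
        os.makedirs(parent_dir, exist_ok=True)        # skipped for bare filenames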
@@ -562,7 +565,7 @@ def sort_dictionary_by_value(d,sort_values=None,reverse=False):
  reverse (bool, optional): whether to sort in reverse (descending) order

  Returns:
- dict: sorted copy of [d
+ dict: sorted copy of [d]
  """

  if sort_values is None:
@@ -1022,8 +1025,10 @@ def parse_bool_string(s, strict=False):
  s = str(s).lower().strip()

  if strict:
- false_strings = ('false')
- true_strings = ('true')
+ # Fun fact: ('false') (rather than ('false,')) creates a string,
+ # not a tuple.
+ false_strings = ('false',)
+ true_strings = ('true',)
  else:
  false_strings = ('no', 'false', 'f', 'n', '0')
  true_strings = ('yes', 'true', 't', 'y', '1')
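The comment added above points at a real Python gotcha: parentheses alone do not make a tuple, so a strict membership test against ('false') becomes a substring check on a string. A short illustration:

    false_strings = ('false')     # parentheses alone: this is just the string 'false'
    print(type(false_strings))    # <class 'str'>
    print('f' in false_strings)   # True -- substring test, not what parse_bool_string wants

    false_strings = ('false',)    # trailing comma: now it's a one-element tuple
    print(type(false_strings))    # <class 'tuple'>
    print('f' in false_strings)   # False -- element test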
@@ -55,6 +55,10 @@ class FrameExtractionOptions:
  #: must be a folder when this is specified.
  self.detector_output_file = None

+ # ...def __init__(...)
+
+ # ...class FrameExtractionOptions
+

  #%% Core functions

@@ -34,7 +34,7 @@ def torch_test():
  except Exception as e: #noqa
  print('PyTorch unavailable, not running PyTorch tests. PyTorch import error was:\n{}'.format(
  str(e)))
- return
+ return 0

  print('Torch version: {}'.format(str(torch.__version__)))
  print('CUDA available (according to PyTorch): {}'.format(torch.cuda.is_available()))
@@ -71,17 +71,17 @@ def tf_test():
  Print diagnostic information about TF/CUDA status.

  Returns:
- int: The number of CUDA devices reported by PyTorch.
+ int: The number of CUDA devices reported by TensorFlow.
  """

  try:
- import tensorflow as tf
+ import tensorflow as tf # type: ignore
  except Exception as e: #noqa
  print('TensorFlow unavailable, not running TF tests. TF import error was:\n{}'.format(
  str(e)))
- return
+ return 0

- from tensorflow.python.platform import build_info as build
+ from tensorflow.python.platform import build_info as build # type: ignore
  print(f"TF version: {tf.__version__}")

  if 'cuda_version' not in build.build_info:
@@ -94,7 +94,7 @@ def tf_test():
  print(f"CuDNN build version reported by TensorFlow: {build.build_info['cudnn_version']}")

  try:
- from tensorflow.python.compiler.tensorrt import trt_convert as trt
+ from tensorflow.python.compiler.tensorrt import trt_convert as trt # type: ignore
  print("Linked TensorRT version: {}".format(trt.trt_utils._pywrap_py_utils.get_linked_tensorrt_version()))
  except Exception:
  print('Could not probe TensorRT version')
@@ -386,7 +386,7 @@ def output_files_are_identical(fn1,fn2,verbose=False):
  fn2_results['images'] = \
  sorted(fn2_results['images'], key=lambda d: d['file'])

- if len(fn1_results['images']) != len(fn1_results['images']):
+ if len(fn1_results['images']) != len(fn2_results['images']):
  if verbose:
  print('{} images in {}, {} images in {}'.format(
  len(fn1_results['images']),fn1,
@@ -1249,8 +1249,8 @@ def run_cli_tests(options):
  cmd_results = execute_and_print(cmd)

  assert output_files_are_identical(fn1=inference_output_file,
- fn2=inference_output_file_queue,
- verbose=True)
+ fn2=inference_output_file_queue,
+ verbose=True)


  ## Run again with the image queue and worker-side preprocessing enabled
@@ -1265,24 +1265,24 @@ def run_cli_tests(options):
  cmd_results = execute_and_print(cmd)

  assert output_files_are_identical(fn1=inference_output_file,
- fn2=inference_output_file_preprocess_queue,
- verbose=True)
+ fn2=inference_output_file_preprocess_queue,
+ verbose=True)


- ## Run again with the image queue and worker-side preprocessing
+ ## Run again with the image queue but no worker-side preprocessing

- print('\n** Running MD on a folder (with image queue and preprocessing) (CLI) **\n')
+ print('\n** Running MD on a folder (with image queue but no worker-side preprocessing) (CLI) **\n')

- cmd = base_cmd + ' --use_image_queue --preprocess_on_image_queue'
- inference_output_file_preprocess_queue = \
- insert_before_extension(inference_output_file,'preprocess_queue')
- cmd = cmd.replace(inference_output_file,inference_output_file_preprocess_queue)
+ cmd = base_cmd + ' --use_image_queue'
+ inference_output_file_no_preprocess_queue = \
+ insert_before_extension(inference_output_file,'no_preprocess_queue')
+ cmd = cmd.replace(inference_output_file,inference_output_file_no_preprocess_queue)
  cmd += ' --detector_options {}'.format(dict_to_kvp_list(options.detector_options))
  cmd_results = execute_and_print(cmd)

  assert output_files_are_identical(fn1=inference_output_file,
- fn2=inference_output_file_preprocess_queue,
- verbose=True)
+ fn2=inference_output_file_no_preprocess_queue,
+ verbose=True)


  ## Run again with the worker-side preprocessing and an alternative batch size
@@ -1316,8 +1316,8 @@ def run_cli_tests(options):
  cmd_results = execute_and_print(cmd)

  assert output_files_are_identical(fn1=inference_output_file,
- fn2=inference_output_file_checkpoint,
- verbose=True)
+ fn2=inference_output_file_checkpoint,
+ verbose=True)


  ## Run again with "modern" postprocessing, make sure the results are *not* the same as classic
@@ -1331,8 +1331,8 @@ def run_cli_tests(options):
  cmd_results = execute_and_print(cmd)

  assert not output_files_are_identical(fn1=inference_output_file,
- fn2=inference_output_file_modern,
- verbose=True)
+ fn2=inference_output_file_modern,
+ verbose=True)


  ## Run again with "modern" postprocessing and worker-side preprocessing,
@@ -1348,13 +1348,13 @@ def run_cli_tests(options):

  # This should not be the same as the "classic" results
  assert not output_files_are_identical(fn1=inference_output_file,
- fn2=inference_output_file_modern_worker_preprocessing,
- verbose=True)
+ fn2=inference_output_file_modern_worker_preprocessing,
+ verbose=True)

  # ...but it should be the same as the single-threaded "modern" results
  assert output_files_are_identical(fn1=inference_output_file_modern,
- fn2=inference_output_file_modern_worker_preprocessing,
- verbose=True)
+ fn2=inference_output_file_modern_worker_preprocessing,
+ verbose=True)


  if not options.skip_cpu_tests: