megadetector 5.0.29__py3-none-any.whl → 10.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (95)
  1. megadetector/classification/efficientnet/model.py +8 -8
  2. megadetector/classification/efficientnet/utils.py +6 -5
  3. megadetector/classification/prepare_classification_script_mc.py +3 -3
  4. megadetector/data_management/annotations/annotation_constants.py +0 -1
  5. megadetector/data_management/camtrap_dp_to_coco.py +34 -1
  6. megadetector/data_management/cct_json_utils.py +2 -2
  7. megadetector/data_management/coco_to_yolo.py +22 -5
  8. megadetector/data_management/databases/add_width_and_height_to_db.py +85 -12
  9. megadetector/data_management/databases/combine_coco_camera_traps_files.py +2 -2
  10. megadetector/data_management/databases/integrity_check_json_db.py +29 -15
  11. megadetector/data_management/generate_crops_from_cct.py +50 -1
  12. megadetector/data_management/labelme_to_coco.py +4 -2
  13. megadetector/data_management/labelme_to_yolo.py +82 -2
  14. megadetector/data_management/lila/generate_lila_per_image_labels.py +276 -18
  15. megadetector/data_management/lila/get_lila_annotation_counts.py +5 -3
  16. megadetector/data_management/lila/lila_common.py +3 -0
  17. megadetector/data_management/lila/test_lila_metadata_urls.py +15 -5
  18. megadetector/data_management/mewc_to_md.py +5 -0
  19. megadetector/data_management/ocr_tools.py +4 -3
  20. megadetector/data_management/read_exif.py +20 -5
  21. megadetector/data_management/remap_coco_categories.py +66 -4
  22. megadetector/data_management/remove_exif.py +50 -1
  23. megadetector/data_management/rename_images.py +3 -3
  24. megadetector/data_management/resize_coco_dataset.py +563 -95
  25. megadetector/data_management/yolo_output_to_md_output.py +131 -2
  26. megadetector/data_management/yolo_to_coco.py +140 -5
  27. megadetector/detection/change_detection.py +4 -3
  28. megadetector/detection/pytorch_detector.py +60 -22
  29. megadetector/detection/run_detector.py +225 -25
  30. megadetector/detection/run_detector_batch.py +42 -16
  31. megadetector/detection/run_inference_with_yolov5_val.py +12 -2
  32. megadetector/detection/run_tiled_inference.py +1 -0
  33. megadetector/detection/video_utils.py +53 -24
  34. megadetector/postprocessing/add_max_conf.py +4 -0
  35. megadetector/postprocessing/categorize_detections_by_size.py +1 -1
  36. megadetector/postprocessing/classification_postprocessing.py +55 -20
  37. megadetector/postprocessing/combine_batch_outputs.py +3 -2
  38. megadetector/postprocessing/compare_batch_results.py +64 -10
  39. megadetector/postprocessing/convert_output_format.py +12 -8
  40. megadetector/postprocessing/create_crop_folder.py +137 -10
  41. megadetector/postprocessing/load_api_results.py +26 -8
  42. megadetector/postprocessing/md_to_coco.py +4 -4
  43. megadetector/postprocessing/md_to_labelme.py +18 -7
  44. megadetector/postprocessing/merge_detections.py +5 -0
  45. megadetector/postprocessing/postprocess_batch_results.py +6 -3
  46. megadetector/postprocessing/remap_detection_categories.py +55 -2
  47. megadetector/postprocessing/render_detection_confusion_matrix.py +9 -6
  48. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
  49. megadetector/taxonomy_mapping/map_new_lila_datasets.py +3 -4
  50. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +40 -19
  51. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +1 -1
  52. megadetector/taxonomy_mapping/species_lookup.py +123 -41
  53. megadetector/utils/ct_utils.py +133 -113
  54. megadetector/utils/md_tests.py +93 -13
  55. megadetector/utils/path_utils.py +137 -107
  56. megadetector/utils/split_locations_into_train_val.py +2 -2
  57. megadetector/utils/string_utils.py +7 -7
  58. megadetector/utils/url_utils.py +81 -58
  59. megadetector/utils/wi_utils.py +46 -17
  60. megadetector/visualization/plot_utils.py +13 -9
  61. megadetector/visualization/render_images_with_thumbnails.py +2 -1
  62. megadetector/visualization/visualization_utils.py +94 -46
  63. megadetector/visualization/visualize_db.py +36 -9
  64. megadetector/visualization/visualize_detector_output.py +4 -4
  65. {megadetector-5.0.29.dist-info → megadetector-10.0.1.dist-info}/METADATA +135 -135
  66. megadetector-10.0.1.dist-info/RECORD +139 -0
  67. {megadetector-5.0.29.dist-info → megadetector-10.0.1.dist-info}/licenses/LICENSE +0 -0
  68. {megadetector-5.0.29.dist-info → megadetector-10.0.1.dist-info}/top_level.txt +0 -0
  69. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  70. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  71. megadetector/api/batch_processing/api_core/batch_service/score.py +0 -438
  72. megadetector/api/batch_processing/api_core/server.py +0 -294
  73. megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
  74. megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
  75. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  76. megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
  77. megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
  78. megadetector/api/batch_processing/api_core/server_utils.py +0 -88
  79. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  80. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  81. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  82. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  83. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  84. megadetector/api/synchronous/__init__.py +0 -0
  85. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  86. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
  87. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
  88. megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
  89. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  90. megadetector/api/synchronous/api_core/tests/load_test.py +0 -109
  91. megadetector/utils/azure_utils.py +0 -178
  92. megadetector/utils/sas_blob_utils.py +0 -513
  93. megadetector-5.0.29.dist-info/RECORD +0 -163
  94. /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
  95. {megadetector-5.0.29.dist-info → megadetector-10.0.1.dist-info}/WHEEL +0 -0
megadetector/postprocessing/create_crop_folder.py

@@ -11,6 +11,8 @@ of images representing all above-threshold crops from the original folder.
 
 import os
 import json
+import argparse
+
 from tqdm import tqdm
 
 from multiprocessing.pool import Pool, ThreadPool
@@ -19,7 +21,7 @@ from functools import partial
 
 from megadetector.utils.path_utils import insert_before_extension
 from megadetector.utils.ct_utils import invert_dictionary
-from megadetector.utils.ct_utils import is_list_sorted
+from megadetector.utils.ct_utils import is_list_sorted
 from megadetector.visualization.visualization_utils import crop_image
 from megadetector.visualization.visualization_utils import exif_preserving_save
 
@@ -63,6 +65,7 @@ def _get_crop_filename(image_fn,crop_id):
     """
     Generate crop filenames in a consistent way.
     """
+
     if isinstance(crop_id,int):
         crop_id = str(crop_id).zfill(3)
     assert isinstance(crop_id,str)
@@ -75,7 +78,15 @@ def _generate_crops_for_single_image(crops_this_image,
                                      options):
     """
     Generate all the crops required for a single image.
+
+    Args:
+        crops_this_image (list of dict): list of dicts with at least keys
+            'image_fn_relative', 'crop_id'
+        input_folder (str): input folder (whole images)
+        output_folder (str): output folder (crops)
+        options (CreateCropFolderOptions): cropping options
     """
+
     if len(crops_this_image) == 0:
         return
 
@@ -147,9 +158,9 @@ def crop_results_to_image_results(image_results_file_with_crop_ids,
         crop_results_prefix (str, optional): if not None, removes this prefix from crop
             results filenames. Intended to support the case where the crop results
             use absolute paths.
-        detections_without_classification_handling (str, optional): what to do when we
+        detections_without_classification_handling (str, optional): what to do when we
             encounter a crop that doesn't appear in classification results: 'error',
-            or 'include' ("include" means "leave the detection alone, without classifications")
+            or 'include' ("include" means "leave the detection alone, without classifications")
     """
 
     ##%% Validate inputs
@@ -190,7 +201,7 @@ def crop_results_to_image_results(image_results_file_with_crop_ids,
     crop_filename_to_results = {}
 
     # im = crop_results['images'][0]
-    for im in crop_results['images']:
+    for im in crop_results['images']:
         fn = im['file']
         # Possibly remove a prefix from each filename
         if (crop_results_prefix is not None) and (crop_results_prefix in fn):
@@ -260,14 +271,14 @@ def crop_results_to_image_results(image_results_file_with_crop_ids,
             assert crop_results_this_detection['detections'][0]['bbox'] == [0,0,1,1], \
                 'Invalid crop bounding box'
 
-            # This check was helpful for the case where crop-level results had already
+            # This check was helpful for the case where crop-level results had already
             # taken detection confidence values from detector output by construct, but this isn't
             # really meaningful for most cases.
             # assert abs(crop_results_this_detection['detections'][0]['conf'] - det['conf']) < 0.01
-
+
             if require_identical_detection_categories:
                 assert crop_results_this_detection['detections'][0]['category'] == det['category']
-
+
             # Copy the crop-level classifications
             det['classifications'] = crop_results_this_detection['detections'][0]['classifications']
             confidence_values = [x[1] for x in det['classifications']]
@@ -381,8 +392,8 @@ def create_crop_folder(input_file,
             det['crop_id'] = i_detection
 
             crop_info = {'image_fn_relative':image_fn_relative,
-                         'crop_id':i_detection,
-                         'detection':det}
+                         'crop_id':i_detection,
+                         'detection':det}
 
             crop_filename_relative = _get_crop_filename(image_fn_relative,
                                                         crop_info['crop_id'])
@@ -490,4 +501,120 @@ def create_crop_folder(input_file,
 
 #%% Command-line driver
 
-# TODO
+def main():
+    """
+    Command-line interface for creating a crop folder from MegaDetector results.
+    """
+
+    parser = argparse.ArgumentParser(
+        description='Create a folder of crops from MegaDetector results'
+    )
+    parser.add_argument(
+        'input_file',
+        type=str,
+        help='Path to the MegaDetector .json results file'
+    )
+    parser.add_argument(
+        'input_folder',
+        type=str,
+        help='Path to the folder containing the original images'
+    )
+    parser.add_argument(
+        'output_folder',
+        type=str,
+        help='Path to the folder where cropped images will be saved'
+    )
+    parser.add_argument(
+        '--output_file',
+        type=str,
+        default=None,
+        help='Path to save the modified MegaDetector .json file (with crop IDs and filenames)'
+    )
+    parser.add_argument(
+        '--crops_output_file',
+        type=str,
+        default=None,
+        help='Path to save a new .json file for the crops themselves (with full-image detections for each crop)'
+    )
+    parser.add_argument(
+        '--confidence_threshold',
+        type=float,
+        default=0.1,
+        help='Confidence threshold for detections to be cropped (default: 0.1)'
+    )
+    parser.add_argument(
+        '--expansion',
+        type=int,
+        default=0,
+        help='Number of pixels to expand each crop (default: 0)'
+    )
+    parser.add_argument(
+        '--quality',
+        type=int,
+        default=95,
+        help='JPEG quality for saving crops (default: 95)'
+    )
+    parser.add_argument(
+        '--overwrite',
+        type=str,
+        default='true',
+        choices=['true', 'false'],
+        help="Overwrite existing crop images (default: 'true')"
+    )
+    parser.add_argument(
+        '--n_workers',
+        type=int,
+        default=8,
+        help='Number of concurrent workers (default: 8)'
+    )
+    parser.add_argument(
+        '--pool_type',
+        type=str,
+        default='thread',
+        choices=['thread', 'process'],
+        help="Type of parallelism to use ('thread' or 'process', default: 'thread')"
+    )
+    parser.add_argument(
+        '--category_names',
+        type=str,
+        default=None,
+        help="Comma-separated list of category names to include " + \
+             "(e.g., 'animal,person'). If None (default), all categories are included."
+    )
+
+    args = parser.parse_args()
+
+    options = CreateCropFolderOptions()
+    options.confidence_threshold = args.confidence_threshold
+    options.expansion = args.expansion
+    options.quality = args.quality
+    options.overwrite = (args.overwrite.lower() == 'true')
+    options.n_workers = args.n_workers
+    options.pool_type = args.pool_type
+
+    if args.category_names:
+        options.category_names_to_include = [name.strip() for name in args.category_names.split(',')]
+    else:
+        options.category_names_to_include = None
+
+    print('Starting crop folder creation...')
+    print('Input MD results: {}'.format(args.input_file))
+    print('Input image folder: {}'.format(args.input_folder))
+    print('Output crop folder: {}'.format(args.output_folder))
+
+    if args.output_file:
+        print('Modified MD results will be saved to {}'.format(args.output_file))
+    if args.crops_output_file:
+        print('Crops .json output will be saved to {}'.format(args.crops_output_file))
+
+    create_crop_folder(
+        input_file=args.input_file,
+        input_folder=args.input_folder,
+        output_folder=args.output_folder,
+        output_file=args.output_file,
+        crops_output_file=args.crops_output_file,
+        options=options
+    )
+
+if __name__ == '__main__':
+    main()
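
The new command-line driver wraps the existing create_crop_folder Python API. For reference, a minimal sketch of the equivalent library call (all file and folder paths below are hypothetical):

    from megadetector.postprocessing.create_crop_folder import \
        create_crop_folder, CreateCropFolderOptions

    options = CreateCropFolderOptions()
    options.confidence_threshold = 0.1  # same default the new CLI exposes
    options.quality = 95                # JPEG quality for saved crops

    # All paths here are hypothetical placeholders
    create_crop_folder(input_file='md_results.json',
                       input_folder='images/',
                       output_folder='crops/',
                       output_file='md_results_with_crop_ids.json',
                       crops_output_file='crop_results.json',
                       options=options)
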
megadetector/postprocessing/load_api_results.py

@@ -18,7 +18,8 @@ Includes functions to read/write the (very very old) .csv results format.
 import json
 import os
 
-from typing import Mapping, Optional
+from typing import Optional
+from collections.abc import Mapping
 
 import pandas as pd
 
@@ -36,13 +37,13 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
     Loads json-formatted MegaDetector results to a Pandas DataFrame.
 
     Args:
-        api_output_path: path to the output json file
-        normalize_paths: whether to apply os.path.normpath to the 'file' field
-            in each image entry in the output file
-        filename_replacements: replace some path tokens to match local paths to
-            the original blob structure
-        force_forward_slashes: whether to convert backslashes to forward slashes
-            in filenames
+        api_output_path (str): path to the output json file
+        normalize_paths (bool, optional): whether to apply os.path.normpath to the 'file'
+            field in each image entry in the output file
+        filename_replacements (dict, optional): replace some path tokens to match local paths
+            to the original file structure
+        force_forward_slashes (bool, optional): whether to convert backslashes to forward
+            slashes in filenames
 
     Returns:
         detection_results: pd.DataFrame, contains at least the columns ['file', 'detections','failure']
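
A minimal usage sketch of the signature documented above; the input filename is hypothetical, and the two-value unpacking assumes the function also returns a dict of file-level fields alongside the DataFrame, which this hunk does not show:

    from megadetector.postprocessing.load_api_results import load_api_results

    # 'md_results.json' is a hypothetical input file; the returned DataFrame
    # has at least the columns 'file', 'detections', and 'failure'
    detection_results, other_fields = load_api_results(
        'md_results.json', force_forward_slashes=True)
    print(detection_results[['file', 'detections', 'failure']].head())
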
@@ -98,6 +99,11 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
 def write_api_results(detection_results_table, other_fields, out_path):
     """
     Writes a Pandas DataFrame to the MegaDetector .json format.
+
+    Args:
+        detection_results_table (DataFrame): data to write
+        other_fields (dict): additional fields to include in the output .json
+        out_path (str): output .json filename
     """
 
     print('Writing detection results to {}'.format(out_path))
@@ -144,6 +150,14 @@ def load_api_results_csv(filename, normalize_paths=True, filename_replacements=N
     [DEPRECATED]
 
     Loads .csv-formatted MegaDetector results to a pandas table
+
+    Args:
+        filename (str): path to the csv file to read
+        normalize_paths (bool, optional): whether to apply os.path.normpath to the 'file'
+            field in each image entry in the output file
+        filename_replacements (dict, optional): replace some path tokens to match local paths
+            to the original file structure
+        nrows (int, optional): read only the first N rows of [filename]
     """
 
     if filename_replacements is None:
@@ -192,6 +206,10 @@ def write_api_results_csv(detection_results, filename):
     Writes a Pandas table to csv in a way that's compatible with the .csv output
     format. Currently just a wrapper around to_csv that forces output writing
     to go through a common code path.
+
+    Args:
+        detection_results (DataFrame): dataframe to write to [filename]
+        filename (str): .csv filename to write
     """
 
     print('Writing detection results to {}'.format(filename))
megadetector/postprocessing/md_to_coco.py

@@ -74,9 +74,9 @@ def md_to_coco(md_results_file,
             data (e.g. EXIF metadata) will be propagated to COCO output
         include_failed_images (bool, optional): if this is True, failed images will be propagated to COCO output
             with a non-empty "failure" field and no other fields, otherwise failed images will be skipped.
-        include_annotations_without_bounding_boxes (bool, optional): if this is True, annotations with
-            only class labels (no bounding boxes) will be included in the output. If this is False, empty
-            images will be represented with no annotations.
+        include_annotations_without_bounding_boxes (bool, optional): the only time we end up with
+            annotations without bounding boxes is when a detection has the category [empty_category_id];
+            this determines whether those annotations are included in the output.
         empty_category_id (str, optional): category ID reserved for the 'empty' class, should not be
             attached to any bounding boxes
         overwrite_behavior (str, optional): determines behavior if the output file exists ('skip' to skip conversion,
@@ -257,7 +257,7 @@ def md_to_coco(md_results_file,
         else:
 
             # In very esoteric cases, we use the empty category (0) in MD-formatted output files
-            print('Warning: empty category ({}) used for annotation in file {}'.format(
+            print('Warning: empty category ({}) used for annotation for image {}'.format(
                 empty_category_id,im['file']))
             pass
megadetector/postprocessing/md_to_labelme.py

@@ -108,8 +108,13 @@ def get_labelme_dict_for_image(im,image_base_name=None,category_id_to_name=None,
 # ...def get_labelme_dict_for_image()
 
 
-def _write_output_for_image(im,image_base,extension_prefix,info,
-                            confidence_threshold,category_id_to_name,overwrite,
+def _write_output_for_image(im,
+                            image_base,
+                            extension_prefix,
+                            info,
+                            confidence_threshold,
+                            category_id_to_name,
+                            overwrite,
                             verbose=False):
 
     if 'failure' in im and im['failure'] is not None:
@@ -140,9 +145,14 @@ def _write_output_for_image(im,image_base,extension_prefix,info,
 
 
 
-def md_to_labelme(results_file,image_base,confidence_threshold=None,
-                  overwrite=False,extension_prefix='',n_workers=1,
-                  use_threads=False,bypass_image_size_read=False,
+def md_to_labelme(results_file,
+                  image_base,
+                  confidence_threshold=None,
+                  overwrite=False,
+                  extension_prefix='',
+                  n_workers=1,
+                  use_threads=False,
+                  bypass_image_size_read=False,
                   verbose=False):
     """
     For all the images in [results_file], write a .json file in labelme format alongside the
@@ -153,11 +163,12 @@ def md_to_labelme(results_file,image_base,confidence_threshold=None,
         image_base (str): folder of images; filenames in [results_file] should be relative to
             this folder
         confidence_threshold (float, optional): only detections at or above this confidence threshold
-            will be included in the output dict
+            will be included in the output dict. If None, no threshold will be applied.
         overwrite (bool, optional): whether to overwrite existing output files; if this is False
            and the output file for an image exists, we'll skip that image
         extension_prefix (str, optional): if non-empty, "extension_prefix" will be inserted before the .json
-            extension
+            extension (typically used to generate multiple copies of labelme files representing different
+            MD thresholds)
         n_workers (int, optional): enables multiprocessing if > 1
         use_threads (bool, optional): if [n_workers] > 1, determines whether we parallelize via threads (True)
             or processes (False)
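
The reformatted signature above maps directly onto a call like the following minimal sketch (paths are hypothetical):

    from megadetector.postprocessing.md_to_labelme import md_to_labelme

    # Write labelme-format .json files alongside each image; with
    # confidence_threshold=None, no detections would be filtered out
    md_to_labelme(results_file='md_results.json',  # hypothetical path
                  image_base='images/',            # hypothetical path
                  confidence_threshold=0.2,
                  overwrite=False,
                  n_workers=4,
                  use_threads=True)
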
megadetector/postprocessing/merge_detections.py

@@ -85,6 +85,11 @@ def merge_detections(source_files,target_file,output_file,options=None):
 
     The results are written to [output_file].
 
+    Args:
+        source_files (list of str): list of files to merge into the results in [target_file]
+        target_file (str): filename that is treated as the primary source of results
+        output_file (str): file to which we should write merged results
+        options (MergeDetectionsOptions, optional): see MergeDetectionsOptions
     """
 
     if isinstance(source_files,str):
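
Given the newly documented arguments, a minimal sketch of a merge (all filenames are hypothetical):

    from megadetector.postprocessing.merge_detections import \
        merge_detections, MergeDetectionsOptions

    options = MergeDetectionsOptions()

    # [target_file] is the primary source of results; detections from
    # [source_files] are merged into it and written to [output_file]
    merge_detections(source_files=['results_a.json', 'results_b.json'],
                     target_file='results_primary.json',
                     output_file='results_merged.json',
                     options=options)
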
megadetector/postprocessing/postprocess_batch_results.py

@@ -1001,13 +1001,16 @@ def process_batch_results(options):
         options.separate_detections_by_category = False
 
     ground_truth_indexed_db = IndexedJsonDb(
-        options.ground_truth_json_file, b_normalize_paths=True,
+        options.ground_truth_json_file,
+        b_normalize_paths=True,
         filename_replacements=options.ground_truth_filename_replacements)
 
     # Mark images in the ground truth as positive or negative
     n_negative, n_positive, n_unknown, n_ambiguous = _mark_detection_status(
-        ground_truth_indexed_db, negative_classes=options.negative_classes,
+        ground_truth_indexed_db,
+        negative_classes=options.negative_classes,
         unknown_classes=options.unlabeled_classes)
+
     print(f'Finished loading and indexing ground truth: {n_negative} '
           f'negative, {n_positive} positive, {n_unknown} unknown, '
           f'{n_ambiguous} ambiguous')
@@ -1348,7 +1351,7 @@ def process_batch_results(options):
     # Prepend class name on each line and add to the top
     cm_str_lines = [' ' * 16 + ' '.join(classname_headers)]
     cm_str_lines += ['{:>15}'.format(cn[:15]) + ' ' + cm_line for cn, cm_line in \
-                     zip(classname_list, cm_str.splitlines())]
+                     zip(classname_list, cm_str.splitlines(), strict=True)]
 
     # Print formatted confusion matrix
     if False:
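
The strict=True argument to zip() (available since Python 3.10) turns a silent truncation into an explicit error when the class-name list and the confusion-matrix rows differ in length:

    # With strict=True, a length mismatch raises rather than truncating
    list(zip([1, 2, 3], ['a', 'b'], strict=True))
    # ValueError: zip() argument 2 is shorter than argument 1
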
megadetector/postprocessing/remap_detection_categories.py

@@ -11,8 +11,9 @@ Currently only supports remapping detection categories, not classification categ
 
 #%% Constants and imports
 
-import json
 import os
+import json
+import argparse
 
 from tqdm import tqdm
 
@@ -167,4 +168,56 @@ if False:
 
 #%% Command-line driver
 
-# TODO
+def main():
+    """
+    Command-line interface for remapping detection categories in a MegaDetector results file
+    """
+
+    parser = argparse.ArgumentParser(
+        description='Remap detection categories in a MegaDetector .json results file'
+    )
+    parser.add_argument(
+        'input_file',
+        type=str,
+        help='Path to the MegaDetector .json results file to remap'
+    )
+    parser.add_argument(
+        'output_file',
+        type=str,
+        help='Path to save the remapped .json results file'
+    )
+    parser.add_argument(
+        'target_category_map_file',
+        type=str,
+        help="Path to a MegaDetector .json results file from which to take the target 'detection_categories' mapping"
+    )
+    parser.add_argument(
+        '--extra_category_handling',
+        type=str,
+        default='error',
+        choices=['error', 'drop_if_unused'],
+        help="How to handle source categories not in target map (default: 'error')"
+    )
+    parser.add_argument(
+        '--overwrite',
+        type=str,
+        default='false',
+        choices=['true', 'false'],
+        help="Overwrite output file if it exists (default: 'false')."
+    )
+
+    args = parser.parse_args()
+
+    overwrite_bool = (args.overwrite.lower() == 'true')
+
+    print('Starting category remapping...')
+
+    remap_detection_categories(
+        input_file=args.input_file,
+        output_file=args.output_file,
+        target_category_map=args.target_category_map_file,  # Pass filename directly
+        extra_category_handling=args.extra_category_handling,
+        overwrite=overwrite_bool)
+
+if __name__ == '__main__':
+    main()
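
The driver above is a thin wrapper around remap_detection_categories; the equivalent Python call looks like this (filenames are hypothetical):

    from megadetector.postprocessing.remap_detection_categories import \
        remap_detection_categories

    # target_category_map can be the filename of another MD results file
    # whose 'detection_categories' mapping we want to adopt
    remap_detection_categories(input_file='results_old_categories.json',
                               output_file='results_remapped.json',
                               target_category_map='reference_results.json',
                               extra_category_handling='drop_if_unused',
                               overwrite=True)
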
megadetector/postprocessing/render_detection_confusion_matrix.py

@@ -118,25 +118,28 @@ def render_detection_confusion_matrix(ground_truth_file,
             [results_file] should be relative to this folder.
         preview_folder (str): the output folder, i.e. the folder in which we'll create our nifty
             HTML stuff.
-        force_rendering_images (bool, optional): if False, skips images that already exist
+        force_render_images (bool, optional): if False, skips images that already exist
         confidence_thresholds (dict, optional): a dictionary mapping class names to thresholds;
             all classes not explicitly named here will use the threshold for the "default" category.
-        rendering_thresholds (dict, optional): a dictionary mapping class names to thresholds;
+        rendering_confidence_thresholds (dict, optional): a dictionary mapping class names to thresholds;
            all classes not explicitly named here will use the threshold for the "default" category.
         target_image_size (tuple, optional): output image size, as a pair of ints (width,height). If one
             value is -1 and the other is not, aspect ratio is preserved. If both are -1, the original image
             sizes are preserved.
         parallelize_rendering (bool, optional): enable (default) or disable parallelization when rendering
-        parallelize_rendering_n_core (int, optional): number of threads or processes to use for rendering, only
+        parallelize_rendering_n_cores (int, optional): number of threads or processes to use for rendering, only
            used if parallelize_rendering is True
-        parallelize_rendering_with_threads: whether to use threads (True) or processes (False) when rendering,
-            only used if parallelize_rendering is True
+        parallelize_rendering_with_threads (bool, optional): whether to use threads (True) or processes (False)
+            when rendering, only used if parallelize_rendering is True
         job_name (str, optional): job name to include in big letters in the output file
-        model_file (str, optional) model filename to include in HTML output
+        model_file (str, optional): model filename to include in HTML output
         empty_category_name (str, optional): special category name that we should treat as empty, typically
             "empty"
         html_image_list_options (dict, optional): options passed along to write_html_image_list;
             see write_html_image_list for documentation.
+
+    Returns:
+        dict: confusion matrix information, containing at least the key "html_file"
     """
 
     ##%% Argument and path handling
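
A minimal sketch of a call using the renamed parameters; paths are hypothetical, and the full signature includes image-folder arguments not shown in this hunk:

    from megadetector.postprocessing.render_detection_confusion_matrix import \
        render_detection_confusion_matrix

    cm_info = render_detection_confusion_matrix(
        ground_truth_file='ground_truth.json',   # hypothetical path
        results_file='md_results.json',          # hypothetical path
        preview_folder='preview/',               # hypothetical path
        confidence_thresholds={'default': 0.2},  # per-class, with a 'default' fallback
        job_name='test-job')

    # Per the new docstring, the returned dict contains at least 'html_file'
    print(cm_info['html_file'])
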
megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py

@@ -1119,8 +1119,8 @@ def find_repeat_detections(input_filename, output_file_name=None, options=None):
         output_file_name (str, optional): the filename to which we should write results
             with repeat detections removed, typically set to None during the first
             part of the RDE process.
-        options (RepeatDetectionOptions): all the interesting options controlling this
-            process; see RepeatDetectionOptions for details.
+        options (RepeatDetectionOptions, optional): all the interesting options controlling
+            this process; see RepeatDetectionOptions for details.
 
     Returns:
         RepeatDetectionResults: results of the RDE process; see RepeatDetectionResults
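
For context, a minimal sketch of the first pass of the RDE process described above ('results.json' is a hypothetical path):

    from megadetector.postprocessing.repeat_detection_elimination.repeat_detections_core \
        import find_repeat_detections, RepeatDetectionOptions

    options = RepeatDetectionOptions()

    # First pass: find suspicious repeat detections; output_file_name is
    # typically None at this stage, per the docstring
    rde_results = find_repeat_detections('results.json',
                                         output_file_name=None,
                                         options=options)
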
megadetector/taxonomy_mapping/map_new_lila_datasets.py

@@ -15,10 +15,10 @@ import json
 # Created by get_lila_category_list.py
 input_lila_category_list_file = os.path.expanduser('~/lila/lila_categories_list/lila_dataset_to_categories.json')
 
-output_file = os.path.expanduser('~/lila/lila_additions_2025.03.24.csv')
+output_file = os.path.expanduser('~/lila/lila_additions_2025.06.23.csv')
 
 datasets_to_map = [
-    'UNSW Predators'
+    'Nkhotakota Camera Traps'
     ]
 
 
megadetector/taxonomy_mapping/species_lookup.py

@@ -140,7 +140,7 @@ if False:
 
     #%%
 
-    q = 'dasyurus maculatus'
+    q = 'animalia'
 
     taxonomy_preference = 'inat'
     m = get_preferred_taxonomic_match(q,taxonomy_preference)
@@ -154,5 +154,4 @@ if False:
     # raise ValueError('')
     print(m.source)
     print(m.taxonomy_string)
-    # print(m.scientific_name); import clipboard; clipboard.copy(m.scientific_name)
     import clipboard; clipboard.copy(m.taxonomy_string)
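
The interactive cell above exercises get_preferred_taxonomic_match; a minimal sketch of the same flow, assuming the function is defined in (or imported into) species_lookup.py:

    from megadetector.taxonomy_mapping.species_lookup import \
        get_preferred_taxonomic_match

    # Query a name against the preferred taxonomy ('inat' here); per the
    # cell above, the match object carries at least .source and .taxonomy_string
    m = get_preferred_taxonomic_match('animalia', 'inat')
    print(m.source)
    print(m.taxonomy_string)
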
megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py

@@ -52,6 +52,8 @@ if False:
 
     df['used'] = False
 
+    n_dropped = 0
+
     # i_row = 0; row = df.iloc[i_row]; row
     for i_row,row in df.iterrows():
         ds_name = row['dataset_name']
@@ -60,8 +62,11 @@ if False:
         if mapping_name in used_category_mappings:
             df.loc[i_row,'used'] = True
         else:
+            n_dropped += 1
             print('Dropping unused mapping {}'.format(mapping_name))
 
+    print('Dropping {} of {} mappings'.format(n_dropped,len(df)))
+
     df = df[df.used]
     df = df.drop('used',axis=1)
 
@@ -71,29 +76,41 @@ if False:
     assert not os.path.isfile(release_taxonomy_file), \
         'File {} exists, delete it manually before proceeding'.format(release_taxonomy_file)
 
-    known_levels = ['stateofmatter', #noqa
-                    'kingdom',
-                    'phylum','subphylum',
-                    'superclass','class','subclass','infraclass',
-                    'superorder','order','parvorder','suborder','infraorder',
-                    'zoosection',
-                    'superfamily','family','subfamily','tribe',
-                    'genus',
-                    'species','subspecies','variety']
-
     levels_to_include = ['kingdom',
-                         'phylum','subphylum',
-                         'superclass','class','subclass','infraclass',
-                         'superorder','order','suborder','infraorder',
-                         'superfamily','family','subfamily','tribe',
-                         'genus',
-                         'species','subspecies','variety']
-
-    levels_to_exclude = ['stateofmatter','zoosection','parvorder','complex','epifamily']
+                         'phylum',
+                         'subphylum',
+                         'superclass',
+                         'class',
+                         'subclass',
+                         'infraclass',
+                         'superorder',
+                         'order',
+                         'suborder',
+                         'infraorder',
+                         'superfamily',
+                         'family',
+                         'subfamily',
+                         'tribe',
+                         'genus',
+                         'subgenus',
+                         'species',
+                         'subspecies',
+                         'variety']
+
+    levels_to_exclude = ['stateofmatter',
+                         'zoosection',
+                         'parvorder',
+                         'complex',
+                         'epifamily']
+
+    for x in [levels_to_include,levels_to_exclude]:
+        assert len(x) == len(set(x))
 
     for s in levels_to_exclude:
         assert s not in levels_to_include
 
+    known_levels = levels_to_include + levels_to_exclude
+
     levels_used = set()
 
     # i_row = 0; row = df.iloc[i_row]; row
@@ -103,17 +120,21 @@ if False:
             assert not isinstance(row['taxonomy_string'],str)
             continue
 
+        # This is a list of length-4 tuples that each look like:
+        #
+        # (41789, 'species', 'taxidea taxus', ['american badger'])
         taxonomic_match = eval(row['taxonomy_string'])
 
         # match_at_level = taxonomic_match[0]
         for match_at_level in taxonomic_match:
             assert len(match_at_level) == 4
+            # E.g. "species"
             levels_used.add(match_at_level[1])
 
     levels_used = [s for s in levels_used if isinstance(s,str)]
 
     for s in levels_used:
-        assert s in levels_to_exclude or s in levels_to_include, 'Unrecognized level {}'.format(s)
+        assert s in known_levels, 'Unrecognized level {}'.format(s)
 
     for s in levels_to_include:
         assert s in levels_used
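
Because taxonomy_string holds a serialized Python literal, ast.literal_eval is a safer alternative to the eval call above; a minimal sketch using the example tuple from the new comment:

    import ast

    # One serialized match, in the format documented in the comment above
    taxonomy_string = "[(41789, 'species', 'taxidea taxus', ['american badger'])]"

    # literal_eval parses Python literals without executing arbitrary code
    taxonomic_match = ast.literal_eval(taxonomy_string)
    for taxon_id, level, name, common_names in taxonomic_match:
        print(level, name, common_names)  # species taxidea taxus ['american badger']
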
megadetector/taxonomy_mapping/preview_lila_taxonomy.py

@@ -16,7 +16,7 @@ import os
 import pandas as pd
 
 # lila_taxonomy_file = r"c:\git\agentmorrisprivate\lila-taxonomy\lila-taxonomy-mapping.csv"
-lila_taxonomy_file = os.path.expanduser('~/lila/lila_additions_2025.03.24.csv')
+lila_taxonomy_file = os.path.expanduser('~/lila/lila_additions_2025.06.23.csv')
 
 preview_base = os.path.expanduser('~/lila/lila_taxonomy_preview')
 os.makedirs(preview_base,exist_ok=True)