megadetector 5.0.5__py3-none-any.whl → 5.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (132)
  1. api/batch_processing/data_preparation/manage_local_batch.py +302 -263
  2. api/batch_processing/data_preparation/manage_video_batch.py +81 -2
  3. api/batch_processing/postprocessing/add_max_conf.py +1 -0
  4. api/batch_processing/postprocessing/categorize_detections_by_size.py +50 -19
  5. api/batch_processing/postprocessing/compare_batch_results.py +110 -60
  6. api/batch_processing/postprocessing/load_api_results.py +56 -70
  7. api/batch_processing/postprocessing/md_to_coco.py +1 -1
  8. api/batch_processing/postprocessing/md_to_labelme.py +2 -1
  9. api/batch_processing/postprocessing/postprocess_batch_results.py +240 -81
  10. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +625 -0
  11. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
  12. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
  13. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +227 -75
  14. api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
  15. api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
  16. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +2 -2
  17. classification/prepare_classification_script.py +191 -191
  18. data_management/coco_to_yolo.py +68 -45
  19. data_management/databases/integrity_check_json_db.py +7 -5
  20. data_management/generate_crops_from_cct.py +3 -3
  21. data_management/get_image_sizes.py +8 -6
  22. data_management/importers/add_timestamps_to_icct.py +79 -0
  23. data_management/importers/animl_results_to_md_results.py +160 -0
  24. data_management/importers/auckland_doc_test_to_json.py +4 -4
  25. data_management/importers/auckland_doc_to_json.py +1 -1
  26. data_management/importers/awc_to_json.py +5 -5
  27. data_management/importers/bellevue_to_json.py +5 -5
  28. data_management/importers/carrizo_shrubfree_2018.py +5 -5
  29. data_management/importers/carrizo_trail_cam_2017.py +5 -5
  30. data_management/importers/cct_field_adjustments.py +2 -3
  31. data_management/importers/channel_islands_to_cct.py +4 -4
  32. data_management/importers/ena24_to_json.py +5 -5
  33. data_management/importers/helena_to_cct.py +10 -10
  34. data_management/importers/idaho-camera-traps.py +12 -12
  35. data_management/importers/idfg_iwildcam_lila_prep.py +8 -8
  36. data_management/importers/jb_csv_to_json.py +4 -4
  37. data_management/importers/missouri_to_json.py +1 -1
  38. data_management/importers/noaa_seals_2019.py +1 -1
  39. data_management/importers/pc_to_json.py +5 -5
  40. data_management/importers/prepare-noaa-fish-data-for-lila.py +4 -4
  41. data_management/importers/prepare_zsl_imerit.py +5 -5
  42. data_management/importers/rspb_to_json.py +4 -4
  43. data_management/importers/save_the_elephants_survey_A.py +5 -5
  44. data_management/importers/save_the_elephants_survey_B.py +6 -6
  45. data_management/importers/snapshot_safari_importer.py +9 -9
  46. data_management/importers/snapshot_serengeti_lila.py +9 -9
  47. data_management/importers/timelapse_csv_set_to_json.py +5 -7
  48. data_management/importers/ubc_to_json.py +4 -4
  49. data_management/importers/umn_to_json.py +4 -4
  50. data_management/importers/wellington_to_json.py +1 -1
  51. data_management/importers/wi_to_json.py +2 -2
  52. data_management/importers/zamba_results_to_md_results.py +181 -0
  53. data_management/labelme_to_coco.py +35 -7
  54. data_management/labelme_to_yolo.py +229 -0
  55. data_management/lila/add_locations_to_island_camera_traps.py +1 -1
  56. data_management/lila/add_locations_to_nacti.py +147 -0
  57. data_management/lila/create_lila_blank_set.py +474 -0
  58. data_management/lila/create_lila_test_set.py +2 -1
  59. data_management/lila/create_links_to_md_results_files.py +106 -0
  60. data_management/lila/download_lila_subset.py +46 -21
  61. data_management/lila/generate_lila_per_image_labels.py +23 -14
  62. data_management/lila/get_lila_annotation_counts.py +17 -11
  63. data_management/lila/lila_common.py +14 -11
  64. data_management/lila/test_lila_metadata_urls.py +116 -0
  65. data_management/ocr_tools.py +829 -0
  66. data_management/resize_coco_dataset.py +13 -11
  67. data_management/yolo_output_to_md_output.py +84 -12
  68. data_management/yolo_to_coco.py +38 -20
  69. detection/process_video.py +36 -14
  70. detection/pytorch_detector.py +23 -8
  71. detection/run_detector.py +76 -19
  72. detection/run_detector_batch.py +178 -63
  73. detection/run_inference_with_yolov5_val.py +326 -57
  74. detection/run_tiled_inference.py +153 -43
  75. detection/video_utils.py +34 -8
  76. md_utils/ct_utils.py +172 -1
  77. md_utils/md_tests.py +372 -51
  78. md_utils/path_utils.py +167 -39
  79. md_utils/process_utils.py +26 -7
  80. md_utils/split_locations_into_train_val.py +215 -0
  81. md_utils/string_utils.py +10 -0
  82. md_utils/url_utils.py +0 -2
  83. md_utils/write_html_image_list.py +9 -26
  84. md_visualization/plot_utils.py +12 -8
  85. md_visualization/visualization_utils.py +106 -7
  86. md_visualization/visualize_db.py +16 -8
  87. md_visualization/visualize_detector_output.py +208 -97
  88. {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/METADATA +3 -6
  89. {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/RECORD +98 -121
  90. {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/WHEEL +1 -1
  91. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
  92. taxonomy_mapping/map_new_lila_datasets.py +43 -39
  93. taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
  94. taxonomy_mapping/preview_lila_taxonomy.py +27 -27
  95. taxonomy_mapping/species_lookup.py +33 -13
  96. taxonomy_mapping/taxonomy_csv_checker.py +7 -5
  97. api/synchronous/api_core/yolov5/detect.py +0 -252
  98. api/synchronous/api_core/yolov5/export.py +0 -607
  99. api/synchronous/api_core/yolov5/hubconf.py +0 -146
  100. api/synchronous/api_core/yolov5/models/__init__.py +0 -0
  101. api/synchronous/api_core/yolov5/models/common.py +0 -738
  102. api/synchronous/api_core/yolov5/models/experimental.py +0 -104
  103. api/synchronous/api_core/yolov5/models/tf.py +0 -574
  104. api/synchronous/api_core/yolov5/models/yolo.py +0 -338
  105. api/synchronous/api_core/yolov5/train.py +0 -670
  106. api/synchronous/api_core/yolov5/utils/__init__.py +0 -36
  107. api/synchronous/api_core/yolov5/utils/activations.py +0 -103
  108. api/synchronous/api_core/yolov5/utils/augmentations.py +0 -284
  109. api/synchronous/api_core/yolov5/utils/autoanchor.py +0 -170
  110. api/synchronous/api_core/yolov5/utils/autobatch.py +0 -66
  111. api/synchronous/api_core/yolov5/utils/aws/__init__.py +0 -0
  112. api/synchronous/api_core/yolov5/utils/aws/resume.py +0 -40
  113. api/synchronous/api_core/yolov5/utils/benchmarks.py +0 -148
  114. api/synchronous/api_core/yolov5/utils/callbacks.py +0 -71
  115. api/synchronous/api_core/yolov5/utils/dataloaders.py +0 -1087
  116. api/synchronous/api_core/yolov5/utils/downloads.py +0 -178
  117. api/synchronous/api_core/yolov5/utils/flask_rest_api/example_request.py +0 -19
  118. api/synchronous/api_core/yolov5/utils/flask_rest_api/restapi.py +0 -46
  119. api/synchronous/api_core/yolov5/utils/general.py +0 -1018
  120. api/synchronous/api_core/yolov5/utils/loggers/__init__.py +0 -187
  121. api/synchronous/api_core/yolov5/utils/loggers/wandb/__init__.py +0 -0
  122. api/synchronous/api_core/yolov5/utils/loggers/wandb/log_dataset.py +0 -27
  123. api/synchronous/api_core/yolov5/utils/loggers/wandb/sweep.py +0 -41
  124. api/synchronous/api_core/yolov5/utils/loggers/wandb/wandb_utils.py +0 -577
  125. api/synchronous/api_core/yolov5/utils/loss.py +0 -234
  126. api/synchronous/api_core/yolov5/utils/metrics.py +0 -355
  127. api/synchronous/api_core/yolov5/utils/plots.py +0 -489
  128. api/synchronous/api_core/yolov5/utils/torch_utils.py +0 -314
  129. api/synchronous/api_core/yolov5/val.py +0 -394
  130. md_utils/matlab_porting_tools.py +0 -97
  131. {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/LICENSE +0 -0
  132. {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/top_level.txt +0 -0
@@ -2,17 +2,18 @@
2
2
  #
3
3
  # load_api_results.py
4
4
  #
5
- # Loads the output of the batch processing API (json) into a pandas dataframe.
5
+ # DEPRECATED
6
6
  #
7
- # Also functions to group entries by seq_id.
7
+ # As of 2023.12, this module is used in postprocessing and RDE. Not recommended
8
+ # for new code.
8
9
  #
9
- # Includes the deprecated functions that worked with the old CSV API output format.
10
+ # Loads the output of the batch processing API (json) into a Pandas dataframe.
11
+ #
12
+ # Includes functions to read/write the (very very old) .csv results format.
10
13
  #
11
14
  ########
12
15
 
13
- #%% Constants and imports
14
-
15
- from collections import defaultdict
16
+ #%% Imports
16
17
 
17
18
  import json
18
19
  import os
@@ -23,72 +24,32 @@ import pandas as pd
23
24
 
24
25
  from md_utils import ct_utils
25
26
 
26
- headers = ['image_path', 'max_confidence', 'detections']
27
-
28
-
29
- #%% Functions for grouping by sequence_id
30
-
31
- def ss_file_to_file_name(f):
32
- # example
33
- # input 'file': 'SER/S1/F08/F08_R3/S1_F08_R3_PICT1150.JPG'
34
- # output 'id': 'S1/F08/F08_R3/S1_F08_R3_PICT1150.JPG'
35
- return f.split('SER/')[1].split('.JPG')[0]
36
-
37
-
38
- def caltech_file_to_file_name(f):
39
- return f.split('cct_images/')[1].split('.')[0]
40
27
 
41
-
42
- def api_results_groupby(api_output_path, gt_db_indexed, file_to_image_id, field='seq_id'):
43
- """
44
- Given the output file of the API, groupby (currently only seq_id).
45
-
46
- Args:
47
- api_output_path: path to the API output json file
48
- gt_db_indexed: an instance of IndexedJsonDb so we know the seq_id to image_id mapping
49
- file_to_image_id: a function that takes in the 'file' field in 'images' in the detector
50
- output file and converts it to the 'id' field in the gt DB.
51
- field: which field in the 'images' array to group by
52
-
53
- Returns:
54
- A dict where the keys are of the field requested, each points to an array
55
- containing entries in the 'images' section of the output file
56
- """
57
-
58
- with open(api_output_path) as f:
59
- detection_results = json.load(f)
60
-
61
- res = defaultdict(list)
62
- for i in detection_results['images']:
63
- image_id = file_to_image_id(i['file'])
64
- field_val = gt_db_indexed.image_id_to_image[image_id][field]
65
- res[field_val].append(i)
66
- return res
67
-
68
-
69
- #%% Functions for loading the result as a Pandas DataFrame
28
+ #%% Functions for loading .json results into a Pandas DataFrame, and writing back to .json
70
29
 
71
30
  def load_api_results(api_output_path: str, normalize_paths: bool = True,
72
- filename_replacements: Optional[Mapping[str, str]] = None
31
+ filename_replacements: Optional[Mapping[str, str]] = None,
32
+ force_forward_slashes: bool = True
73
33
  ) -> Tuple[pd.DataFrame, Dict]:
74
34
  """
75
- Loads the json formatted results from the batch processing API to a
76
- Pandas DataFrame, mainly useful for various postprocessing functions.
35
+ Loads json-formatted MegaDetector results to a Pandas DataFrame.
77
36
 
78
37
  Args:
79
- api_output_path: path to the API output json file
38
+ api_output_path: path to the output json file
80
39
  normalize_paths: whether to apply os.path.normpath to the 'file' field
81
40
  in each image entry in the output file
82
41
  filename_replacements: replace some path tokens to match local paths to
83
42
  the original blob structure
43
+ force_forward_slashes: whether to convert backslashes to forward slashes
44
+ in filenames
84
45
 
85
46
  Returns:
86
47
  detection_results: pd.DataFrame, contains at least the columns:
87
- ['file', 'detections','failure']
48
+ ['file', 'detections','failure']
88
49
  other_fields: a dict containing fields in the results other than 'images'
89
50
  """
90
51
 
91
- print('Loading API results from {}'.format(api_output_path))
52
+ print('Loading results from {}'.format(api_output_path))
92
53
 
93
54
  with open(api_output_path) as f:
94
55
  detection_results = json.load(f)
@@ -97,7 +58,7 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
97
58
  for s in ['info', 'detection_categories', 'images']:
98
59
  assert s in detection_results, 'Missing field {} in detection results'.format(s)
99
60
 
100
- # Fields in the API output json other than 'images'
61
+ # Fields in the output json other than 'images'
101
62
  other_fields = {}
102
63
  for k, v in detection_results.items():
103
64
  if k != 'images':
@@ -109,6 +70,10 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
109
70
  image['file'] = os.path.normpath(image['file'])
110
71
  # image['file'] = image['file'].replace('\\','/')
111
72
 
73
+ if force_forward_slashes:
74
+ for image in detection_results['images']:
75
+ image['file'] = image['file'].replace('\\','/')
76
+
112
77
  # Replace some path tokens to match local paths to original blob structure
113
78
  if filename_replacements is not None:
114
79
  for string_to_replace in filename_replacements.keys():
@@ -127,9 +92,7 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
127
92
  # Pack the json output into a Pandas DataFrame
128
93
  detection_results = pd.DataFrame(detection_results['images'])
129
94
 
130
-
131
-
132
- print('Finished loading API results for {} images from {}'.format(
95
+ print('Finished loading MegaDetector results for {} images from {}'.format(
133
96
  len(detection_results),api_output_path))
134
97
 
135
98
  return detection_results, other_fields
@@ -137,7 +100,7 @@ def load_api_results(api_output_path: str, normalize_paths: bool = True,
137
100
 
138
101
  def write_api_results(detection_results_table, other_fields, out_path):
139
102
  """
140
- Writes a Pandas DataFrame back to a json that is compatible with the API output format.
103
+ Writes a Pandas DataFrame to the MegaDetector .json format.
141
104
  """
142
105
 
143
106
  print('Writing detection results to {}'.format(out_path))
@@ -148,6 +111,27 @@ def write_api_results(detection_results_table, other_fields, out_path):
148
111
  double_precision=3)
149
112
  images = json.loads(images)
150
113
  fields['images'] = images
114
+
115
+ # Convert the 'version' field back to a string as per format convention
116
+ try:
117
+ version = other_fields['info']['format_version']
118
+ if not isinstance(version,str):
119
+ other_fields['info']['format_version'] = str(version)
120
+ except Exception:
121
+ print('Warning: error determining format version')
122
+ pass
123
+
124
+ # Remove 'max_detection_conf' as per newer file convention (format >= v1.3)
125
+ try:
126
+ version = other_fields['info']['format_version']
127
+ version = float(version)
128
+ if version >= 1.3:
129
+ for im in images:
130
+ if 'max_detection_conf' in im:
131
+ del im['max_detection_conf']
132
+ except Exception:
133
+ print('Warning: error removing max_detection_conf from output')
134
+ pass
151
135
 
152
136
  with open(out_path, 'w') as f:
153
137
  json.dump(fields, f, indent=1)
@@ -157,17 +141,18 @@ def write_api_results(detection_results_table, other_fields, out_path):
157
141
 
158
142
  def load_api_results_csv(filename, normalize_paths=True, filename_replacements={}, nrows=None):
159
143
  """
160
- DEPRECATED
161
- Loads .csv-formatted results from the batch processing API to a pandas table
144
+ [DEPRECATED]
145
+
146
+ Loads .csv-formatted MegaDetector results to a pandas table
162
147
  """
163
148
 
164
- print('Loading API results from {}'.format(filename))
149
+ print('Loading MegaDetector results from {}'.format(filename))
165
150
 
166
151
  detection_results = pd.read_csv(filename,nrows=nrows)
167
152
 
168
- print('De-serializing API results from {}'.format(filename))
153
+ print('De-serializing MegaDetector results from {}'.format(filename))
169
154
 
170
- # Sanity-check that this is really a detector output file
155
+ # Confirm that this is really a detector output file
171
156
  for s in ['image_path','max_confidence','detections']:
172
157
  assert s in detection_results.columns
173
158
 
@@ -191,17 +176,18 @@ def load_api_results_csv(filename, normalize_paths=True, filename_replacements={
191
176
  fn = fn.replace(string_to_replace,replacement_string)
192
177
  detection_results.at[iRow,'image_path'] = fn
193
178
 
194
- print('Finished loading and de-serializing API results for {} images from {}'.format(
179
+ print('Finished loading and de-serializing MD results for {} images from {}'.format(
195
180
  len(detection_results),filename))
196
181
 
197
182
  return detection_results
198
183
 
199
184
 
200
185
  def write_api_results_csv(detection_results, filename):
201
- """
202
- DEPRECATED
203
- Writes a pandas table to csv in a way that's compatible with the .csv API output
204
- format. Currently just a wrapper around to_csv that just forces output writing
186
+ """
187
+ [DEPRECATED]
188
+
189
+ Writes a Pandas table to csv in a way that's compatible with the .csv output
190
+ format. Currently just a wrapper around to_csv that forces output writing
205
191
  to go through a common code path.
206
192
  """
207
193
 
@@ -227,7 +227,7 @@ if False:
227
227
  options.viz_size = (900, -1)
228
228
  options.num_to_visualize = 5000
229
229
 
230
- html_file,_ = visualize_db.process_images(coco_output_file,
230
+ html_file,_ = visualize_db.visualize_db(coco_output_file,
231
231
  os.path.expanduser('~/tmp/md_to_coco_preview'),
232
232
  image_folder,options)
233
233
 
@@ -40,7 +40,7 @@ def get_labelme_dict_for_image(im,image_base_name,category_id_to_name,info=None,
40
40
 
41
41
  output_dict = {}
42
42
  if info is not None:
43
- output_dict['md_info'] = info
43
+ output_dict['detector_info'] = info
44
44
  output_dict['version'] = '5.3.0a0'
45
45
  output_dict['flags'] = {}
46
46
  output_dict['shapes'] = []
@@ -48,6 +48,7 @@ def get_labelme_dict_for_image(im,image_base_name,category_id_to_name,info=None,
48
48
  output_dict['imageHeight'] = im['height']
49
49
  output_dict['imageWidth'] = im['width']
50
50
  output_dict['imageData'] = None
51
+ output_dict['detections'] = im['detections']
51
52
 
52
53
  for det in im['detections']:
53
54