megadetector 5.0.9__py3-none-any.whl → 5.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (226)
  1. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/LICENSE +0 -0
  2. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/METADATA +12 -11
  3. megadetector-5.0.11.dist-info/RECORD +5 -0
  4. megadetector-5.0.11.dist-info/top_level.txt +1 -0
  5. api/__init__.py +0 -0
  6. api/batch_processing/__init__.py +0 -0
  7. api/batch_processing/api_core/__init__.py +0 -0
  8. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  9. api/batch_processing/api_core/batch_service/score.py +0 -439
  10. api/batch_processing/api_core/server.py +0 -294
  11. api/batch_processing/api_core/server_api_config.py +0 -98
  12. api/batch_processing/api_core/server_app_config.py +0 -55
  13. api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  14. api/batch_processing/api_core/server_job_status_table.py +0 -152
  15. api/batch_processing/api_core/server_orchestration.py +0 -360
  16. api/batch_processing/api_core/server_utils.py +0 -92
  17. api/batch_processing/api_core_support/__init__.py +0 -0
  18. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  19. api/batch_processing/api_support/__init__.py +0 -0
  20. api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  21. api/batch_processing/data_preparation/__init__.py +0 -0
  22. api/batch_processing/data_preparation/manage_local_batch.py +0 -2391
  23. api/batch_processing/data_preparation/manage_video_batch.py +0 -327
  24. api/batch_processing/integration/digiKam/setup.py +0 -6
  25. api/batch_processing/integration/digiKam/xmp_integration.py +0 -465
  26. api/batch_processing/integration/eMammal/test_scripts/config_template.py +0 -5
  27. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -126
  28. api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +0 -55
  29. api/batch_processing/postprocessing/__init__.py +0 -0
  30. api/batch_processing/postprocessing/add_max_conf.py +0 -64
  31. api/batch_processing/postprocessing/categorize_detections_by_size.py +0 -163
  32. api/batch_processing/postprocessing/combine_api_outputs.py +0 -249
  33. api/batch_processing/postprocessing/compare_batch_results.py +0 -958
  34. api/batch_processing/postprocessing/convert_output_format.py +0 -397
  35. api/batch_processing/postprocessing/load_api_results.py +0 -195
  36. api/batch_processing/postprocessing/md_to_coco.py +0 -310
  37. api/batch_processing/postprocessing/md_to_labelme.py +0 -330
  38. api/batch_processing/postprocessing/merge_detections.py +0 -401
  39. api/batch_processing/postprocessing/postprocess_batch_results.py +0 -1904
  40. api/batch_processing/postprocessing/remap_detection_categories.py +0 -170
  41. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +0 -661
  42. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +0 -211
  43. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +0 -82
  44. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +0 -1631
  45. api/batch_processing/postprocessing/separate_detections_into_folders.py +0 -731
  46. api/batch_processing/postprocessing/subset_json_detector_output.py +0 -696
  47. api/batch_processing/postprocessing/top_folders_to_bottom.py +0 -223
  48. api/synchronous/__init__.py +0 -0
  49. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  50. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -152
  51. api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -266
  52. api/synchronous/api_core/animal_detection_api/config.py +0 -35
  53. api/synchronous/api_core/animal_detection_api/data_management/annotations/annotation_constants.py +0 -47
  54. api/synchronous/api_core/animal_detection_api/detection/detector_training/copy_checkpoints.py +0 -43
  55. api/synchronous/api_core/animal_detection_api/detection/detector_training/model_main_tf2.py +0 -114
  56. api/synchronous/api_core/animal_detection_api/detection/process_video.py +0 -543
  57. api/synchronous/api_core/animal_detection_api/detection/pytorch_detector.py +0 -304
  58. api/synchronous/api_core/animal_detection_api/detection/run_detector.py +0 -627
  59. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +0 -1029
  60. api/synchronous/api_core/animal_detection_api/detection/run_inference_with_yolov5_val.py +0 -581
  61. api/synchronous/api_core/animal_detection_api/detection/run_tiled_inference.py +0 -754
  62. api/synchronous/api_core/animal_detection_api/detection/tf_detector.py +0 -165
  63. api/synchronous/api_core/animal_detection_api/detection/video_utils.py +0 -495
  64. api/synchronous/api_core/animal_detection_api/md_utils/azure_utils.py +0 -174
  65. api/synchronous/api_core/animal_detection_api/md_utils/ct_utils.py +0 -262
  66. api/synchronous/api_core/animal_detection_api/md_utils/directory_listing.py +0 -251
  67. api/synchronous/api_core/animal_detection_api/md_utils/matlab_porting_tools.py +0 -97
  68. api/synchronous/api_core/animal_detection_api/md_utils/path_utils.py +0 -416
  69. api/synchronous/api_core/animal_detection_api/md_utils/process_utils.py +0 -110
  70. api/synchronous/api_core/animal_detection_api/md_utils/sas_blob_utils.py +0 -509
  71. api/synchronous/api_core/animal_detection_api/md_utils/string_utils.py +0 -59
  72. api/synchronous/api_core/animal_detection_api/md_utils/url_utils.py +0 -144
  73. api/synchronous/api_core/animal_detection_api/md_utils/write_html_image_list.py +0 -226
  74. api/synchronous/api_core/animal_detection_api/md_visualization/visualization_utils.py +0 -841
  75. api/synchronous/api_core/tests/__init__.py +0 -0
  76. api/synchronous/api_core/tests/load_test.py +0 -110
  77. classification/__init__.py +0 -0
  78. classification/aggregate_classifier_probs.py +0 -108
  79. classification/analyze_failed_images.py +0 -227
  80. classification/cache_batchapi_outputs.py +0 -198
  81. classification/create_classification_dataset.py +0 -627
  82. classification/crop_detections.py +0 -516
  83. classification/csv_to_json.py +0 -226
  84. classification/detect_and_crop.py +0 -855
  85. classification/efficientnet/__init__.py +0 -9
  86. classification/efficientnet/model.py +0 -415
  87. classification/efficientnet/utils.py +0 -610
  88. classification/evaluate_model.py +0 -520
  89. classification/identify_mislabeled_candidates.py +0 -152
  90. classification/json_to_azcopy_list.py +0 -63
  91. classification/json_validator.py +0 -695
  92. classification/map_classification_categories.py +0 -276
  93. classification/merge_classification_detection_output.py +0 -506
  94. classification/prepare_classification_script.py +0 -194
  95. classification/prepare_classification_script_mc.py +0 -228
  96. classification/run_classifier.py +0 -286
  97. classification/save_mislabeled.py +0 -110
  98. classification/train_classifier.py +0 -825
  99. classification/train_classifier_tf.py +0 -724
  100. classification/train_utils.py +0 -322
  101. data_management/__init__.py +0 -0
  102. data_management/annotations/__init__.py +0 -0
  103. data_management/annotations/annotation_constants.py +0 -34
  104. data_management/camtrap_dp_to_coco.py +0 -238
  105. data_management/cct_json_utils.py +0 -395
  106. data_management/cct_to_md.py +0 -176
  107. data_management/cct_to_wi.py +0 -289
  108. data_management/coco_to_labelme.py +0 -272
  109. data_management/coco_to_yolo.py +0 -662
  110. data_management/databases/__init__.py +0 -0
  111. data_management/databases/add_width_and_height_to_db.py +0 -33
  112. data_management/databases/combine_coco_camera_traps_files.py +0 -206
  113. data_management/databases/integrity_check_json_db.py +0 -477
  114. data_management/databases/subset_json_db.py +0 -115
  115. data_management/generate_crops_from_cct.py +0 -149
  116. data_management/get_image_sizes.py +0 -188
  117. data_management/importers/add_nacti_sizes.py +0 -52
  118. data_management/importers/add_timestamps_to_icct.py +0 -79
  119. data_management/importers/animl_results_to_md_results.py +0 -158
  120. data_management/importers/auckland_doc_test_to_json.py +0 -372
  121. data_management/importers/auckland_doc_to_json.py +0 -200
  122. data_management/importers/awc_to_json.py +0 -189
  123. data_management/importers/bellevue_to_json.py +0 -273
  124. data_management/importers/cacophony-thermal-importer.py +0 -796
  125. data_management/importers/carrizo_shrubfree_2018.py +0 -268
  126. data_management/importers/carrizo_trail_cam_2017.py +0 -287
  127. data_management/importers/cct_field_adjustments.py +0 -57
  128. data_management/importers/channel_islands_to_cct.py +0 -913
  129. data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  130. data_management/importers/eMammal/eMammal_helpers.py +0 -249
  131. data_management/importers/eMammal/make_eMammal_json.py +0 -223
  132. data_management/importers/ena24_to_json.py +0 -275
  133. data_management/importers/filenames_to_json.py +0 -385
  134. data_management/importers/helena_to_cct.py +0 -282
  135. data_management/importers/idaho-camera-traps.py +0 -1407
  136. data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  137. data_management/importers/jb_csv_to_json.py +0 -150
  138. data_management/importers/mcgill_to_json.py +0 -250
  139. data_management/importers/missouri_to_json.py +0 -489
  140. data_management/importers/nacti_fieldname_adjustments.py +0 -79
  141. data_management/importers/noaa_seals_2019.py +0 -181
  142. data_management/importers/pc_to_json.py +0 -365
  143. data_management/importers/plot_wni_giraffes.py +0 -123
  144. data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
  145. data_management/importers/prepare_zsl_imerit.py +0 -131
  146. data_management/importers/rspb_to_json.py +0 -356
  147. data_management/importers/save_the_elephants_survey_A.py +0 -320
  148. data_management/importers/save_the_elephants_survey_B.py +0 -332
  149. data_management/importers/snapshot_safari_importer.py +0 -758
  150. data_management/importers/snapshot_safari_importer_reprise.py +0 -665
  151. data_management/importers/snapshot_serengeti_lila.py +0 -1067
  152. data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  153. data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  154. data_management/importers/sulross_get_exif.py +0 -65
  155. data_management/importers/timelapse_csv_set_to_json.py +0 -490
  156. data_management/importers/ubc_to_json.py +0 -399
  157. data_management/importers/umn_to_json.py +0 -507
  158. data_management/importers/wellington_to_json.py +0 -263
  159. data_management/importers/wi_to_json.py +0 -441
  160. data_management/importers/zamba_results_to_md_results.py +0 -181
  161. data_management/labelme_to_coco.py +0 -548
  162. data_management/labelme_to_yolo.py +0 -272
  163. data_management/lila/__init__.py +0 -0
  164. data_management/lila/add_locations_to_island_camera_traps.py +0 -97
  165. data_management/lila/add_locations_to_nacti.py +0 -147
  166. data_management/lila/create_lila_blank_set.py +0 -557
  167. data_management/lila/create_lila_test_set.py +0 -151
  168. data_management/lila/create_links_to_md_results_files.py +0 -106
  169. data_management/lila/download_lila_subset.py +0 -177
  170. data_management/lila/generate_lila_per_image_labels.py +0 -515
  171. data_management/lila/get_lila_annotation_counts.py +0 -170
  172. data_management/lila/get_lila_image_counts.py +0 -111
  173. data_management/lila/lila_common.py +0 -300
  174. data_management/lila/test_lila_metadata_urls.py +0 -132
  175. data_management/ocr_tools.py +0 -874
  176. data_management/read_exif.py +0 -681
  177. data_management/remap_coco_categories.py +0 -84
  178. data_management/remove_exif.py +0 -66
  179. data_management/resize_coco_dataset.py +0 -189
  180. data_management/wi_download_csv_to_coco.py +0 -246
  181. data_management/yolo_output_to_md_output.py +0 -441
  182. data_management/yolo_to_coco.py +0 -676
  183. detection/__init__.py +0 -0
  184. detection/detector_training/__init__.py +0 -0
  185. detection/detector_training/model_main_tf2.py +0 -114
  186. detection/process_video.py +0 -703
  187. detection/pytorch_detector.py +0 -337
  188. detection/run_detector.py +0 -779
  189. detection/run_detector_batch.py +0 -1219
  190. detection/run_inference_with_yolov5_val.py +0 -917
  191. detection/run_tiled_inference.py +0 -935
  192. detection/tf_detector.py +0 -188
  193. detection/video_utils.py +0 -606
  194. docs/source/conf.py +0 -43
  195. md_utils/__init__.py +0 -0
  196. md_utils/azure_utils.py +0 -174
  197. md_utils/ct_utils.py +0 -612
  198. md_utils/directory_listing.py +0 -246
  199. md_utils/md_tests.py +0 -968
  200. md_utils/path_utils.py +0 -1044
  201. md_utils/process_utils.py +0 -157
  202. md_utils/sas_blob_utils.py +0 -509
  203. md_utils/split_locations_into_train_val.py +0 -228
  204. md_utils/string_utils.py +0 -92
  205. md_utils/url_utils.py +0 -323
  206. md_utils/write_html_image_list.py +0 -225
  207. md_visualization/__init__.py +0 -0
  208. md_visualization/plot_utils.py +0 -293
  209. md_visualization/render_images_with_thumbnails.py +0 -275
  210. md_visualization/visualization_utils.py +0 -1537
  211. md_visualization/visualize_db.py +0 -551
  212. md_visualization/visualize_detector_output.py +0 -406
  213. megadetector-5.0.9.dist-info/RECORD +0 -224
  214. megadetector-5.0.9.dist-info/top_level.txt +0 -8
  215. taxonomy_mapping/__init__.py +0 -0
  216. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +0 -491
  217. taxonomy_mapping/map_new_lila_datasets.py +0 -154
  218. taxonomy_mapping/prepare_lila_taxonomy_release.py +0 -142
  219. taxonomy_mapping/preview_lila_taxonomy.py +0 -591
  220. taxonomy_mapping/retrieve_sample_image.py +0 -71
  221. taxonomy_mapping/simple_image_download.py +0 -218
  222. taxonomy_mapping/species_lookup.py +0 -834
  223. taxonomy_mapping/taxonomy_csv_checker.py +0 -159
  224. taxonomy_mapping/taxonomy_graph.py +0 -346
  225. taxonomy_mapping/validate_lila_category_mappings.py +0 -83
  226. {megadetector-5.0.9.dist-info → megadetector-5.0.11.dist-info}/WHEEL +0 -0
@@ -1,506 +0,0 @@
1
- """
2
-
3
- merge_classification_detection_output.py
4
-
5
- Merges classification results with Batch Detection API outputs.
6
-
7
- This script takes 2 main files as input:
8
-
9
- 1) Either a "dataset CSV" (output of create_classification_dataset.py) or a
10
- "classification results CSV" (output of evaluate_model.py). The CSV is
11
- expected to have columns listed below. The 'label' and [label names] columns
12
- are optional, but at least one of them must be provided.
13
- * 'path': str, path to cropped image
14
- * if passing in a detections JSON, must match
15
- <img_file>___cropXX_mdvY.Y.jpg
16
- * if passing in a queried images JSON, must match
17
- <dataset>/<img_file>___cropXX_mdvY.Y.jpg or
18
- <dataset>/<img_file>___cropXX.jpg
19
- * 'label': str, label assigned to this crop
20
- * [label names]: float, confidence in each label
21
-
22
- 2) Either a "detections JSON" (output of MegaDetector) or a "queried images
23
- JSON" (output of json_validator.py).
24
-
25
- If the CSV contains [label names] columns (e.g., output of evaluate_model.py),
26
- then each crop's "classifications" output will have one value per category.
27
- Categories are sorted decreasing by confidence.
28
- "classifications": [
29
- ["3", 0.901],
30
- ["1", 0.071],
31
- ["4", 0.025],
32
- ["2", 0.003],
33
- ]
34
-
35
- If the CSV only contains the 'label' column (e.g., output of
36
- create_classification_dataset.py), then each crop's "classifications" output
37
- will have only one value, with a confidence of 1.0. The label's classification
38
- category ID is always at least 1,000,000, to distinguish it from a predicted
39
- category ID.
40
- "classifications": [
41
- ["1000004", 1.0]
42
- ]
43
-
44
- If the CSV contains both [label names] and 'label' columns, then both the
45
- predicted categories and label category will be included. By default, the
46
- label-category is included last; if the --label-first flag is given, then the
47
- label category is placed first in the results.
48
- "classifications": [
49
- ["1000004", 1.0], # label put first if --label-first flag is given
50
- ["3", 0.901], # all other results are sorted by confidence
51
- ["1", 0.071],
52
- ["4", 0.025],
53
- ["2", 0.003]
54
- ]
55
-
56
- """
57
-
58
- #%% Imports
59
-
60
- from __future__ import annotations
61
-
62
- import argparse
63
- import datetime
64
- import json
65
- import os
66
-
67
- from collections.abc import Mapping, Sequence
68
- from typing import Any
69
-
70
- import pandas as pd
71
- from tqdm import tqdm
72
-
73
- from md_utils.ct_utils import truncate_float
74
-
75
-
76
- #%% Example usage
77
-
78
- """
79
- python merge_classification_detection_output.py \
80
- BASE_LOGDIR/LOGDIR/outputs_test.csv.gz \
81
- BASE_LOGDIR/label_index.json \
82
- BASE_LOGDIR/queried_images.json \
83
- --classifier-name "efficientnet-b3-idfg-moredata" \
84
- --detector-output-cache-dir $HOME/classifier-training/mdcache \
85
- --detector-version "4.1" \
86
- --output-json BASE_LOGDIR/LOGDIR/classifier_results.json \
87
- --datasets idfg idfg_swwlf_2019
88
- """
89
-
90
-
91
- #%% Support functions
92
-
93
- def row_to_classification_list(row: Mapping[str, Any],
94
- label_names: Sequence[str],
95
- contains_preds: bool,
96
- label_pos: str | None,
97
- threshold: float,
98
- relative_conf: bool = False
99
- ) -> list[tuple[str, float]]:
100
- """
101
- Given a mapping from label name to output probability, returns a list of
102
- tuples, (str(label_id), prob), which can be serialized into the Batch API
103
- output format.
104
-
105
- The list of tuples is returned in sorted order by the predicted probability
106
- for each label.
107
-
108
- If 'label' is in row and label_pos is not None, then we add
109
- (str(label_id + 1_000_000), 1.) to the list. If label_pos='first', we put this at
110
- the front of the list. Otherwise, we put it at the end.
111
- """
112
-
113
- contains_label = ('label' in row)
114
- assert contains_label or contains_preds
115
- if relative_conf:
116
- assert contains_label and contains_preds
117
-
118
- result = []
119
- if contains_preds:
120
- result = [(str(i), row[label]) for i, label in enumerate(label_names)]
121
- if relative_conf:
122
- label_conf = row[row['label']]
123
- result = [(k, max(v - label_conf, 0)) for k, v in result]
124
-
125
- # filter out confidences below the threshold, and set precision to 4
126
- result = [
127
- (k, truncate_float(conf, precision=4))
128
- for k, conf in result if conf >= threshold
129
- ]
130
-
131
- # sort from highest to lowest probability
132
- result = sorted(result, key=lambda x: x[1], reverse=True)
133
-
134
- if contains_label and label_pos is not None:
135
- label = row['label']
136
- label_id = label_names.index(label)
137
- item = (str(label_id + 1_000_000), 1.)
138
- if label_pos == 'first':
139
- result = [item] + result
140
- else:
141
- result.append(item)
142
- return result
143
-
144
-
145
- def process_queried_images(
146
- df: pd.DataFrame,
147
- queried_images_json_path: str,
148
- detector_output_cache_base_dir: str,
149
- detector_version: str,
150
- datasets: Sequence[str] | None = None,
151
- samples_per_label: int | None = None,
152
- seed: int = 123
153
- ) -> dict[str, Any]:
154
- """
155
- Creates a detection JSON object roughly in the Batch API detection
156
- format.
157
-
158
- Detections are either ground-truth (from the queried images JSON) or
159
- retrieved from the detector output cache. Only images corresponding to crop
160
- paths from the given pd.DataFrame are included in the detection JSON.
161
-
162
- Args:
163
- df: pd.DataFrame, either a "classification dataset CSV" or a
164
- "classification results CSV", column 'path' has format
165
- <dataset>/<img_file>___cropXX[...].jpg
166
- queried_images_json_path: str, path to queried images JSON
167
- detector_output_cache_base_dir: str
168
- detector_version: str
169
- datasets: optional list of str, only crops from these datasets will be
170
- included in the output, set to None to include all datasets
171
- samples_per_label: optional int, if not None, then randomly sample this
172
- many bounding boxes per label (each label must have at least this
173
- many examples)
174
- seed: int, used for random sampling if samples_per_label is not None
175
-
176
- Returns: dict, detections JSON file, except that the 'images' field is a
177
- dict (img_path => dict) instead of a list
178
- """
179
-
180
- # input validation
181
- assert os.path.exists(queried_images_json_path)
182
- detection_cache_dir = os.path.join(
183
- detector_output_cache_base_dir, f'v{detector_version}')
184
- assert os.path.isdir(detection_cache_dir)
185
-
186
- # extract dataset name from crop path so we can process 1 dataset at a time
187
- df['dataset'] = df.index.map(lambda x: x[:x.find('/')])
188
- unique_datasets = df['dataset'].unique()
189
-
190
- if datasets is not None:
191
- for ds in datasets:
192
- assert ds in unique_datasets
193
- df = df[df['dataset'].isin(datasets)] # filter by dataset
194
- else:
195
- datasets = unique_datasets
196
-
197
- # randomly sample images for each class
198
- if samples_per_label is not None:
199
- print(f'Sampling {samples_per_label} bounding boxes per label')
200
- df = df.groupby('label').sample(samples_per_label, random_state=seed)
201
-
202
- # load queried images JSON, needed for ground-truth bbox info
203
- with open(queried_images_json_path, 'r') as f:
204
- queried_images_js = json.load(f)
205
-
206
- merged_js: dict[str, Any] = {
207
- 'images': {}, # start as dict, will convert to list later
208
- 'info': {}
209
- }
210
- images = merged_js['images']
211
-
212
- for ds in datasets:
213
- print('processing dataset:', ds)
214
- ds_df = df[df['dataset'] == ds]
215
-
216
- with open(os.path.join(detection_cache_dir, f'{ds}.json'), 'r') as f:
217
- detection_js = json.load(f)
218
- img_file_to_index = {
219
- im['file']: idx
220
- for idx, im in enumerate(detection_js['images'])
221
- }
222
-
223
- # compare info dicts
224
- class_info = merged_js['info']
225
- detection_info = detection_js['info']
226
- key = 'detector'
227
- if key not in class_info:
228
- class_info[key] = detection_info[key]
229
- assert class_info[key] == detection_info[key]
230
-
231
- # compare detection categories
232
- key = 'detection_categories'
233
- if key not in merged_js:
234
- merged_js[key] = detection_js[key]
235
- assert merged_js[key] == detection_js[key]
236
- cat_to_catid = {v: k for k, v in detection_js[key].items()}
237
-
238
- for crop_path in tqdm(ds_df.index):
239
- # crop_path: <dataset>/<img_file>___cropXX_mdvY.Y.jpg
240
- # [----<img_path>----] [-<suffix>--]
241
- img_path, suffix = crop_path.split('___crop')
242
- img_file = img_path[img_path.find('/') + 1:]
243
-
244
- # file has detection entry
245
- if '_mdv' in suffix and img_path not in images:
246
- img_idx = img_file_to_index[img_file]
247
- images[img_path] = detection_js['images'][img_idx]
248
- images[img_path]['file'] = img_path
249
-
250
- # bounding box is from ground truth
251
- elif img_path not in images:
252
- images[img_path] = {
253
- 'file': img_path,
254
- 'detections': [
255
- {
256
- 'category': cat_to_catid[bbox_dict['category']],
257
- 'conf': 1.0,
258
- 'bbox': bbox_dict['bbox']
259
- }
260
- for bbox_dict in queried_images_js[img_path]['bbox']
261
- ]
262
- }
263
- return merged_js
264
-
265
-
266
- def combine_classification_with_detection(
267
- detection_js: dict[str, Any],
268
- df: pd.DataFrame,
269
- idx_to_label: Mapping[str, str],
270
- label_names: Sequence[str],
271
- classifier_name: str,
272
- classifier_timestamp: str,
273
- threshold: float,
274
- label_pos: str | None = None,
275
- relative_conf: bool = False,
276
- typical_confidence_threshold: float = None
277
- ) -> dict[str, Any]:
278
- """
279
- Adds classification information to a detection JSON. Classification
280
- information may include the true label and/or the predicted confidences
281
- of each label.
282
-
283
- Args:
284
- detection_js: dict, detections JSON file, except that the 'images'
285
- field is a dict (img_path => dict) instead of a list
286
- df: pd.DataFrame, classification results, indexed by crop path
287
- idx_to_label: dict, str(label_id) => label name, may also include
288
- str(label_id + 1e6) => 'label: {label_name}'
289
- label_names: list of str, label names
290
- classifier_name: str, name of classifier to include in output JSON
291
- classifier_timestamp: str, timestamp to include in output JSON
292
- threshold: float, for each crop, omit classification results for
293
- categories whose confidence is below this threshold
294
- label_pos: one of [None, 'first', 'last']
295
- None: do not include labels in the output JSON
296
- 'first' / 'last': position in classification list to put the label
297
- relative_conf: bool, if True then for each class, outputs its relative
298
- confidence over the confidence of the true label, requires 'label'
299
- to be in CSV
300
- typical_confidence_threshold: float, useful default confidence
301
- threshold; not used directly, just passed along to the output file
302
-
303
- Returns: dict, detections JSON file updated with classification results
304
- """
305
-
306
- classification_metadata = {
307
- 'classifier': classifier_name,
308
- 'classification_completion_time': classifier_timestamp
309
- }
310
-
311
- if typical_confidence_threshold is not None:
312
- classification_metadata['classifier_metadata'] = \
313
- {'typical_classification_threshold':typical_confidence_threshold}
314
-
315
- detection_js['info'].update(classification_metadata)
316
- detection_js['classification_categories'] = idx_to_label
317
-
318
- contains_preds = (set(label_names) <= set(df.columns))
319
- if not contains_preds:
320
- print('CSV does not contain predictions. Outputting labels only.')
321
-
322
- images = detection_js['images']
323
-
324
- for crop_path in tqdm(df.index):
325
- # crop_path: <dataset>/<img_file>___cropXX_mdvY.Y.jpg
326
- # [----<img_path>----] [-<suffix>--]
327
- img_path, suffix = crop_path.split('___crop')
328
- crop_index = int(suffix[:2])
329
-
330
- detection_dict = images[img_path]['detections'][crop_index]
331
- detection_dict['classifications'] = row_to_classification_list(
332
- row=df.loc[crop_path], label_names=label_names,
333
- contains_preds=contains_preds, label_pos=label_pos,
334
- threshold=threshold, relative_conf=relative_conf)
335
-
336
- detection_js['images'] = list(images.values())
337
- return detection_js
338
-
339
-
340
- #%% Main function
341
-
342
- def main(classification_csv_path: str,
343
- label_names_json_path: str,
344
- output_json_path: str,
345
- classifier_name: str,
346
- threshold: float,
347
- datasets: Sequence[str] | None,
348
- detection_json_path: str | None,
349
- queried_images_json_path: str | None,
350
- detector_output_cache_base_dir: str | None,
351
- detector_version: str | None,
352
- samples_per_label: int | None,
353
- seed: int,
354
- label_pos: str | None,
355
- relative_conf: bool,
356
- typical_confidence_threshold: float) -> None:
357
-
358
- # input validation
359
- assert os.path.exists(classification_csv_path)
360
- assert os.path.exists(label_names_json_path)
361
- assert 0 <= threshold <= 1
362
- for x in [detection_json_path, queried_images_json_path]:
363
- if x is not None:
364
- assert os.path.exists(x)
365
- assert label_pos in [None, 'first', 'last']
366
-
367
- # load classification CSV
368
- print('Loading classification CSV...')
369
- df = pd.read_csv(classification_csv_path, float_precision='high',
370
- index_col='path')
371
- if relative_conf or label_pos is not None:
372
- assert 'label' in df.columns
373
-
374
- # load label names
375
- with open(label_names_json_path, 'r') as f:
376
- idx_to_label = json.load(f)
377
- label_names = [idx_to_label[str(i)] for i in range(len(idx_to_label))]
378
- if 'label' in df.columns:
379
- for i, label in enumerate(label_names):
380
- idx_to_label[str(i + 1_000_000)] = f'label: {label}'
381
-
382
- if queried_images_json_path is not None:
383
- assert detector_output_cache_base_dir is not None
384
- assert detector_version is not None
385
- detection_js = process_queried_images(
386
- df=df, queried_images_json_path=queried_images_json_path,
387
- detector_output_cache_base_dir=detector_output_cache_base_dir,
388
- detector_version=detector_version, datasets=datasets,
389
- samples_per_label=samples_per_label, seed=seed)
390
- elif detection_json_path is not None:
391
- with open(detection_json_path, 'r') as f:
392
- detection_js = json.load(f)
393
- images = {}
394
- for img in detection_js['images']:
395
- path = img['file']
396
- if datasets is None or path[:path.find('/')] in datasets:
397
- images[path] = img
398
- detection_js['images'] = images
399
-
400
- classification_time = datetime.date.fromtimestamp(
401
- os.path.getmtime(classification_csv_path))
402
- classifier_timestamp = classification_time.strftime('%Y-%m-%d %H:%M:%S')
403
-
404
- classification_js = combine_classification_with_detection(
405
- detection_js=detection_js, df=df, idx_to_label=idx_to_label,
406
- label_names=label_names, classifier_name=classifier_name,
407
- classifier_timestamp=classifier_timestamp, threshold=threshold,
408
- label_pos=label_pos, relative_conf=relative_conf,
409
- typical_confidence_threshold=typical_confidence_threshold)
410
-
411
- os.makedirs(os.path.dirname(output_json_path), exist_ok=True)
412
- with open(output_json_path, 'w') as f:
413
- json.dump(classification_js, f, indent=1)
414
-
415
- print('Wrote merged classification/detection results to {}'.format(output_json_path))
416
-
417
-
418
- #%% Command-line driver
419
-
420
- def _parse_args() -> argparse.Namespace:
421
-
422
- parser = argparse.ArgumentParser(
423
- formatter_class=argparse.ArgumentDefaultsHelpFormatter,
424
- description='Merges classification results with Batch Detection API '
425
- 'outputs.')
426
- parser.add_argument(
427
- 'classification_csv',
428
- help='path to classification CSV')
429
- parser.add_argument(
430
- 'label_names_json',
431
- help='path to JSON file mapping label index to label name')
432
- parser.add_argument(
433
- '-o', '--output-json', required=True,
434
- help='(required) path to save output JSON with both detection and '
435
- 'classification results')
436
- parser.add_argument(
437
- '-n', '--classifier-name', required=True,
438
- help='(required) name of classifier')
439
- parser.add_argument(
440
- '-t', '--threshold', type=float, default=0.1,
441
- help='Confidence threshold between 0 and 1. In the output file, omit '
442
- 'classifier results on classes whose confidence is below this '
443
- 'threshold.')
444
- parser.add_argument(
445
- '-d', '--datasets', nargs='*',
446
- help='optionally limit output to images from certain datasets. Assumes '
447
- 'that image paths are given as <dataset>/<img_file>.')
448
- parser.add_argument(
449
- '--typical-confidence-threshold', type=float, default=None,
450
- help='useful default confidence threshold; not used directly, just '
451
- 'passed along to the output file')
452
-
453
- detection_json_group = parser.add_argument_group(
454
- 'arguments for passing in a detections JSON file')
455
- detection_json_group.add_argument(
456
- '-j', '--detection-json',
457
- help='path to detections JSON file')
458
-
459
- queried_images_group = parser.add_argument_group(
460
- 'arguments for passing in a queried images JSON file')
461
- queried_images_group.add_argument(
462
- '-q', '--queried-images-json',
463
- help='path to queried images JSON file')
464
- queried_images_group.add_argument(
465
- '-c', '--detector-output-cache-dir',
466
- help='(required) path to directory where detector outputs are cached')
467
- queried_images_group.add_argument(
468
- '-v', '--detector-version',
469
- help='(required) detector version string, e.g., "4.1"')
470
- queried_images_group.add_argument(
471
- '-s', '--samples-per-label', type=int,
472
- help='randomly sample this many bounding boxes per label (each label '
473
- 'must have at least this many examples)')
474
- queried_images_group.add_argument(
475
- '--seed', type=int, default=123,
476
- help='random seed, only used if --samples-per-label is given')
477
- queried_images_group.add_argument(
478
- '--label', choices=['first', 'last'], default=None,
479
- help='Whether to put the label first or last in the list of '
480
- 'classifications. If this argument is omitted, then no labels are '
481
- 'included in the output.')
482
- queried_images_group.add_argument(
483
- '--relative-conf', action='store_true',
484
- help='for each class, outputs its relative confidence over the '
485
- 'confidence of the true label, requires "label" to be in CSV')
486
- return parser.parse_args()
487
-
488
-
489
- if __name__ == '__main__':
490
-
491
- args = _parse_args()
492
- main(classification_csv_path=args.classification_csv,
493
- label_names_json_path=args.label_names_json,
494
- output_json_path=args.output_json,
495
- classifier_name=args.classifier_name,
496
- threshold=args.threshold,
497
- datasets=args.datasets,
498
- detection_json_path=args.detection_json,
499
- queried_images_json_path=args.queried_images_json,
500
- detector_output_cache_base_dir=args.detector_output_cache_dir,
501
- detector_version=args.detector_version,
502
- samples_per_label=args.samples_per_label,
503
- seed=args.seed,
504
- label_pos=args.label,
505
- relative_conf=args.relative_conf,
506
- typical_confidence_threshold=args.typical_confidence_threshold)
@@ -1,194 +0,0 @@
1
- """
2
-
3
- prepare_classification_script.py
4
-
5
- Notebook-y script used to prepare a series of shell commands to run a classifier
6
- (other than MegaClassifier) on a MegaDetector result set.
7
-
8
- Differs from prepare_classification_script_mc.py only in the final class mapping step.
9
-
10
- """
11
-
12
- #%% Job options
13
-
14
- import os
15
-
16
- def main():
17
- organization_name = 'idfg'
18
- job_name = 'idfg-2022-01-27-EOE2021S_Group6'
19
- input_filename = 'idfg-2022-01-27-EOE2021S_Group6_detections.filtered_rde_0.60_0.85_30_0.20.json'
20
- image_base = '/datadrive/idfg/EOE2021S_Group6'
21
- crop_path = os.path.join(os.path.expanduser('~/crops'),job_name + '_crops')
22
- device_id = 1
23
-
24
- working_dir_base = os.path.join(os.path.expanduser('~/postprocessing'),
25
- organization_name,
26
- job_name)
27
-
28
- output_base = os.path.join(working_dir_base,'combined_api_outputs')
29
-
30
- assert os.path.isdir(working_dir_base)
31
- assert os.path.isdir(output_base)
32
-
33
- output_file = os.path.join(working_dir_base,'run_idfgclassifier_' + job_name + '.sh')
34
-
35
- input_files = [
36
- os.path.join(
37
- os.path.expanduser('~/postprocessing'),
38
- organization_name,
39
- job_name,
40
- 'combined_api_outputs',
41
- input_filename
42
- )
43
- ]
44
-
45
- for fn in input_files:
46
- assert os.path.isfile(fn)
47
-
48
-
49
- #%% Constants
50
-
51
- include_cropping = False
52
-
53
- classifier_base = os.path.expanduser('~/models/camera_traps/idfg_classifier/idfg_classifier_20200905_042558')
54
- assert os.path.isdir(classifier_base)
55
-
56
- checkpoint_path = os.path.join(classifier_base,'idfg_classifier_ckpt_14_compiled.pt')
57
- assert os.path.isfile(checkpoint_path)
58
-
59
- classifier_categories_path = os.path.join(classifier_base,'label_index.json')
60
- assert os.path.isfile(classifier_categories_path)
61
-
62
- classifier_output_suffix = '_idfg_classifier_output.csv.gz'
63
- final_output_suffix = '_idfgclassifier.json'
64
-
65
- threshold_str = '0.65'
66
- n_threads_str = '50'
67
- image_size_str = '300'
68
- batch_size_str = '64'
69
- num_workers_str = '8'
70
- logdir = working_dir_base
71
-
72
- classification_threshold_str = '0.05'
73
-
74
- # This is just passed along to the metadata in the output file, it has no impact
75
- # on how the classification scripts run.
76
- typical_classification_threshold_str = '0.75'
77
-
78
- classifier_name = 'idfg4'
79
-
80
-
81
- #%% Set up environment
82
-
83
- commands = []
84
- # commands.append('cd MegaDetector/classification\n')
85
- # commands.append('conda activate cameratraps-classifier\n')
86
-
87
-
88
- #%% Crop images
89
-
90
- if include_cropping:
91
-
92
- commands.append('\n### Cropping ###\n')
93
-
94
- # fn = input_files[0]
95
- for fn in input_files:
96
-
97
- input_file_path = fn
98
- crop_cmd = ''
99
-
100
- crop_comment = '\n# Cropping {}\n'.format(fn)
101
- crop_cmd += crop_comment
102
-
103
- crop_cmd += "python crop_detections.py \\\n" + \
104
- input_file_path + ' \\\n' + \
105
- crop_path + ' \\\n' + \
106
- '--images-dir "' + image_base + '"' + ' \\\n' + \
107
- '--threshold "' + threshold_str + '"' + ' \\\n' + \
108
- '--square-crops ' + ' \\\n' + \
109
- '--threads "' + n_threads_str + '"' + ' \\\n' + \
110
- '--logdir "' + logdir + '"' + ' \\\n' + \
111
- '\n'
112
- crop_cmd = '{}'.format(crop_cmd)
113
- commands.append(crop_cmd)
114
-
115
-
116
- #%% Run classifier
117
-
118
- commands.append('\n### Classifying ###\n')
119
-
120
- # fn = input_files[0]
121
- for fn in input_files:
122
-
123
- input_file_path = fn
124
- classifier_output_path = crop_path + classifier_output_suffix
125
-
126
- classify_cmd = ''
127
-
128
- classify_comment = '\n# Classifying {}\n'.format(fn)
129
- classify_cmd += classify_comment
130
-
131
- classify_cmd += "python run_classifier.py \\\n" + \
132
- checkpoint_path + ' \\\n' + \
133
- crop_path + ' \\\n' + \
134
- classifier_output_path + ' \\\n' + \
135
- '--detections-json "' + input_file_path + '"' + ' \\\n' + \
136
- '--classifier-categories "' + classifier_categories_path + '"' + ' \\\n' + \
137
- '--image-size "' + image_size_str + '"' + ' \\\n' + \
138
- '--batch-size "' + batch_size_str + '"' + ' \\\n' + \
139
- '--num-workers "' + num_workers_str + '"' + ' \\\n'
140
-
141
- if device_id is not None:
142
- classify_cmd += '--device {}'.format(device_id)
143
-
144
- classify_cmd += '\n\n'
145
- classify_cmd = '{}'.format(classify_cmd)
146
- commands.append(classify_cmd)
147
-
148
-
149
- #%% Merge classification and detection outputs
150
-
151
- commands.append('\n### Merging ###\n')
152
-
153
- # fn = input_files[0]
154
- for fn in input_files:
155
-
156
- input_file_path = fn
157
- classifier_output_path = crop_path + classifier_output_suffix
158
- final_output_path = os.path.join(output_base,
159
- os.path.basename(classifier_output_path)).\
160
- replace(classifier_output_suffix,
161
- final_output_suffix)
162
- final_output_path = final_output_path.replace('_detections','')
163
- final_output_path = final_output_path.replace('_crops','')
164
-
165
- merge_cmd = ''
166
-
167
- merge_comment = '\n# Merging {}\n'.format(fn)
168
- merge_cmd += merge_comment
169
-
170
- merge_cmd += "python merge_classification_detection_output.py \\\n" + \
171
- classifier_output_path + ' \\\n' + \
172
- classifier_categories_path + ' \\\n' + \
173
- '--output-json "' + final_output_path + '"' + ' \\\n' + \
174
- '--detection-json "' + input_file_path + '"' + ' \\\n' + \
175
- '--classifier-name "' + classifier_name + '"' + ' \\\n' + \
176
- '--threshold "' + classification_threshold_str + '"' + ' \\\n' + \
177
- '--typical-confidence-threshold "' + typical_classification_threshold_str + '"' + ' \\\n' + \
178
- '\n'
179
- merge_cmd = '{}'.format(merge_cmd)
180
- commands.append(merge_cmd)
181
-
182
-
183
- #%% Write everything out
184
-
185
- with open(output_file,'w') as f:
186
- for s in commands:
187
- f.write('{}'.format(s))
188
-
189
- import stat
190
- st = os.stat(output_file)
191
- os.chmod(output_file, st.st_mode | stat.S_IEXEC)
192
-
193
- if __name__ == '__main__':
194
- main()