megadetector 5.0.10__py3-none-any.whl → 5.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.
Files changed (226)
  1. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/LICENSE +0 -0
  2. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/METADATA +12 -11
  3. megadetector-5.0.11.dist-info/RECORD +5 -0
  4. megadetector-5.0.11.dist-info/top_level.txt +1 -0
  5. api/__init__.py +0 -0
  6. api/batch_processing/__init__.py +0 -0
  7. api/batch_processing/api_core/__init__.py +0 -0
  8. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  9. api/batch_processing/api_core/batch_service/score.py +0 -439
  10. api/batch_processing/api_core/server.py +0 -294
  11. api/batch_processing/api_core/server_api_config.py +0 -98
  12. api/batch_processing/api_core/server_app_config.py +0 -55
  13. api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  14. api/batch_processing/api_core/server_job_status_table.py +0 -152
  15. api/batch_processing/api_core/server_orchestration.py +0 -360
  16. api/batch_processing/api_core/server_utils.py +0 -92
  17. api/batch_processing/api_core_support/__init__.py +0 -0
  18. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  19. api/batch_processing/api_support/__init__.py +0 -0
  20. api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  21. api/batch_processing/data_preparation/__init__.py +0 -0
  22. api/batch_processing/data_preparation/manage_local_batch.py +0 -2391
  23. api/batch_processing/data_preparation/manage_video_batch.py +0 -327
  24. api/batch_processing/integration/digiKam/setup.py +0 -6
  25. api/batch_processing/integration/digiKam/xmp_integration.py +0 -465
  26. api/batch_processing/integration/eMammal/test_scripts/config_template.py +0 -5
  27. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -126
  28. api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +0 -55
  29. api/batch_processing/postprocessing/__init__.py +0 -0
  30. api/batch_processing/postprocessing/add_max_conf.py +0 -64
  31. api/batch_processing/postprocessing/categorize_detections_by_size.py +0 -163
  32. api/batch_processing/postprocessing/combine_api_outputs.py +0 -249
  33. api/batch_processing/postprocessing/compare_batch_results.py +0 -958
  34. api/batch_processing/postprocessing/convert_output_format.py +0 -397
  35. api/batch_processing/postprocessing/load_api_results.py +0 -195
  36. api/batch_processing/postprocessing/md_to_coco.py +0 -310
  37. api/batch_processing/postprocessing/md_to_labelme.py +0 -330
  38. api/batch_processing/postprocessing/merge_detections.py +0 -401
  39. api/batch_processing/postprocessing/postprocess_batch_results.py +0 -1904
  40. api/batch_processing/postprocessing/remap_detection_categories.py +0 -170
  41. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +0 -661
  42. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +0 -211
  43. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +0 -82
  44. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +0 -1631
  45. api/batch_processing/postprocessing/separate_detections_into_folders.py +0 -731
  46. api/batch_processing/postprocessing/subset_json_detector_output.py +0 -696
  47. api/batch_processing/postprocessing/top_folders_to_bottom.py +0 -223
  48. api/synchronous/__init__.py +0 -0
  49. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  50. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -152
  51. api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -266
  52. api/synchronous/api_core/animal_detection_api/config.py +0 -35
  53. api/synchronous/api_core/animal_detection_api/data_management/annotations/annotation_constants.py +0 -47
  54. api/synchronous/api_core/animal_detection_api/detection/detector_training/copy_checkpoints.py +0 -43
  55. api/synchronous/api_core/animal_detection_api/detection/detector_training/model_main_tf2.py +0 -114
  56. api/synchronous/api_core/animal_detection_api/detection/process_video.py +0 -543
  57. api/synchronous/api_core/animal_detection_api/detection/pytorch_detector.py +0 -304
  58. api/synchronous/api_core/animal_detection_api/detection/run_detector.py +0 -627
  59. api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +0 -1029
  60. api/synchronous/api_core/animal_detection_api/detection/run_inference_with_yolov5_val.py +0 -581
  61. api/synchronous/api_core/animal_detection_api/detection/run_tiled_inference.py +0 -754
  62. api/synchronous/api_core/animal_detection_api/detection/tf_detector.py +0 -165
  63. api/synchronous/api_core/animal_detection_api/detection/video_utils.py +0 -495
  64. api/synchronous/api_core/animal_detection_api/md_utils/azure_utils.py +0 -174
  65. api/synchronous/api_core/animal_detection_api/md_utils/ct_utils.py +0 -262
  66. api/synchronous/api_core/animal_detection_api/md_utils/directory_listing.py +0 -251
  67. api/synchronous/api_core/animal_detection_api/md_utils/matlab_porting_tools.py +0 -97
  68. api/synchronous/api_core/animal_detection_api/md_utils/path_utils.py +0 -416
  69. api/synchronous/api_core/animal_detection_api/md_utils/process_utils.py +0 -110
  70. api/synchronous/api_core/animal_detection_api/md_utils/sas_blob_utils.py +0 -509
  71. api/synchronous/api_core/animal_detection_api/md_utils/string_utils.py +0 -59
  72. api/synchronous/api_core/animal_detection_api/md_utils/url_utils.py +0 -144
  73. api/synchronous/api_core/animal_detection_api/md_utils/write_html_image_list.py +0 -226
  74. api/synchronous/api_core/animal_detection_api/md_visualization/visualization_utils.py +0 -841
  75. api/synchronous/api_core/tests/__init__.py +0 -0
  76. api/synchronous/api_core/tests/load_test.py +0 -110
  77. classification/__init__.py +0 -0
  78. classification/aggregate_classifier_probs.py +0 -108
  79. classification/analyze_failed_images.py +0 -227
  80. classification/cache_batchapi_outputs.py +0 -198
  81. classification/create_classification_dataset.py +0 -627
  82. classification/crop_detections.py +0 -516
  83. classification/csv_to_json.py +0 -226
  84. classification/detect_and_crop.py +0 -855
  85. classification/efficientnet/__init__.py +0 -9
  86. classification/efficientnet/model.py +0 -415
  87. classification/efficientnet/utils.py +0 -610
  88. classification/evaluate_model.py +0 -520
  89. classification/identify_mislabeled_candidates.py +0 -152
  90. classification/json_to_azcopy_list.py +0 -63
  91. classification/json_validator.py +0 -695
  92. classification/map_classification_categories.py +0 -276
  93. classification/merge_classification_detection_output.py +0 -506
  94. classification/prepare_classification_script.py +0 -194
  95. classification/prepare_classification_script_mc.py +0 -228
  96. classification/run_classifier.py +0 -286
  97. classification/save_mislabeled.py +0 -110
  98. classification/train_classifier.py +0 -825
  99. classification/train_classifier_tf.py +0 -724
  100. classification/train_utils.py +0 -322
  101. data_management/__init__.py +0 -0
  102. data_management/annotations/__init__.py +0 -0
  103. data_management/annotations/annotation_constants.py +0 -34
  104. data_management/camtrap_dp_to_coco.py +0 -238
  105. data_management/cct_json_utils.py +0 -395
  106. data_management/cct_to_md.py +0 -176
  107. data_management/cct_to_wi.py +0 -289
  108. data_management/coco_to_labelme.py +0 -272
  109. data_management/coco_to_yolo.py +0 -662
  110. data_management/databases/__init__.py +0 -0
  111. data_management/databases/add_width_and_height_to_db.py +0 -33
  112. data_management/databases/combine_coco_camera_traps_files.py +0 -206
  113. data_management/databases/integrity_check_json_db.py +0 -477
  114. data_management/databases/subset_json_db.py +0 -115
  115. data_management/generate_crops_from_cct.py +0 -149
  116. data_management/get_image_sizes.py +0 -188
  117. data_management/importers/add_nacti_sizes.py +0 -52
  118. data_management/importers/add_timestamps_to_icct.py +0 -79
  119. data_management/importers/animl_results_to_md_results.py +0 -158
  120. data_management/importers/auckland_doc_test_to_json.py +0 -372
  121. data_management/importers/auckland_doc_to_json.py +0 -200
  122. data_management/importers/awc_to_json.py +0 -189
  123. data_management/importers/bellevue_to_json.py +0 -273
  124. data_management/importers/cacophony-thermal-importer.py +0 -796
  125. data_management/importers/carrizo_shrubfree_2018.py +0 -268
  126. data_management/importers/carrizo_trail_cam_2017.py +0 -287
  127. data_management/importers/cct_field_adjustments.py +0 -57
  128. data_management/importers/channel_islands_to_cct.py +0 -913
  129. data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  130. data_management/importers/eMammal/eMammal_helpers.py +0 -249
  131. data_management/importers/eMammal/make_eMammal_json.py +0 -223
  132. data_management/importers/ena24_to_json.py +0 -275
  133. data_management/importers/filenames_to_json.py +0 -385
  134. data_management/importers/helena_to_cct.py +0 -282
  135. data_management/importers/idaho-camera-traps.py +0 -1407
  136. data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  137. data_management/importers/jb_csv_to_json.py +0 -150
  138. data_management/importers/mcgill_to_json.py +0 -250
  139. data_management/importers/missouri_to_json.py +0 -489
  140. data_management/importers/nacti_fieldname_adjustments.py +0 -79
  141. data_management/importers/noaa_seals_2019.py +0 -181
  142. data_management/importers/pc_to_json.py +0 -365
  143. data_management/importers/plot_wni_giraffes.py +0 -123
  144. data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
  145. data_management/importers/prepare_zsl_imerit.py +0 -131
  146. data_management/importers/rspb_to_json.py +0 -356
  147. data_management/importers/save_the_elephants_survey_A.py +0 -320
  148. data_management/importers/save_the_elephants_survey_B.py +0 -332
  149. data_management/importers/snapshot_safari_importer.py +0 -758
  150. data_management/importers/snapshot_safari_importer_reprise.py +0 -665
  151. data_management/importers/snapshot_serengeti_lila.py +0 -1067
  152. data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  153. data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  154. data_management/importers/sulross_get_exif.py +0 -65
  155. data_management/importers/timelapse_csv_set_to_json.py +0 -490
  156. data_management/importers/ubc_to_json.py +0 -399
  157. data_management/importers/umn_to_json.py +0 -507
  158. data_management/importers/wellington_to_json.py +0 -263
  159. data_management/importers/wi_to_json.py +0 -441
  160. data_management/importers/zamba_results_to_md_results.py +0 -181
  161. data_management/labelme_to_coco.py +0 -548
  162. data_management/labelme_to_yolo.py +0 -272
  163. data_management/lila/__init__.py +0 -0
  164. data_management/lila/add_locations_to_island_camera_traps.py +0 -97
  165. data_management/lila/add_locations_to_nacti.py +0 -147
  166. data_management/lila/create_lila_blank_set.py +0 -557
  167. data_management/lila/create_lila_test_set.py +0 -151
  168. data_management/lila/create_links_to_md_results_files.py +0 -106
  169. data_management/lila/download_lila_subset.py +0 -177
  170. data_management/lila/generate_lila_per_image_labels.py +0 -515
  171. data_management/lila/get_lila_annotation_counts.py +0 -170
  172. data_management/lila/get_lila_image_counts.py +0 -111
  173. data_management/lila/lila_common.py +0 -300
  174. data_management/lila/test_lila_metadata_urls.py +0 -132
  175. data_management/ocr_tools.py +0 -874
  176. data_management/read_exif.py +0 -681
  177. data_management/remap_coco_categories.py +0 -84
  178. data_management/remove_exif.py +0 -66
  179. data_management/resize_coco_dataset.py +0 -189
  180. data_management/wi_download_csv_to_coco.py +0 -246
  181. data_management/yolo_output_to_md_output.py +0 -441
  182. data_management/yolo_to_coco.py +0 -676
  183. detection/__init__.py +0 -0
  184. detection/detector_training/__init__.py +0 -0
  185. detection/detector_training/model_main_tf2.py +0 -114
  186. detection/process_video.py +0 -703
  187. detection/pytorch_detector.py +0 -337
  188. detection/run_detector.py +0 -779
  189. detection/run_detector_batch.py +0 -1219
  190. detection/run_inference_with_yolov5_val.py +0 -917
  191. detection/run_tiled_inference.py +0 -935
  192. detection/tf_detector.py +0 -188
  193. detection/video_utils.py +0 -606
  194. docs/source/conf.py +0 -43
  195. md_utils/__init__.py +0 -0
  196. md_utils/azure_utils.py +0 -174
  197. md_utils/ct_utils.py +0 -612
  198. md_utils/directory_listing.py +0 -246
  199. md_utils/md_tests.py +0 -968
  200. md_utils/path_utils.py +0 -1044
  201. md_utils/process_utils.py +0 -157
  202. md_utils/sas_blob_utils.py +0 -509
  203. md_utils/split_locations_into_train_val.py +0 -228
  204. md_utils/string_utils.py +0 -92
  205. md_utils/url_utils.py +0 -323
  206. md_utils/write_html_image_list.py +0 -225
  207. md_visualization/__init__.py +0 -0
  208. md_visualization/plot_utils.py +0 -293
  209. md_visualization/render_images_with_thumbnails.py +0 -275
  210. md_visualization/visualization_utils.py +0 -1537
  211. md_visualization/visualize_db.py +0 -551
  212. md_visualization/visualize_detector_output.py +0 -406
  213. megadetector-5.0.10.dist-info/RECORD +0 -224
  214. megadetector-5.0.10.dist-info/top_level.txt +0 -8
  215. taxonomy_mapping/__init__.py +0 -0
  216. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +0 -491
  217. taxonomy_mapping/map_new_lila_datasets.py +0 -154
  218. taxonomy_mapping/prepare_lila_taxonomy_release.py +0 -142
  219. taxonomy_mapping/preview_lila_taxonomy.py +0 -591
  220. taxonomy_mapping/retrieve_sample_image.py +0 -71
  221. taxonomy_mapping/simple_image_download.py +0 -218
  222. taxonomy_mapping/species_lookup.py +0 -834
  223. taxonomy_mapping/taxonomy_csv_checker.py +0 -159
  224. taxonomy_mapping/taxonomy_graph.py +0 -346
  225. taxonomy_mapping/validate_lila_category_mappings.py +0 -83
  226. {megadetector-5.0.10.dist-info → megadetector-5.0.11.dist-info}/WHEEL +0 -0
classification/detect_and_crop.py
@@ -1,855 +0,0 @@
- """
-
- detect_and_crop.py
-
- Run MegaDetector on images via Batch API, then save crops of the detected
- bounding boxes.
-
- The input to this script is a "queried images" JSON file, whose keys are paths
- to images and values are dicts containing information relevant for training
- a classifier, including labels and (optionally) ground-truth bounding boxes.
- The image paths are in the format `<dataset-name>/<blob-name>` where we assume
- that the dataset name does not contain '/'.
-
- {
-     "caltech/cct_images/59f79901-23d2-11e8-a6a3-ec086b02610b.jpg": {
-         "dataset": "caltech",
-         "location": 13,
-         "class": "mountain_lion",  # class from dataset
-         "bbox": [{"category": "animal",
-                   "bbox": [0, 0.347, 0.237, 0.257]}],  # ground-truth bbox
-         "label": ["monutain_lion"]  # labels to use in classifier
-     },
-     "caltech/cct_images/59f5fe2b-23d2-11e8-a6a3-ec086b02610b.jpg": {
-         "dataset": "caltech",
-         "location": 13,
-         "class": "mountain_lion",  # class from dataset
-         "label": ["monutain_lion"]  # labels to use in classifier
-     },
-     ...
- }
-
- We assume that no image contains over 100 bounding boxes, and we always save
- crops as RGB .jpg files for consistency. For each image, each bounding box is
- cropped and saved to a file with a suffix "___cropXX.jpg" (ground truth bbox) or
- "___cropXX_mdvY.Y.jpg" (detected bbox) added to the filename of the original
- image. "XX" ranges from "00" to "99" and "Y.Y" indicates the MegaDetector
- version. If an image has ground truth bounding boxes, we assume that they are
- exhaustive--i.e., there are no other objects of interest, so we don't need to
- run MegaDetector on the image. If an image does not have ground truth bounding
- boxes, we run MegaDetector on the image and label the detected boxes in order
- from 00 up to 99. Based on the given confidence threshold, we may skip saving
- certain bounding box crops, but we still increment the bounding box number for
- skipped boxes.
-
- Example cropped image path (with ground truth bbox from MegaDB)
-
-     "path/to/crops/image.jpg___crop00.jpg"
-
- Example cropped image path (with MegaDetector bbox)
-
-     "path/to/crops/image.jpg___crop00_mdv4.1.jpg"
-
- By default, the images are cropped exactly per the given bounding box
- coordinates. However, if square crops are desired, pass the --square-crops
- flag. This will always generate a square crop whose size is the larger of the
- bounding box width or height. In the case that the square crop boundaries exceed
- the original image size, the crop is padded with 0s.
-
- This script currently only supports running MegaDetector via the Batch Detection
- API. See the classification README for instructions on running MegaDetector
- locally. If running the Batch Detection API, set the following environment
- variables for the Azure Blob Storage container in which we save the intermediate
- task lists:
-
-     BATCH_DETECTION_API_URL                  # API URL
-     CLASSIFICATION_BLOB_STORAGE_ACCOUNT      # storage account name
-     CLASSIFICATION_BLOB_CONTAINER            # container name
-     CLASSIFICATION_BLOB_CONTAINER_WRITE_SAS  # SAS token, without leading '?'
-     DETECTION_API_CALLER                     # allow-listed API caller
-
- This script allows specifying a directory where MegaDetector outputs are cached
- via the --detector-output-cache-dir argument. This directory must be
- organized as:
-
-     <cache-dir>/<MegaDetector-version>/<dataset-name>.json
-
- Example: If the `cameratrapssc/classifier-training` Azure blob storage
- container is mounted to the local machine via blobfuse, it may be used as
- a MegaDetector output cache directory by passing
-     "cameratrapssc/classifier-training/mdcache/"
- as the value for --detector-output-cache-dir.
-
- This script outputs either 1 or 3 files, depending on whether the Batch Detection API
- is run:
-
- - <output_dir>/detect_and_crop_log_{timestamp}.json
-     log of images missing detections and images that failed to properly
-     download and crop
- - <output_dir>/batchapi_tasklists/{task_id}.json
-     (if --run-detector) task lists uploaded to the Batch Detection API
- - <output_dir>/batchapi_response/{task_id}.json
-     (if --run-detector) task status responses for completed tasks
-
- """
-
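A side note on the suffix scheme described in this docstring: the actual cropping logic lives in crop_detections.py, which is not shown in this diff. The snippet below is only an illustrative sketch of the naming convention; crop_filename is a hypothetical helper, not a function in the package.

    from typing import Optional

    # Hypothetical helper illustrating the "___cropXX[_mdvY.Y].jpg" suffix scheme
    def crop_filename(image_path: str, box_index: int,
                      detector_version: Optional[str] = None) -> str:
        # Ground-truth boxes get "___cropXX.jpg"; MegaDetector boxes get "___cropXX_mdvY.Y.jpg"
        suffix = f'___crop{box_index:>02d}'
        if detector_version is not None:
            suffix += f'_mdv{detector_version}'
        return image_path + suffix + '.jpg'

    crop_filename('path/to/crops/image.jpg', 0)         # path/to/crops/image.jpg___crop00.jpg
    crop_filename('path/to/crops/image.jpg', 0, '4.1')  # path/to/crops/image.jpg___crop00_mdv4.1.jpg

With --square-crops, the crop side length becomes max(box width, box height), and any part of the square that falls outside the image is zero-padded, as stated above.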
- #%% Imports
-
- from __future__ import annotations
-
- import argparse
- from collections.abc import Collection, Iterable, Mapping, Sequence
- from concurrent import futures
- from datetime import datetime
- import json
- import os
- import pprint
- import time
- from typing import Any, Optional
-
- from azure.storage.blob import ContainerClient
- import requests
- from tqdm import tqdm
-
- from api.batch_processing.data_preparation.prepare_api_submission import (
-     BatchAPIResponseError, Task, TaskStatus, divide_list_into_tasks)
- from classification.cache_batchapi_outputs import cache_detections
- from classification.crop_detections import load_and_crop
- from data_management.megadb import megadb_utils
- from md_utils import path_utils
- from md_utils import sas_blob_utils
-
-
- #%% Example usage
-
- """
- python detect_and_crop.py \
-     base_logdir/queried_images.json \
-     base_logdir \
-     --detector-output-cache-dir /path/to/classifier-training/mdcache \
-     --detector-version 4.1 \
-     --run-detector --resume-file base_logdir/resume.json \
-     --cropped-images-dir /path/to/crops --square-crops --threshold 0.9 \
-     --save-full-images --images-dir /path/to/images --threads 50
- """
-
-
- #%% Main function
-
- def main(queried_images_json_path: str,
-          output_dir: str,
-          detector_version: str,
-          detector_output_cache_base_dir: str,
-          run_detector: bool,
-          resume_file_path: Optional[str],
-          cropped_images_dir: Optional[str],
-          save_full_images: bool,
-          square_crops: bool,
-          check_crops_valid: bool,
-          confidence_threshold: float,
-          images_dir: Optional[str],
-          threads: int) -> None:
-     """
-     Args:
-         queried_images_json_path: str, path to output of json_validator.py
-         detector_version: str, detector version string, e.g., '4.1',
-             see {batch_detection_api_url}/supported_model_versions,
-             determines the subfolder of detector_output_cache_base_dir in
-             which to find and save detector outputs
-         detector_output_cache_base_dir: str, path to local directory
-             where detector outputs are cached, 1 JSON file per dataset
-         cropped_images_dir: str, path to local directory for saving crops of
-             bounding boxes
-         run_detector: bool, whether to run Batch Detection API, or to skip
-             running the detector entirely
-         output_dir: str, path to directory to save outputs, see module docstring
-         save_full_images: bool, whether to save downloaded images to images_dir,
-             images_dir must be given if save_full_images=True
-         square_crops: bool, whether to crop bounding boxes as squares
-         check_crops_valid: bool, whether to load each crop to ensure the file is
-             valid (i.e., not truncated)
-         confidence_threshold: float, only crop bounding boxes above this value
-         images_dir: optional str, path to local directory where images are saved
-         threads: int, number of threads to use for downloading images
-         resume_file_path: optional str, path to save JSON file with list of info
-             dicts on running tasks, or to resume from running tasks, only used
-             if run_detector=True
-     """
-
-     # This dictionary will get written out at the end of this process; store
-     # diagnostic variables here
-     log: dict[str, Any] = {}
-
-     # error checking
-     assert 0 <= confidence_threshold <= 1
-     if save_full_images:
-         assert images_dir is not None
-         if not os.path.exists(images_dir):
-             os.makedirs(images_dir, exist_ok=True)
-             print(f'Created images_dir at {images_dir}')
-
-     with open(queried_images_json_path, 'r') as f:
-         js = json.load(f)
-     detector_output_cache_dir = os.path.join(
-         detector_output_cache_base_dir, f'v{detector_version}')
-     if not os.path.exists(detector_output_cache_dir):
-         os.makedirs(detector_output_cache_dir)
-         print(f'Created directory at {detector_output_cache_dir}')
-     images_without_ground_truth_bbox = [k for k in js if 'bbox' not in js[k]]
-     images_to_detect, detection_cache, categories = filter_detected_images(
-         potential_images_to_detect=images_without_ground_truth_bbox,
-         detector_output_cache_dir=detector_output_cache_dir)
-     print(f'{len(images_to_detect)} images not in detection cache')
-
-     if run_detector:
-         log['images_submitted_for_detection'] = images_to_detect
-
-         assert resume_file_path is not None
-         assert not os.path.isdir(resume_file_path)
-         batch_detection_api_url = os.environ['BATCH_DETECTION_API_URL']
-
-         if os.path.exists(resume_file_path):
-             tasks_by_dataset = resume_tasks(
-                 resume_file_path,
-                 batch_detection_api_url=batch_detection_api_url)
-         else:
-             task_lists_dir = os.path.join(output_dir, 'batchapi_tasklists')
-             tasks_by_dataset = submit_batch_detection_api(
-                 images_to_detect=images_to_detect,
-                 task_lists_dir=task_lists_dir,
-                 detector_version=detector_version,
-                 account=os.environ['CLASSIFICATION_BLOB_STORAGE_ACCOUNT'],
-                 container=os.environ['CLASSIFICATION_BLOB_CONTAINER'],
-                 sas_token=os.environ['CLASSIFICATION_BLOB_CONTAINER_WRITE_SAS'],
-                 caller=os.environ['DETECTION_API_CALLER'],
-                 batch_detection_api_url=batch_detection_api_url,
-                 resume_file_path=resume_file_path)
-
-         wait_for_tasks(tasks_by_dataset, detector_output_cache_dir,
-                        output_dir=output_dir)
-
-         # refresh detection cache
-         print('Refreshing detection cache...')
-         images_to_detect, detection_cache, categories = filter_detected_images(
-             potential_images_to_detect=images_without_ground_truth_bbox,
-             detector_output_cache_dir=detector_output_cache_dir)
-         print(f'{len(images_to_detect)} images not in detection cache')
-
-     log['images_missing_detections'] = images_to_detect
-
-     if cropped_images_dir is not None:
-
-         images_failed_dload_crop, num_downloads, num_crops = download_and_crop(
-             queried_images_json=js,
-             detection_cache=detection_cache,
-             detection_categories=categories,
-             detector_version=detector_version,
-             cropped_images_dir=cropped_images_dir,
-             confidence_threshold=confidence_threshold,
-             save_full_images=save_full_images,
-             square_crops=square_crops,
-             check_crops_valid=check_crops_valid,
-             images_dir=images_dir,
-             threads=threads,
-             images_missing_detections=images_to_detect)
-         log['images_failed_download_or_crop'] = images_failed_dload_crop
-         log['num_new_downloads'] = num_downloads
-         log['num_new_crops'] = num_crops
-
-     print(f'{len(images_to_detect)} images with missing detections.')
-     if cropped_images_dir is not None:
-         print(f'{len(images_failed_dload_crop)} images failed to download or '
-               'crop.')
-
-     # save log of bad images
-     date = datetime.now().strftime('%Y%m%d_%H%M%S')  # e.g., '20200722_110816'
-     log_path = os.path.join(output_dir, f'detect_and_crop_log_{date}.json')
-     with open(log_path, 'w') as f:
-         json.dump(log, f, indent=1)
-
-
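The log that main() writes at the end of a run is a small JSON file; the keys come directly from the function above, and the values below are purely illustrative:

    # detect_and_crop_log_<timestamp>.json, illustrative contents only;
    # 'images_submitted_for_detection' is written only when run_detector=True
    example_log = {
        'images_submitted_for_detection': ['caltech/cct_images/a.jpg'],
        'images_missing_detections': [],
        'images_failed_download_or_crop': ['caltech/cct_images/b.jpg'],
        'num_new_downloads': 41,
        'num_new_crops': 63,
    }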
- #%% Support functions
-
- def load_detection_cache(detector_output_cache_dir: str,
-                          datasets: Collection[str]) -> tuple[
-                              dict[str, dict[str, dict[str, Any]]],
-                              dict[str, str]
-                          ]:
-     """
-     Loads detection cache for a given dataset. Returns empty dictionaries
-     if the cache does not exist.
-
-     Args:
-         detector_output_cache_dir: str, path to local directory where detector
-             outputs are cached, 1 JSON file per dataset
-         datasets: list of str, names of datasets
-
-     Returns:
-         detection_cache: dict, maps dataset name to dict, which maps
-             image file to corresponding entry in 'images' list from the
-             Batch Detection API output. detection_cache[ds] is an empty dict
-             if no cached detections were found for the given dataset ds.
-         detection_categories: dict, maps str category ID to str category name
-     """
-
-     # cache of Detector outputs: dataset name => {img_path => detection_dict}
-     detection_cache = {}
-     detection_categories: dict[str, str] = {}
-
-     pbar = tqdm(datasets)
-     for ds in pbar:
-         pbar.set_description(f'Loading dataset {ds} into detection cache')
-         cache_path = os.path.join(detector_output_cache_dir, f'{ds}.json')
-         if os.path.exists(cache_path):
-             with open(cache_path, 'r') as f:
-                 js = json.load(f)
-             detection_cache[ds] = {img['file']: img for img in js['images']}
-             if len(detection_categories) == 0:
-                 detection_categories = js['detection_categories']
-             assert detection_categories == js['detection_categories']
-         else:
-             tqdm.write(f'No detection cache found for dataset {ds}')
-             detection_cache[ds] = {}
-     return detection_cache, detection_categories
-
-
- def filter_detected_images(
-         potential_images_to_detect: Iterable[str],
-         detector_output_cache_dir: str
-         ) -> tuple[list[str],
-                    dict[str, dict[str, dict[str, Any]]],
-                    dict[str, str]]:
-     """
-     Checks image paths against cached Detector outputs, and prepares
-     the SAS URIs for each image not in the cache.
-
-     Args:
-         potential_images_to_detect: list of str, paths to images that do not
-             have ground truth bounding boxes, each path has format
-             <dataset-name>/<img-filename>, where <img-filename> is the blob name
-         detector_output_cache_dir: str, path to local directory where detector
-             outputs are cached, 1 JSON file per dataset
-
-     Returns:
-         images_to_detect: list of str, paths to images not in the detector
-             output cache, with the format <dataset-name>/<img-filename>
-         detection_cache: dict, maps str dataset name to dict,
-             detection_cache[dataset_name] is the 'detections' list from the
-             Batch Detection API output
-         detection_categories: dict, maps str category ID to str category name,
-             empty dict if no cached detections are found
-     """
-
-     datasets = set(img_path[:img_path.find('/')]
-                    for img_path in potential_images_to_detect)
-     detection_cache, detection_categories = load_detection_cache(
-         detector_output_cache_dir, datasets)
-
-     images_to_detect = []
-     for img_path in potential_images_to_detect:
-         # img_path: <dataset-name>/<img-filename>
-         ds, img_file = img_path.split('/', maxsplit=1)
-         if img_file not in detection_cache[ds]:
-             images_to_detect.append(img_path)
-
-     return images_to_detect, detection_cache, detection_categories
-
-
- def split_images_list_by_dataset(images_to_detect: Iterable[str]
-                                  ) -> dict[str, list[str]]:
-     """
-     Args:
-         images_to_detect: list of str, image paths with the format
-             <dataset-name>/<image-filename>
-
-     Returns: dict, maps dataset name to a list of image paths
-     """
-
-     images_by_dataset: dict[str, list[str]] = {}
-     for img_path in images_to_detect:
-         dataset = img_path[:img_path.find('/')]
-         if dataset not in images_by_dataset:
-             images_by_dataset[dataset] = []
-         images_by_dataset[dataset].append(img_path)
-     return images_by_dataset
-
-
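For example, split_images_list_by_dataset() above simply groups paths by their dataset prefix (illustrative call; the paths are made up):

    split_images_list_by_dataset([
        'caltech/cct_images/a.jpg',
        'caltech/cct_images/b.jpg',
        'idfg/c.jpg',
    ])
    # -> {'caltech': ['caltech/cct_images/a.jpg', 'caltech/cct_images/b.jpg'],
    #     'idfg': ['idfg/c.jpg']}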
- def submit_batch_detection_api(images_to_detect: Iterable[str],
-                                task_lists_dir: str,
-                                detector_version: str,
-                                account: str,
-                                container: str,
-                                sas_token: str,
-                                caller: str,
-                                batch_detection_api_url: str,
-                                resume_file_path: str
-                                ) -> dict[str, list[Task]]:
-     """
-     Args:
-         images_to_detect: list of str, list of str, image paths with the format
-             <dataset-name>/<image-filename>
-         task_lists_dir: str, path to local directory for saving JSON files
-             each containing a list of image URLs corresponding to an API task
-         detector_version: str, MegaDetector version string, e.g., '4.1',
-             see {batch_detection_api_url}/supported_model_versions
-         account: str, Azure Storage account name
-         container: str, Azure Blob Storage container name, where the task lists
-             will be uploaded
-         sas_token: str, SAS token with write permissions for the container
-         caller: str, allow-listed caller
-         batch_detection_api_url: str, URL to batch detection API
-         resume_file_path: str, path to save resume file
-
-     Returns: dict, maps str dataset name to list of Task objects
-     """
-
-     filtered_images_to_detect = [
-         x for x in images_to_detect if path_utils.is_image_file(x)]
-     not_images = set(images_to_detect) - set(filtered_images_to_detect)
-     if len(not_images) == 0:
-         print('Good! All image files have valid file extensions.')
-     else:
-         print(f'Skipping {len(not_images)} files with non-image extensions:')
-         pprint.pprint(sorted(not_images))
-     images_to_detect = filtered_images_to_detect
-
-     datasets_table = megadb_utils.MegadbUtils().get_datasets_table()
-
-     images_by_dataset = split_images_list_by_dataset(images_to_detect)
-     tasks_by_dataset = {}
-     for dataset, image_paths in images_by_dataset.items():
-         # get SAS URL for images container
-         images_sas_token = datasets_table[dataset]['container_sas_key']
-         if images_sas_token[0] == '?':
-             images_sas_token = images_sas_token[1:]
-         images_container_url = sas_blob_utils.build_azure_storage_uri(
-             account=datasets_table[dataset]['storage_account'],
-             container=datasets_table[dataset]['container'],
-             sas_token=images_sas_token)
-
-         # strip image paths of dataset name
-         image_blob_names = [path[path.find('/') + 1:] for path in image_paths]
-
-         tasks_by_dataset[dataset] = submit_batch_detection_api_by_dataset(
-             dataset=dataset,
-             image_blob_names=image_blob_names,
-             images_container_url=images_container_url,
-             task_lists_dir=task_lists_dir,
-             detector_version=detector_version,
-             account=account, container=container, sas_token=sas_token,
-             caller=caller, batch_detection_api_url=batch_detection_api_url)
-
-     # save list of dataset names and task IDs for resuming
-     resume_json = [
-         {
-             'dataset': dataset,
-             'task_name': task.name,
-             'task_id': task.id,
-             'local_images_list_path': task.local_images_list_path
-         }
-         for dataset in tasks_by_dataset
-         for task in tasks_by_dataset[dataset]
-     ]
-     with open(resume_file_path, 'w') as f:
-         json.dump(resume_json, f, indent=1)
-     return tasks_by_dataset
-
-
- def submit_batch_detection_api_by_dataset(
-         dataset: str,
-         image_blob_names: Sequence[str],
-         images_container_url: str,
-         task_lists_dir: str,
-         detector_version: str,
-         account: str,
-         container: str,
-         sas_token: str,
-         caller: str,
-         batch_detection_api_url: str
-         ) -> list[Task]:
-     """
-     Args:
-         dataset: str, name of dataset
-         image_blob_names: list of str, image blob names from the same dataset
-         images_container_url: str, URL to blob storage container where images
-             from this dataset are stored, including SAS token with read
-             permissions if container is not public
-         **see submit_batch_detection_api() for description of other args
-
-     Returns: list of Task objects
-     """
-
-     os.makedirs(task_lists_dir, exist_ok=True)
-
-     date = datetime.now().strftime('%Y%m%d_%H%M%S')  # e.g., '20200722_110816'
-     task_list_base_filename = f'task_list_{dataset}_{date}.json'
-
-     task_list_paths, _ = divide_list_into_tasks(
-         file_list=image_blob_names,
-         save_path=os.path.join(task_lists_dir, task_list_base_filename))
-
-     # complete task name: 'detect_for_classifier_caltech_20200722_110816_task01'
-     task_name_template = 'detect_for_classifier_{dataset}_{date}_task{n:>02d}'
-     tasks: list[Task] = []
-     for i, task_list_path in enumerate(task_list_paths):
-         task = Task(
-             name=task_name_template.format(dataset=dataset, date=date, n=i),
-             images_list_path=task_list_path, api_url=batch_detection_api_url)
-         task.upload_images_list(
-             account=account, container=container, sas_token=sas_token)
-         task.generate_api_request(
-             caller=caller,
-             input_container_url=images_container_url,
-             model_version=detector_version)
-         print(f'Submitting task for: {task_list_path}')
-         task.submit()
-         print(f'- task ID: {task.id}')
-         tasks.append(task)
-
-         # HACK! Sleep for 10s between task submissions in the hopes that it
-         # decreases the chance of backend JSON "database" corruption
-         time.sleep(10)
-     return tasks
-
-
- def resume_tasks(resume_file_path: str, batch_detection_api_url: str
-                  ) -> dict[str, list[Task]]:
-     """
-     Args:
-         resume_file_path: str, path to resume file with list of info dicts on
-             running tasks
-         batch_detection_api_url: str, URL to batch detection API
-
-     Returns: dict, maps str dataset name to list of Task objects
-     """
-
-     with open(resume_file_path, 'r') as f:
-         resume_json = json.load(f)
-
-     tasks_by_dataset: dict[str, list[Task]] = {}
-     for info_dict in resume_json:
-         dataset = info_dict['dataset']
-         if dataset not in tasks_by_dataset:
-             tasks_by_dataset[dataset] = []
-         task = Task(name=info_dict['task_name'],
-                     task_id=info_dict['task_id'],
-                     images_list_path=info_dict['local_images_list_path'],
-                     validate=False,
-                     api_url=batch_detection_api_url)
-         tasks_by_dataset[dataset].append(task)
-     return tasks_by_dataset
-
-
- def wait_for_tasks(tasks_by_dataset: Mapping[str, Iterable[Task]],
-                    detector_output_cache_dir: str,
-                    output_dir: Optional[str] = None,
-                    poll_interval: int = 120) -> None:
-     """
-     Waits for the Batch Detection API tasks to finish running.
-
-     For jobs that finish successfully, merges the output with cached detector
-     outputs.
-
-     Args:
-         tasks_by_dataset: dict, maps str dataset name to list of Task objects
-         detector_output_cache_dir: str, path to local directory where detector
-             outputs are cached, 1 JSON file per dataset, directory must
-             already exist
-         output_dir: optional str, task status responses for completed tasks are
-             saved to <output_dir>/batchapi_response/{task_id}.json
-         poll_interval: int, # of seconds between pinging the task status API
-     """
-
-     remaining_tasks: list[tuple[str, Task]] = [
-         (dataset, task) for dataset, tasks in tasks_by_dataset.items()
-         for task in tasks]
-
-     progbar = tqdm(total=len(remaining_tasks))
-     while True:
-         new_remaining_tasks = []
-         for dataset, task in remaining_tasks:
-             try:
-                 task.check_status()
-             except (BatchAPIResponseError, requests.HTTPError) as e:
-                 exception_type = type(e).__name__
-                 tqdm.write(f'Error in checking status of task {task.id}: '
-                            f'({exception_type}) {e}')
-                 tqdm.write(f'Skipping task {task.id}.')
-                 continue
-
-             # task still running => continue
-             if task.status == TaskStatus.RUNNING:
-                 new_remaining_tasks.append((dataset, task))
-                 continue
-
-             progbar.update(1)
-             tqdm.write(f'Task {task.id} stopped with status {task.status}')
-
-             if task.status in [TaskStatus.PROBLEM, TaskStatus.FAILED]:
-                 tqdm.write('API response:')
-                 tqdm.write(str(task.response))
-                 continue
-
-             # task finished successfully, save response to disk
-             assert task.status == TaskStatus.COMPLETED
-             if output_dir is not None:
-                 save_dir = os.path.join(output_dir, 'batchapi_response')
-                 if not os.path.exists(save_dir):
-                     tqdm.write(f'Creating API output dir: {save_dir}')
-                     os.makedirs(save_dir)
-                 with open(os.path.join(save_dir, f'{task.id}.json'), 'w') as f:
-                     json.dump(task.response, f, indent=1)
-             message = task.response['Status']['message']
-             num_failed_shards = message['num_failed_shards']
-             if num_failed_shards != 0:
-                 tqdm.write(f'Task {task.id} completed with {num_failed_shards} '
-                            'failed shards.')
-
-             detections_url = message['output_file_urls']['detections']
-             if task.id not in detections_url:
-                 tqdm.write('Invalid detections URL in response. Skipping task.')
-                 continue
-
-             detections = requests.get(detections_url).json()
-             msg = cache_detections(
-                 detections=detections, dataset=dataset,
-                 detector_output_cache_dir=detector_output_cache_dir)
-             tqdm.write(msg)
-
-         remaining_tasks = new_remaining_tasks
-         if len(remaining_tasks) == 0:
-             break
-         tqdm.write(f'Sleeping for {poll_interval} seconds...')
-         time.sleep(poll_interval)
-
-     progbar.close()
-
-
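The per-dataset cache files that wait_for_tasks() merges (via cache_detections()) and that download_and_crop() below reads follow the Batch Detection API output format. A rough, illustrative excerpt of <cache-dir>/v4.1/caltech.json, limited to the keys this script touches ('images', 'file', 'detections', 'category', 'conf', 'detection_categories'); values are made up:

    example_cache_file = {
        'images': [
            {
                'file': 'cct_images/59f79901-23d2-11e8-a6a3-ec086b02610b.jpg',
                'detections': [
                    # 'bbox' is normalized [x_min, y_min, width, height]
                    {'category': '1', 'conf': 0.972,
                     'bbox': [0.0, 0.347, 0.237, 0.257]},
                ],
            },
        ],
        'detection_categories': {'1': 'animal', '2': 'person', '3': 'vehicle'},
    }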
- def download_and_crop(
-         queried_images_json: Mapping[str, Mapping[str, Any]],
-         detection_cache: Mapping[str, Mapping[str, Mapping[str, Any]]],
-         detection_categories: Mapping[str, str],
-         detector_version: str,
-         cropped_images_dir: str,
-         confidence_threshold: float,
-         save_full_images: bool,
-         square_crops: bool,
-         check_crops_valid: bool,
-         images_dir: Optional[str] = None,
-         threads: int = 1,
-         images_missing_detections: Optional[Iterable[str]] = None
-         ) -> tuple[list[str], int, int]:
-     """
-     Saves crops to a file with the same name as the original image with an
-     additional suffix appended, starting with 3 underscores:
-     - if image has ground truth bboxes: "___cropXX.jpg", where "XX" indicates
-         the bounding box index
-     - if image has bboxes from MegaDetector: "___cropXX_mdvY.Y.jpg", where
-         "Y.Y" indicates the MegaDetector version
-     See module docstring for more info and examples.
-
-     Note: this function is very similar to the "download_and_crop()" function in
-         crop_detections.py. The main difference is that this function uses
-         MegaDB to look up Azure Storage container information for images based
-         on the dataset, whereas the crop_detections.py version has no concept
-         of a "dataset" and "ground-truth" bounding boxes from MegaDB.
-
-     Args:
-         queried_images_json: dict, represents JSON output of json_validator.py,
-             all images in queried_images_json are assumed to have either ground
-             truth or cached detected bounding boxes unless
-             images_missing_detections is given
-         detection_cache: dict, dataset_name => {img_path => detection_dict}
-         detector_version: str, detector version string, e.g., '4.1'
-         cropped_images_dir: str, path to folder where cropped images are saved
-         confidence_threshold: float, only crop bounding boxes above this value
-         save_full_images: bool, whether to save downloaded images to images_dir,
-             images_dir must be given and must exist if save_full_images=True
-         square_crops: bool, whether to crop bounding boxes as squares
-         check_crops_valid: bool, whether to load each crop to ensure the file is
-             valid (i.e., not truncated)
-         images_dir: optional str, path to folder where full images are saved
-         threads: int, number of threads to use for downloading images
-         images_missing_detections: optional list of str, image files to skip
-             because they have no ground truth or cached detected bounding boxes
-
-     Returns: list of str, images with bounding boxes that failed to download or
-         crop properly
-     """
-
-     # error checking before we download and crop any images
-     valid_img_paths = set(queried_images_json.keys())
-     if images_missing_detections is not None:
-         valid_img_paths -= set(images_missing_detections)
-     for img_path in valid_img_paths:
-         info_dict = queried_images_json[img_path]
-         ds, img_file = img_path.split('/', maxsplit=1)
-         assert ds == info_dict['dataset']
-
-         if 'bbox' in info_dict:  # ground-truth bounding boxes
-             pass
-         elif img_file in detection_cache[ds]:  # detected bounding boxes
-             bbox_dicts = detection_cache[ds][img_file]['detections']
-             assert all('conf' in bbox_dict for bbox_dict in bbox_dicts)
-             # convert from category ID to category name
-             for d in bbox_dicts:
-                 d['category'] = detection_categories[d['category']]
-         else:
-             raise ValueError(f'{img_path} has no ground truth bounding boxes '
-                              'and was not found in the detection cache. Please '
-                              'include it in images_missing_detections.')
-
-     # we need the datasets table for getting SAS keys
-     datasets_table = megadb_utils.MegadbUtils().get_datasets_table()
-     container_clients = {}  # dataset name => ContainerClient
-
-     pool = futures.ThreadPoolExecutor(max_workers=threads)
-     future_to_img_path = {}
-     images_failed_download = []
-
-     print(f'Getting bbox info for {len(valid_img_paths)} images...')
-     for img_path in tqdm(sorted(valid_img_paths)):
-         # we already did all error checking above, so we don't do any here
-         info_dict = queried_images_json[img_path]
-         ds, img_file = img_path.split('/', maxsplit=1)
-
-         # get ContainerClient
-         if ds not in container_clients:
-             sas_token = datasets_table[ds]['container_sas_key']
-             if sas_token[0] == '?':
-                 sas_token = sas_token[1:]
-             url = sas_blob_utils.build_azure_storage_uri(
-                 account=datasets_table[ds]['storage_account'],
-                 container=datasets_table[ds]['container'],
-                 sas_token=sas_token)
-             container_clients[ds] = ContainerClient.from_container_url(url)
-         container_client = container_clients[ds]
-
-         # get bounding boxes
-         # we must include the dataset <ds> in <crop_path_template> because
-         # '{img_path}' actually gets populated with <img_file> in
-         # load_and_crop()
-         is_ground_truth = ('bbox' in info_dict)
-         if is_ground_truth:  # ground-truth bounding boxes
-             bbox_dicts = info_dict['bbox']
-             crop_path_template = os.path.join(
-                 cropped_images_dir, ds, '{img_path}___crop{n:>02d}.jpg')
-         else:  # detected bounding boxes
-             bbox_dicts = detection_cache[ds][img_file]['detections']
-             crop_path_template = os.path.join(
-                 cropped_images_dir, ds,
-                 '{img_path}___crop{n:>02d}_' + f'mdv{detector_version}.jpg')
-
-         ds_dir = None if images_dir is None else os.path.join(images_dir, ds)
-
-         # get the image, either from disk or from Blob Storage
-         future = pool.submit(
-             load_and_crop, img_file, ds_dir, container_client, bbox_dicts,
-             confidence_threshold, crop_path_template, save_full_images,
-             square_crops, check_crops_valid)
-         future_to_img_path[future] = img_path
-
-     total = len(future_to_img_path)
-     total_downloads = 0
-     total_new_crops = 0
-     print(f'Reading/downloading {total} images and cropping...')
-     for future in tqdm(futures.as_completed(future_to_img_path), total=total):
-         img_path = future_to_img_path[future]
-         try:
-             did_download, num_new_crops = future.result()
-             total_downloads += did_download
-             total_new_crops += num_new_crops
-         except Exception as e:  # pylint: disable=broad-except
-             exception_type = type(e).__name__
-             tqdm.write(f'{img_path} - generated {exception_type}: {e}')
-             images_failed_download.append(img_path)
-
-     pool.shutdown()
-     for container_client in container_clients.values():
-         # inelegant way to close the container_clients
-         with container_client:
-             pass
-
-     print(f'Downloaded {total_downloads} images.')
-     print(f'Made {total_new_crops} new crops.')
-     return images_failed_download, total_downloads, total_new_crops
-
-
- #%% Command-line driver
-
- def _parse_args() -> argparse.Namespace:
-
-     parser = argparse.ArgumentParser(
-         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-         description='Detects and crops images.')
-     parser.add_argument(
-         'queried_images_json',
-         help='path to JSON file mapping image paths and classification info')
-     parser.add_argument(
-         'output_dir',
-         help='path to directory to save log file. If --run-detector, then '
-              'task lists and status responses are also saved here.')
-     parser.add_argument(
-         '-c', '--detector-output-cache-dir', required=True,
-         help='(required) path to directory where detector outputs are cached')
-     parser.add_argument(
-         '-v', '--detector-version', required=True,
-         help='(required) detector version string, e.g., "4.1"')
-     parser.add_argument(
-         '-d', '--run-detector', action='store_true',
-         help='Run the Batch Detection API. If not given, skips running the '
-              'detector (and only use ground truth and cached bounding boxes).')
-     parser.add_argument(
-         '-r', '--resume-file',
-         help='path to save JSON file with list of info dicts on running tasks, '
-              'or to resume from running tasks. Only used if --run-detector is '
-              'set. Each dict has keys '
-              '["dataset", "task_id", "task_name", "local_images_list_path", '
-              '"remote_images_list_url"]')
-     parser.add_argument(
-         '-p', '--cropped-images-dir',
-         help='path to local directory for saving crops of bounding boxes. No '
-              'images are downloaded or cropped if this argument is not given.')
-     parser.add_argument(
-         '--save-full-images', action='store_true',
-         help='if downloading an image, save the full image to --images-dir, '
-              'only used if <cropped_images_dir> is not None')
-     parser.add_argument(
-         '--square-crops', action='store_true',
-         help='crop bounding boxes as squares, '
-              'only used if <cropped_images_dir> is not None')
-     parser.add_argument(
-         '--check-crops-valid', action='store_true',
-         help='load each crop to ensure file is valid (i.e., not truncated), '
-              'only used if <cropped_images_dir> is not None')
-     parser.add_argument(
-         '-t', '--threshold', type=float, default=0.0,
-         help='confidence threshold above which to crop bounding boxes, '
-              'only used if <cropped_images_dir> is not None')
-     parser.add_argument(
-         '-i', '--images-dir',
-         help='path to local directory where images are saved, '
-              'only used if <cropped_images_dir> is not None')
-     parser.add_argument(
-         '-n', '--threads', type=int, default=1,
-         help='number of threads to use for downloading images, '
-              'only used if <cropped_images_dir> is not None')
-     return parser.parse_args()
-
-
- if __name__ == '__main__':
-
-     args = _parse_args()
-     main(queried_images_json_path=args.queried_images_json,
-          output_dir=args.output_dir,
-          detector_version=args.detector_version,
-          detector_output_cache_base_dir=args.detector_output_cache_dir,
-          run_detector=args.run_detector,
-          resume_file_path=args.resume_file,
-          cropped_images_dir=args.cropped_images_dir,
-          save_full_images=args.save_full_images,
-          square_crops=args.square_crops,
-          check_crops_valid=args.check_crops_valid,
-          confidence_threshold=args.threshold,
-          images_dir=args.images_dir,
-          threads=args.threads)
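For comparison with the example usage embedded in the file above, a run that omits --run-detector (cropping only ground-truth and previously cached boxes, per the argument help text) might look like this; all paths are placeholders:

    python detect_and_crop.py \
        base_logdir/queried_images.json \
        base_logdir \
        --detector-output-cache-dir /path/to/classifier-training/mdcache \
        --detector-version 4.1 \
        --cropped-images-dir /path/to/crops --square-crops --threshold 0.9 \
        --threads 50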