megadetector 10.0.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of megadetector has been flagged as potentially problematic.
- megadetector/__init__.py +0 -0
- megadetector/api/__init__.py +0 -0
- megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
- megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +125 -0
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
- megadetector/classification/__init__.py +0 -0
- megadetector/classification/aggregate_classifier_probs.py +108 -0
- megadetector/classification/analyze_failed_images.py +227 -0
- megadetector/classification/cache_batchapi_outputs.py +198 -0
- megadetector/classification/create_classification_dataset.py +626 -0
- megadetector/classification/crop_detections.py +516 -0
- megadetector/classification/csv_to_json.py +226 -0
- megadetector/classification/detect_and_crop.py +853 -0
- megadetector/classification/efficientnet/__init__.py +9 -0
- megadetector/classification/efficientnet/model.py +415 -0
- megadetector/classification/efficientnet/utils.py +608 -0
- megadetector/classification/evaluate_model.py +520 -0
- megadetector/classification/identify_mislabeled_candidates.py +152 -0
- megadetector/classification/json_to_azcopy_list.py +63 -0
- megadetector/classification/json_validator.py +696 -0
- megadetector/classification/map_classification_categories.py +276 -0
- megadetector/classification/merge_classification_detection_output.py +509 -0
- megadetector/classification/prepare_classification_script.py +194 -0
- megadetector/classification/prepare_classification_script_mc.py +228 -0
- megadetector/classification/run_classifier.py +287 -0
- megadetector/classification/save_mislabeled.py +110 -0
- megadetector/classification/train_classifier.py +827 -0
- megadetector/classification/train_classifier_tf.py +725 -0
- megadetector/classification/train_utils.py +323 -0
- megadetector/data_management/__init__.py +0 -0
- megadetector/data_management/animl_to_md.py +161 -0
- megadetector/data_management/annotations/__init__.py +0 -0
- megadetector/data_management/annotations/annotation_constants.py +33 -0
- megadetector/data_management/camtrap_dp_to_coco.py +270 -0
- megadetector/data_management/cct_json_utils.py +566 -0
- megadetector/data_management/cct_to_md.py +184 -0
- megadetector/data_management/cct_to_wi.py +293 -0
- megadetector/data_management/coco_to_labelme.py +284 -0
- megadetector/data_management/coco_to_yolo.py +702 -0
- megadetector/data_management/databases/__init__.py +0 -0
- megadetector/data_management/databases/add_width_and_height_to_db.py +107 -0
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +210 -0
- megadetector/data_management/databases/integrity_check_json_db.py +528 -0
- megadetector/data_management/databases/subset_json_db.py +195 -0
- megadetector/data_management/generate_crops_from_cct.py +200 -0
- megadetector/data_management/get_image_sizes.py +164 -0
- megadetector/data_management/labelme_to_coco.py +559 -0
- megadetector/data_management/labelme_to_yolo.py +349 -0
- megadetector/data_management/lila/__init__.py +0 -0
- megadetector/data_management/lila/create_lila_blank_set.py +556 -0
- megadetector/data_management/lila/create_lila_test_set.py +187 -0
- megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
- megadetector/data_management/lila/download_lila_subset.py +182 -0
- megadetector/data_management/lila/generate_lila_per_image_labels.py +777 -0
- megadetector/data_management/lila/get_lila_annotation_counts.py +174 -0
- megadetector/data_management/lila/get_lila_image_counts.py +112 -0
- megadetector/data_management/lila/lila_common.py +319 -0
- megadetector/data_management/lila/test_lila_metadata_urls.py +164 -0
- megadetector/data_management/mewc_to_md.py +344 -0
- megadetector/data_management/ocr_tools.py +873 -0
- megadetector/data_management/read_exif.py +964 -0
- megadetector/data_management/remap_coco_categories.py +195 -0
- megadetector/data_management/remove_exif.py +156 -0
- megadetector/data_management/rename_images.py +194 -0
- megadetector/data_management/resize_coco_dataset.py +663 -0
- megadetector/data_management/speciesnet_to_md.py +41 -0
- megadetector/data_management/wi_download_csv_to_coco.py +247 -0
- megadetector/data_management/yolo_output_to_md_output.py +594 -0
- megadetector/data_management/yolo_to_coco.py +876 -0
- megadetector/data_management/zamba_to_md.py +188 -0
- megadetector/detection/__init__.py +0 -0
- megadetector/detection/change_detection.py +840 -0
- megadetector/detection/process_video.py +479 -0
- megadetector/detection/pytorch_detector.py +1451 -0
- megadetector/detection/run_detector.py +1267 -0
- megadetector/detection/run_detector_batch.py +2159 -0
- megadetector/detection/run_inference_with_yolov5_val.py +1314 -0
- megadetector/detection/run_md_and_speciesnet.py +1494 -0
- megadetector/detection/run_tiled_inference.py +1038 -0
- megadetector/detection/tf_detector.py +209 -0
- megadetector/detection/video_utils.py +1379 -0
- megadetector/postprocessing/__init__.py +0 -0
- megadetector/postprocessing/add_max_conf.py +72 -0
- megadetector/postprocessing/categorize_detections_by_size.py +166 -0
- megadetector/postprocessing/classification_postprocessing.py +1752 -0
- megadetector/postprocessing/combine_batch_outputs.py +249 -0
- megadetector/postprocessing/compare_batch_results.py +2110 -0
- megadetector/postprocessing/convert_output_format.py +403 -0
- megadetector/postprocessing/create_crop_folder.py +629 -0
- megadetector/postprocessing/detector_calibration.py +570 -0
- megadetector/postprocessing/generate_csv_report.py +522 -0
- megadetector/postprocessing/load_api_results.py +223 -0
- megadetector/postprocessing/md_to_coco.py +428 -0
- megadetector/postprocessing/md_to_labelme.py +351 -0
- megadetector/postprocessing/md_to_wi.py +41 -0
- megadetector/postprocessing/merge_detections.py +392 -0
- megadetector/postprocessing/postprocess_batch_results.py +2077 -0
- megadetector/postprocessing/remap_detection_categories.py +226 -0
- megadetector/postprocessing/render_detection_confusion_matrix.py +677 -0
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +206 -0
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +82 -0
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1665 -0
- megadetector/postprocessing/separate_detections_into_folders.py +795 -0
- megadetector/postprocessing/subset_json_detector_output.py +964 -0
- megadetector/postprocessing/top_folders_to_bottom.py +238 -0
- megadetector/postprocessing/validate_batch_results.py +332 -0
- megadetector/taxonomy_mapping/__init__.py +0 -0
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +213 -0
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +165 -0
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +543 -0
- megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
- megadetector/taxonomy_mapping/simple_image_download.py +224 -0
- megadetector/taxonomy_mapping/species_lookup.py +1008 -0
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
- megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
- megadetector/tests/__init__.py +0 -0
- megadetector/tests/test_nms_synthetic.py +335 -0
- megadetector/utils/__init__.py +0 -0
- megadetector/utils/ct_utils.py +1857 -0
- megadetector/utils/directory_listing.py +199 -0
- megadetector/utils/extract_frames_from_video.py +307 -0
- megadetector/utils/gpu_test.py +125 -0
- megadetector/utils/md_tests.py +2072 -0
- megadetector/utils/path_utils.py +2832 -0
- megadetector/utils/process_utils.py +172 -0
- megadetector/utils/split_locations_into_train_val.py +237 -0
- megadetector/utils/string_utils.py +234 -0
- megadetector/utils/url_utils.py +825 -0
- megadetector/utils/wi_platform_utils.py +968 -0
- megadetector/utils/wi_taxonomy_utils.py +1759 -0
- megadetector/utils/write_html_image_list.py +239 -0
- megadetector/visualization/__init__.py +0 -0
- megadetector/visualization/plot_utils.py +309 -0
- megadetector/visualization/render_images_with_thumbnails.py +243 -0
- megadetector/visualization/visualization_utils.py +1940 -0
- megadetector/visualization/visualize_db.py +630 -0
- megadetector/visualization/visualize_detector_output.py +479 -0
- megadetector/visualization/visualize_video_output.py +705 -0
- megadetector-10.0.13.dist-info/METADATA +134 -0
- megadetector-10.0.13.dist-info/RECORD +147 -0
- megadetector-10.0.13.dist-info/WHEEL +5 -0
- megadetector-10.0.13.dist-info/licenses/LICENSE +19 -0
- megadetector-10.0.13.dist-info/top_level.txt +1 -0
@@ -0,0 +1,2159 @@

"""

run_detector_batch.py

Module to run MegaDetector on lots of images, writing the results
to a file in the MegaDetector results format:

https://lila.science/megadetector-output-format

This enables the results to be used in our post-processing pipeline; see postprocess_batch_results.py.

This script can save results to checkpoints intermittently, in case disaster
strikes. To enable this, set --checkpoint_frequency to n > 0, and results
will be saved as a checkpoint every n images. Checkpoints will be written
to a file in the same directory as the output_file; after all images
are processed and the final results file is written to output_file, the
temporary checkpoint file will be deleted. If you want to resume from a
checkpoint, set the checkpoint file's path using --resume_from_checkpoint.

Has multiprocessing support for CPUs only; if a GPU is available, it will
be used instead of the CPU, and the --ncores option will be ignored.
Checkpointing is not supported when using a GPU.

The lack of GPU multiprocessing support might sound annoying, but in practice
we run a gazillion MegaDetector images on multiple GPUs using this script; we
just use one GPU *per invocation of this script*. Dividing a list of images
into one chunk per GPU happens outside of this script.

Does not have a command-line option to bind the process to a particular GPU,
but you can prepend with "CUDA_VISIBLE_DEVICES=0 ", for example, to bind to
GPU 0, e.g.:

CUDA_VISIBLE_DEVICES=0 python detection/run_detector_batch.py md_v4.1.0.pb ~/data ~/mdv4test.json

You can disable GPU processing entirely by setting CUDA_VISIBLE_DEVICES=''.

"""

#%% Constants, imports, environment

import argparse
import json
import os
import sys
import time
import copy
import shutil
import random
import warnings
import itertools
import humanfriendly

from datetime import datetime
from functools import partial
from copy import deepcopy
from tqdm import tqdm

import multiprocessing
from threading import Thread
from multiprocessing import Process, Manager

# This pool is used for multi-CPU parallelization, not for data loading workers
# from multiprocessing.pool import ThreadPool as workerpool
from multiprocessing.pool import Pool as workerpool

from megadetector.detection import run_detector
from megadetector.detection.run_detector import \
    is_gpu_available,\
    load_detector,\
    try_download_known_detector,\
    get_detector_version_from_filename,\
    get_detector_metadata_from_version_string

from megadetector.utils import path_utils
from megadetector.utils import ct_utils
from megadetector.utils.ct_utils import parse_kvp_list
from megadetector.utils.ct_utils import split_list_into_n_chunks
from megadetector.utils.ct_utils import sort_list_of_dicts_by_key
from megadetector.visualization import visualization_utils as vis_utils
from megadetector.data_management import read_exif
from megadetector.data_management.yolo_output_to_md_output import read_classes_from_yolo_dataset_file

# Ignore numpy FutureWarnings triggered by the tensorflow import
warnings.filterwarnings('ignore', category=FutureWarning)

# Default number of loaders to use when --image_queue is set
default_loaders = 4

# Should we do preprocessing on the image queue?
default_preprocess_on_image_queue = False

# Number of images to pre-fetch per worker
max_queue_size = 10

# How often should we print progress when using the image queue?
n_queue_print = 1000

# Only used if --include_exif_tags or --include_image_timestamp are supplied
exif_options_base = read_exif.ReadExifOptions()
exif_options_base.processing_library = 'pil'
exif_options_base.byte_handling = 'convert_to_string'

# Only relevant when we're running our test harness; because bugs in batch
# inference are dependent on batch grouping, we randomize batch grouping
# during testing to maximize the probability that latent bugs come up
# eventually.
randomize_batch_order_during_testing = True

# TODO: it's a little sloppy that the following are module-level globals, but in practice it
# doesn't really matter, so I'm not in a big rush to move these to options until I do
# a larger cleanup of all the long argument lists in this module.

# Should the consumer loop run on its own process, or here in the main process?
run_separate_consumer_process = False

# Should we use threads (rather than processes) for the data loading workers?
use_threads_for_queue = False

# Enable additional debug output
verbose = False

#%% Support functions for multiprocessing

def _producer_func(q,
                   image_files,
                   producer_id=-1,
                   preprocessor=None,
                   detector_options=None,
                   verbose=False,
                   image_size=None,
                   augment=None):
    """
    Producer function; only used when using the (optional) image queue.

    Reads images from disk, optionally preprocesses them (depending on whether "preprocessor"
    is None), then puts them on the blocking queue for processing. Each image is queued as a
    tuple of (filename, image, producer_id). Sends "None" to the queue when finished.

    The "detector_options" argument is only used for preprocessing.

    Args:
        q (Queue): multiprocessing queue to put loaded/preprocessed images into
        image_files (list): list of image file paths to process
        producer_id (int, optional): identifier for this producer worker (for logging)
        preprocessor (str, optional): model file path/identifier for preprocessing, or None
            to skip preprocessing
        detector_options (dict, optional): key/value pairs that are interpreted differently
            by different detectors
        verbose (bool, optional): enable additional debug output
        image_size (int, optional): image size to use for preprocessing
        augment (bool, optional): enable image augmentation during preprocessing
    """

    if verbose:
        print('Producer starting: ID {}, preprocessor {}'.format(producer_id,preprocessor))
        sys.stdout.flush()

    if preprocessor is not None:
        assert isinstance(preprocessor,str)
        detector_options = deepcopy(detector_options)
        # Tell the detector object it's being loaded as a preprocessor, so it
        # shouldn't actually load model weights.
        detector_options['preprocess_only'] = True
        preprocessor = load_detector(preprocessor,
                                     detector_options=detector_options,
                                     verbose=verbose)

    for im_file in image_files:

        try:

            image = vis_utils.load_image(im_file)

            if preprocessor is not None:

                image_info = preprocessor.preprocess_image(image,
                                                           image_id=im_file,
                                                           image_size=image_size,
                                                           verbose=verbose)
                if 'failure' in image_info:
                    assert image_info['failure'] == run_detector.FAILURE_INFER
                    # Let the except block below record this as a failure
                    raise ValueError('Preprocessing failure for image {}'.format(im_file))

                image = image_info

        except Exception as e:
            print('Producer process: image {} cannot be loaded:\n{}'.format(im_file,str(e)))
            image = run_detector.FAILURE_IMAGE_OPEN

        q.put([im_file,image,producer_id])

    # ...for each image

    # This is a signal to the consumer function that a worker has finished
    q.put(None)

    if verbose:
        print('Loader worker {} finished'.format(producer_id))
        sys.stdout.flush()

# ...def _producer_func(...)
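
The producer/consumer handshake above is easiest to see in isolation. A self-contained sketch of the same protocol (toy items instead of images; one None sentinel per producer tells the consumer when to stop):

import multiprocessing

def _toy_producer(q, items, producer_id):
    for item in items:
        q.put([item, producer_id])
    q.put(None)  # sentinel: this producer is done

if __name__ == '__main__':
    q = multiprocessing.JoinableQueue(10)
    producers = [multiprocessing.Process(target=_toy_producer,
                                         args=(q, ['a', 'b'], i)) for i in range(2)]
    for p in producers:
        p.start()
    n_finished = 0
    # Stop once we've seen one sentinel per producer
    while n_finished < len(producers):
        r = q.get()
        q.task_done()
        if r is None:
            n_finished += 1
        else:
            print('Got {} from producer {}'.format(r[0], r[1]))
    for p in producers:
        p.join()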


def _consumer_func(q,
                   return_queue,
                   model_file,
                   confidence_threshold,
                   loader_workers,
                   image_size=None,
                   include_image_size=False,
                   include_image_timestamp=False,
                   include_exif_tags=None,
                   augment=False,
                   detector_options=None,
                   preprocess_on_image_queue=default_preprocess_on_image_queue,
                   n_total_images=None,
                   batch_size=1,
                   checkpoint_path=None,
                   checkpoint_frequency=-1
                   ):
    """
    Consumer function; only used when using the (optional) image queue.

    Pulls images from a blocking queue and processes them. Returns when "None" has
    been read from each loader's queue.

    Args:
        q (Queue): multiprocessing queue to pull images from
        return_queue (Queue): queue to put final results into
        model_file (str or detector object): model file path/identifier or pre-loaded detector
        confidence_threshold (float): only detections above this threshold are returned
        loader_workers (int): number of producer workers (used to know when all are finished)
        image_size (int, optional): image size to use for inference
        include_image_size (bool, optional): include image dimensions in output
        include_image_timestamp (bool, optional): include image timestamps in output
        include_exif_tags (str, optional): comma-separated list of EXIF tags to include in output
        augment (bool, optional): enable image augmentation
        detector_options (dict, optional): key/value pairs that are interpreted differently
            by different detectors
        preprocess_on_image_queue (bool, optional): whether images are already preprocessed on
            the queue
        n_total_images (int, optional): total number of images expected (for progress bar)
        batch_size (int, optional): batch size for GPU inference
        checkpoint_path (str, optional): path to write checkpoint files; None disables
            checkpointing
        checkpoint_frequency (int, optional): write a checkpoint every N images; -1 disables
            checkpointing
    """

    if verbose:
        print('Consumer starting'); sys.stdout.flush()

    start_time = time.time()

    if isinstance(model_file,str):
        detector = load_detector(model_file,
                                 detector_options=detector_options,
                                 verbose=verbose)
        elapsed = time.time() - start_time
        print('Loaded model (before queueing) in {}, printing updates every {} images'.format(
            humanfriendly.format_timespan(elapsed),n_queue_print))
        sys.stdout.flush()
    else:
        detector = model_file
        print('Detector of type {} passed to consumer function'.format(type(detector)))

    results = []

    n_images_processed = 0
    n_queues_finished = 0
    last_checkpoint_count = 0

    def _should_write_checkpoint():
        """
        Check whether we should write a checkpoint. Returns True if we've crossed a
        checkpoint boundary.
        """

        if (checkpoint_frequency <= 0) or (checkpoint_path is None):
            return False

        # Calculate the checkpoint threshold we should have crossed
        current_checkpoint_threshold = \
            (n_images_processed // checkpoint_frequency) * checkpoint_frequency
        last_checkpoint_threshold = \
            (last_checkpoint_count // checkpoint_frequency) * checkpoint_frequency

        # We should write a checkpoint if we've crossed into a new checkpoint interval
        return (current_checkpoint_threshold > last_checkpoint_threshold)

    pbar = None
    if n_total_images is not None:
        # TODO: in principle I should close this pbar
        pbar = tqdm(total=n_total_images)

    # Batch processing state
    if batch_size > 1:
        current_batch_items = []

    while True:

        r = q.get()

        # Is this the last image in one of the producer queues?
        if r is None:

            n_queues_finished += 1
            q.task_done()

            if verbose:
                print('Consumer thread: {} of {} queues finished'.format(
                    n_queues_finished,loader_workers))

            # Was this the last worker to finish?
            if n_queues_finished == loader_workers:

                # Do we have any leftover images?
                if (batch_size > 1) and (len(current_batch_items) > 0):

                    # We should never have more than one batch of work left to do, so this
                    # loop is not strictly necessary; it's a bit of future-proofing.
                    leftover_batches = _group_into_batches(current_batch_items, batch_size)

                    if len(leftover_batches) > 1:
                        print('Warning: after all producer queues finished, '
                              '{} images were left for processing, which is more than '
                              'the batch size of {}'.format(len(current_batch_items),batch_size))

                    for leftover_batch in leftover_batches:

                        batch_results = _process_batch(leftover_batch,
                                                       detector,
                                                       confidence_threshold,
                                                       quiet=True,
                                                       image_size=image_size,
                                                       include_image_size=include_image_size,
                                                       include_image_timestamp=include_image_timestamp,
                                                       include_exif_tags=include_exif_tags,
                                                       augment=augment)
                        results.extend(batch_results)

                        if pbar is not None:
                            pbar.update(len(leftover_batch))

                        n_images_processed += len(leftover_batch)

                        # In theory we could write a checkpoint here, but because we're basically
                        # done at this point, there's not much upside to writing another checkpoint,
                        # so for simplicity, I'm skipping it.

                    # ...for each batch we have left to process

                return_queue.put(results)
                return

            else:

                continue

        # ...if we pulled the sentinel signal (None) telling us that a worker finished

        # At this point, we have a real image (i.e., not a sentinel indicating that a worker finished)
        #
        # "r" is always a tuple of (filename,image,producer_id)
        #
        # "image" can be a PIL image (if the loader wasn't doing preprocessing) or a dict with
        # a preprocessed image and associated metadata.
        im_file = r[0]
        image = r[1]

        # Handle failed images immediately (don't batch them)
        #
        # Loader workers communicate failures by passing a string to
        # the consumer, rather than an image.
        if isinstance(image,str):

            results.append({'file': im_file,
                            'failure': image})
            n_images_processed += 1

            if pbar is not None:
                pbar.update(1)

        # This is a catastrophic internal failure; preprocessing workers should
        # be passing the consumer dicts that represent processed images
        elif preprocess_on_image_queue and (not isinstance(image,dict)):

            print('Expected a dict, received an image of type {}'.format(type(image)))
            results.append({'file': im_file,
                            'failure': 'illegal image type'})
            n_images_processed += 1

            if pbar is not None:
                pbar.update(1)

        else:

            # At this point, "image" is either an image (if the producer workers are only
            # doing loading) or a dict (if the producer workers are doing preprocessing)

            if batch_size > 1:

                # Add to current batch
                current_batch_items.append([im_file, image, r[2]])

                # Process batch when full
                if len(current_batch_items) >= batch_size:
                    batch_results = _process_batch(current_batch_items,
                                                   detector,
                                                   confidence_threshold,
                                                   quiet=True,
                                                   image_size=image_size,
                                                   include_image_size=include_image_size,
                                                   include_image_timestamp=include_image_timestamp,
                                                   include_exif_tags=include_exif_tags,
                                                   augment=augment)
                    results.extend(batch_results)

                    if pbar is not None:
                        pbar.update(len(current_batch_items))

                    n_images_processed += len(current_batch_items)
                    current_batch_items = []
            else:

                # Process single image
                result = _process_image(im_file=im_file,
                                        detector=detector,
                                        confidence_threshold=confidence_threshold,
                                        image=image,
                                        quiet=True,
                                        image_size=image_size,
                                        include_image_size=include_image_size,
                                        include_image_timestamp=include_image_timestamp,
                                        include_exif_tags=include_exif_tags,
                                        augment=augment)
                results.append(result)
                n_images_processed += 1

                if pbar is not None:
                    pbar.update(1)

            # ...if we are/aren't doing batch processing

        # Write checkpoint if necessary
        if _should_write_checkpoint():
            print('Consumer: writing checkpoint after {} images'.format(
                n_images_processed))
            write_checkpoint(checkpoint_path, results)
            last_checkpoint_count = n_images_processed

        # ...whether we received a string (indicating failure) or an image from the loader worker

        q.task_done()

    # ...while True (consumer loop)

# ...def _consumer_func(...)
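
The checkpoint test above fires whenever processing crosses a multiple of checkpoint_frequency since the last checkpoint, which matters because batch processing can advance the counter by more than one image at a time. A standalone illustration of the same arithmetic:

def crossed_checkpoint_boundary(n_processed, last_count, frequency):
    if frequency <= 0:
        return False
    return (n_processed // frequency) > (last_count // frequency)

assert not crossed_checkpoint_boundary(95, 0, 100)    # haven't reached 100 yet
assert crossed_checkpoint_boundary(160, 95, 100)      # crossed 100 between 95 and 160
assert not crossed_checkpoint_boundary(199, 160, 100) # 100 already checkpointed, 200 not reached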


def _run_detector_with_image_queue(image_files,
                                   model_file,
                                   confidence_threshold,
                                   quiet=False,
                                   image_size=None,
                                   include_image_size=False,
                                   include_image_timestamp=False,
                                   include_exif_tags=None,
                                   augment=False,
                                   detector_options=None,
                                   loader_workers=default_loaders,
                                   preprocess_on_image_queue=default_preprocess_on_image_queue,
                                   batch_size=1,
                                   checkpoint_path=None,
                                   checkpoint_frequency=-1):
    """
    Driver function for the (optional) multiprocessing-based image queue. Spawns workers to
    read and preprocess images, and runs the consumer function in the calling process.

    Args:
        image_files (list): list of absolute paths to images
        model_file (str): filename or model identifier (e.g. "MDV5A")
        confidence_threshold (float): minimum detection confidence to include in the output
        quiet (bool, optional): suppress per-image console printouts
        image_size (int, optional): image size to use for inference; only mess with this
            if (a) you're using a model other than MegaDetector or (b) you know what you're
            doing
        include_image_size (bool, optional): should we include image size in the output for
            each image?
        include_image_timestamp (bool, optional): should we include image timestamps in the
            output for each image?
        include_exif_tags (str, optional): comma-separated list of EXIF tags to include in output
        augment (bool, optional): enable image augmentation
        detector_options (dict, optional): key/value pairs that are interpreted differently
            by different detectors
        loader_workers (int, optional): number of loaders to use
        preprocess_on_image_queue (bool, optional): if the image queue is enabled, should it
            handle image loading and preprocessing (True), or just image loading (False)?
        batch_size (int, optional): batch size for GPU processing
        checkpoint_path (str, optional): path to write checkpoint files; None disables
            checkpointing
        checkpoint_frequency (int, optional): write a checkpoint every N images; -1 disables
            checkpointing

    Returns:
        list: list of dicts in the format returned by process_image()
    """

    # Validate inputs
    assert isinstance(model_file,str)

    if loader_workers <= 0:
        loader_workers = 1

    if detector_options is None:
        detector_options = {}

    q = multiprocessing.JoinableQueue(max_queue_size)
    return_queue = multiprocessing.Queue(1)

    producers = []

    worker_string = 'thread' if use_threads_for_queue else 'process'
    print('Starting a {} pool with {} workers'.format(worker_string,loader_workers))

    preprocessor = None

    if preprocess_on_image_queue:
        print('Enabling image queue preprocessing')
        preprocessor = model_file

    n_total_images = len(image_files)

    chunks = split_list_into_n_chunks(image_files, loader_workers, chunk_strategy='greedy')
    for i_chunk,chunk in enumerate(chunks):
        if use_threads_for_queue:
            producer = Thread(target=_producer_func,args=(q,
                                                          chunk,
                                                          i_chunk,
                                                          preprocessor,
                                                          detector_options,
                                                          verbose,
                                                          image_size,
                                                          augment))
        else:
            producer = Process(target=_producer_func,args=(q,
                                                           chunk,
                                                           i_chunk,
                                                           preprocessor,
                                                           detector_options,
                                                           verbose,
                                                           image_size,
                                                           augment))
        producers.append(producer)

    for producer in producers:
        producer.daemon = False
        producer.start()

    if run_separate_consumer_process:
        if use_threads_for_queue:
            consumer = Thread(target=_consumer_func,args=(q,
                                                          return_queue,
                                                          model_file,
                                                          confidence_threshold,
                                                          loader_workers,
                                                          image_size,
                                                          include_image_size,
                                                          include_image_timestamp,
                                                          include_exif_tags,
                                                          augment,
                                                          detector_options,
                                                          preprocess_on_image_queue,
                                                          n_total_images,
                                                          batch_size,
                                                          checkpoint_path,
                                                          checkpoint_frequency))
        else:
            consumer = Process(target=_consumer_func,args=(q,
                                                           return_queue,
                                                           model_file,
                                                           confidence_threshold,
                                                           loader_workers,
                                                           image_size,
                                                           include_image_size,
                                                           include_image_timestamp,
                                                           include_exif_tags,
                                                           augment,
                                                           detector_options,
                                                           preprocess_on_image_queue,
                                                           n_total_images,
                                                           batch_size,
                                                           checkpoint_path,
                                                           checkpoint_frequency))
        consumer.daemon = True
        consumer.start()
    else:
        _consumer_func(q,
                       return_queue,
                       model_file,
                       confidence_threshold,
                       loader_workers,
                       image_size,
                       include_image_size,
                       include_image_timestamp,
                       include_exif_tags,
                       augment,
                       detector_options,
                       preprocess_on_image_queue,
                       n_total_images,
                       batch_size,
                       checkpoint_path,
                       checkpoint_frequency)

    for i_producer,producer in enumerate(producers):
        producer.join()
        if verbose:
            print('Producer {} finished'.format(i_producer))

    if verbose:
        print('All producers finished')

    if run_separate_consumer_process:
        consumer.join()
        if verbose:
            print('Consumer loop finished')

    q.join()
    if verbose:
        print('Queue joined')

    results = return_queue.get()

    return results

# ...def _run_detector_with_image_queue(...)


#%% Other support functions

def _chunks_by_number_of_chunks(ls, n):
    """
    Splits a list into n roughly-even chunks.

    External callers should use ct_utils.split_list_into_n_chunks().

    Args:
        ls (list): list to break up into chunks
        n (int): number of chunks
    """

    for i in range(0, n):
        yield ls[i::n]
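
A quick illustration of the striped chunking this generator produces (elements are dealt round-robin, so chunk lengths differ by at most one):

chunks = list(_chunks_by_number_of_chunks(list(range(7)), 3))
# chunks == [[0, 3, 6], [1, 4], [2, 5]]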


#%% Batch processing helper functions

def _group_into_batches(items, batch_size):
    """
    Group items into batches.

    Args:
        items (list): items to group into batches
        batch_size (int): size of each batch

    Returns:
        list: list of batches, where each batch is a list of items
    """

    if batch_size <= 0:
        raise ValueError('Batch size must be positive')

    batches = []
    for i_item in range(0, len(items), batch_size):
        batch = items[i_item:i_item + batch_size]
        batches.append(batch)

    return batches
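
Unlike the striped chunking above, this helper produces contiguous batches, and only the final batch can be short:

batches = _group_into_batches(list(range(7)), 3)
# batches == [[0, 1, 2], [3, 4, 5], [6]]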


def _process_batch(image_items_batch,
                   detector,
                   confidence_threshold,
                   quiet=False,
                   image_size=None,
                   include_image_size=False,
                   include_image_timestamp=False,
                   include_exif_tags=None,
                   augment=False):
    """
    Process a batch of images using generate_detections_one_batch(). Does not necessarily
    return results in the same order in which they were supplied; in particular, images that
    fail to load are returned first, out of order.

    Args:
        image_items_batch (list): list of image file paths (strings) or list of tuples
            [filename, image, producer_id]
        detector: loaded detector object
        confidence_threshold (float): confidence threshold for detections
        quiet (bool, optional): suppress per-image output
        image_size (int, optional): image size override
        include_image_size (bool, optional): include image dimensions in results
        include_image_timestamp (bool, optional): include image timestamps in results
        include_exif_tags (str, optional): comma-separated list of EXIF tags to include in output
        augment (bool, optional): whether to use image augmentation

    Returns:
        list of dict: list of results for each image in the batch
    """

    # This will be the set of items we send for inference; it may be
    # smaller than the input list (image_items_batch) if some images
    # fail to load. [valid_images] will be either a list of PIL Image
    # objects or a list of dicts containing preprocessed images.
    valid_images = []
    valid_image_filenames = []

    batch_results = []

    for i_image, item in enumerate(image_items_batch):

        # Handle both filename strings and tuples
        if isinstance(item, str):
            im_file = item
            try:
                image = vis_utils.load_image(im_file)
            except Exception as e:
                print('Image {} cannot be loaded: {}'.format(im_file,str(e)))
                failed_result = {
                    'file': im_file,
                    'failure': run_detector.FAILURE_IMAGE_OPEN
                }
                batch_results.append(failed_result)
                continue
        else:
            assert len(item) == 3
            im_file, image, producer_id = item

        valid_images.append(image)
        valid_image_filenames.append(im_file)

    # ...for each image in the batch

    assert len(valid_images) == len(valid_image_filenames)

    valid_batch_results = []

    # Process the batch if we have any valid images
    if len(valid_images) > 0:

        try:

            batch_detections = \
                detector.generate_detections_one_batch(valid_images,
                                                       valid_image_filenames,
                                                       verbose=verbose)

            assert len(batch_detections) == len(valid_images)

            # Apply confidence threshold and add metadata
            for i_valid_image,image_result in enumerate(batch_detections):

                assert valid_image_filenames[i_valid_image] == image_result['file']

                if 'failure' not in image_result:

                    # Apply confidence threshold
                    image_result['detections'] = \
                        [det for det in image_result['detections'] if det['conf'] >= confidence_threshold]

                    if include_image_size or include_image_timestamp or (include_exif_tags is not None):

                        image = valid_images[i_valid_image]

                        # If this was preprocessed by the producer thread, pull out the PIL version
                        if isinstance(image,dict):

                            image = image['img_original_pil']

                        if include_image_size:

                            image_result['width'] = image.width
                            image_result['height'] = image.height

                        if include_image_timestamp:

                            image_result['datetime'] = get_image_datetime(image)

                        if include_exif_tags is not None:

                            exif_options = copy.copy(exif_options_base)
                            exif_options.tags_to_include = include_exif_tags
                            image_result['exif_metadata'] = read_exif.read_pil_exif(
                                image,exif_options)

                    # ...if we need to store metadata

                # ...if this image succeeded

                # Failures here should be very rare; there's almost no reason an image would
                # fail within a batch once it's been loaded
                else:

                    print('Warning: within-batch processing failure for image {}'.format(
                        image_result['file']))

                # Add to the list of results for the batch whether or not it succeeded
                valid_batch_results.append(image_result)

            # ...for each image in this batch

        except Exception as e:

            print('Batch processing failure for {} images: {}'.format(len(valid_images),str(e)))

            # Throw out any successful results for this batch; this should almost never happen
            valid_batch_results = []

            for image_id in valid_image_filenames:
                r = {'file':image_id,'failure': run_detector.FAILURE_INFER}
                valid_batch_results.append(r)

        # ...try/except

        assert len(valid_batch_results) == len(valid_images)

    # ...if we have valid images in this batch

    batch_results.extend(valid_batch_results)

    return batch_results

# ...def _process_batch(...)
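
Because load failures are appended to batch_results before inference results, callers that need deterministic ordering can re-sort by filename; a sketch using the already-imported ct_utils helper (assuming sort_list_of_dicts_by_key(list, key) returns a new list sorted ascending by that key):

batch_results = [{'file': 'b.jpg', 'failure': run_detector.FAILURE_IMAGE_OPEN},
                 {'file': 'a.jpg', 'detections': []}]
batch_results = sort_list_of_dicts_by_key(batch_results, 'file')
# a.jpg now comes first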


#%% Image processing functions

def _process_images(im_files,
                    detector,
                    confidence_threshold,
                    use_image_queue=False,
                    quiet=False,
                    image_size=None,
                    checkpoint_queue=None,
                    include_image_size=False,
                    include_image_timestamp=False,
                    include_exif_tags=None,
                    augment=False,
                    detector_options=None,
                    loader_workers=default_loaders,
                    preprocess_on_image_queue=default_preprocess_on_image_queue):
    """
    Runs a detector (typically MegaDetector) over a list of image files, possibly using
    multiple image loading workers, but not using multiple inference workers.

    Args:
        im_files (list): paths to image files
        detector (str or detector object): loaded model or str; if this is a string, it can
            be a path to a .pb/.pt model file or a known model identifier (e.g. "MDV5A")
        confidence_threshold (float): only detections above this threshold are returned
        use_image_queue (bool, optional): separate image loading onto a dedicated worker process
        quiet (bool, optional): suppress per-image printouts
        image_size (int, optional): image size to use for inference; only mess with this
            if (a) you're using a model other than MegaDetector or (b) you know what you're
            doing
        checkpoint_queue (Queue, optional): internal parameter used to pass results back
            for checkpointing
        include_image_size (bool, optional): should we include image size in the output for
            each image?
        include_image_timestamp (bool, optional): should we include image timestamps in the
            output for each image?
        include_exif_tags (str, optional): comma-separated list of EXIF tags to include in output
        augment (bool, optional): enable image augmentation
        detector_options (dict, optional): key/value pairs that are interpreted differently
            by different detectors
        loader_workers (int, optional): number of loaders to use (only relevant when using
            the image queue)
        preprocess_on_image_queue (bool, optional): if the image queue is enabled, should it
            handle image loading and preprocessing (True), or just image loading (False)?

    Returns:
        list: list of dicts, in which each dict represents detections on one image;
        see the 'images' key in https://lila.science/megadetector-output-format
    """

    if isinstance(detector, str):

        start_time = time.time()
        detector = load_detector(detector,
                                 detector_options=detector_options,
                                 verbose=verbose)
        elapsed = time.time() - start_time
        print('Loaded model (process_images) in {}'.format(humanfriendly.format_timespan(elapsed)))

    if detector_options is None:
        detector_options = {}

    if use_image_queue:

        results = _run_detector_with_image_queue(im_files,
                                                 detector,
                                                 confidence_threshold,
                                                 quiet=quiet,
                                                 image_size=image_size,
                                                 include_image_size=include_image_size,
                                                 include_image_timestamp=include_image_timestamp,
                                                 include_exif_tags=include_exif_tags,
                                                 augment=augment,
                                                 detector_options=detector_options,
                                                 loader_workers=loader_workers,
                                                 preprocess_on_image_queue=preprocess_on_image_queue)
        return results

    else:

        results = []
        for im_file in im_files:
            result = _process_image(im_file,
                                    detector,
                                    confidence_threshold,
                                    quiet=quiet,
                                    image_size=image_size,
                                    include_image_size=include_image_size,
                                    include_image_timestamp=include_image_timestamp,
                                    include_exif_tags=include_exif_tags,
                                    augment=augment)

            if checkpoint_queue is not None:
                checkpoint_queue.put(result)
            results.append(result)

        return results

    # ...if we are/aren't using the image queue

# ...def _process_images(...)


def _process_image(im_file,
                   detector,
                   confidence_threshold,
                   image=None,
                   quiet=False,
                   image_size=None,
                   include_image_size=False,
                   include_image_timestamp=False,
                   include_exif_tags=None,
                   augment=False):
    """
    Runs a detector (typically MegaDetector) on a single image file.

    Args:
        im_file (str): path to image file
        detector (detector object): loaded model; this can no longer be a string by the time
            you get this far down the pipeline
        confidence_threshold (float): only detections above this threshold are returned
        image (Image or dict, optional): previously-loaded image, if available; used when a
            worker thread is handling image loading (and possibly preprocessing)
        quiet (bool, optional): suppress per-image printouts
        image_size (int, optional): image size to use for inference; only mess with this
            if (a) you're using a model other than MegaDetector or (b) you know what you're
            doing
        include_image_size (bool, optional): should we include image size in the output for
            each image?
        include_image_timestamp (bool, optional): should we include image timestamps in the
            output for each image?
        include_exif_tags (str, optional): comma-separated list of EXIF tags to include in output
        augment (bool, optional): enable image augmentation

    Returns:
        dict: dict representing detections on one image; see the 'images' key in
        https://lila.science/megadetector-output-format
    """

    if not quiet:
        print('Processing image {}'.format(im_file))

    if image is None:
        try:
            image = vis_utils.load_image(im_file)
        except Exception as e:
            if not quiet:
                print('Image {} cannot be loaded. Exception: {}'.format(im_file, e))
            result = {
                'file': im_file,
                'failure': run_detector.FAILURE_IMAGE_OPEN
            }
            return result

    try:

        result = detector.generate_detections_one_image(
            image,
            im_file,
            detection_threshold=confidence_threshold,
            image_size=image_size,
            augment=augment,
            verbose=verbose)

    except Exception as e:
        if not quiet:
            print('Image {} cannot be processed. Exception: {}'.format(im_file, e))
        result = {
            'file': im_file,
            'failure': run_detector.FAILURE_INFER
        }
        return result

    # If this image has already been preprocessed
    if isinstance(image,dict):
        image = image['img_original_pil']

    if include_image_size:
        result['width'] = image.width
        result['height'] = image.height

    if include_image_timestamp:
        result['datetime'] = get_image_datetime(image)

    if include_exif_tags is not None:
        exif_options = copy.copy(exif_options_base)
        exif_options.tags_to_include = include_exif_tags
        result['exif_metadata'] = read_exif.read_pil_exif(image,exif_options)

    return result

# ...def _process_image(...)
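
For reference, the shape of the dict this function returns on success, per the MegaDetector output format (values illustrative; '1' is 'animal' in MD's default category map):

example_result = {
    'file': 'camera01/IMG_0001.JPG',
    'detections': [
        {'category': '1',
         'conf': 0.97,
         'bbox': [0.12, 0.34, 0.25, 0.40]}  # normalized [x_min, y_min, width, height]
    ]
}

# On failure, the dict carries a 'failure' field instead of 'detections':
example_failure = {'file': 'camera01/IMG_0002.JPG',
                   'failure': run_detector.FAILURE_IMAGE_OPEN}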


def _load_custom_class_mapping(class_mapping_filename):
    """
    Allows the use of non-MD models; disables the code that enforces MD-like class lists.

    Args:
        class_mapping_filename (str): .json file that maps int-strings to strings, or a
            YOLOv5 dataset.yaml file

    Returns:
        dict: maps class IDs (int-strings) to class names
    """

    if class_mapping_filename is None:
        return

    run_detector.USE_MODEL_NATIVE_CLASSES = True
    if class_mapping_filename.endswith('.json'):
        with open(class_mapping_filename,'r') as f:
            class_mapping = json.load(f)
    elif (class_mapping_filename.endswith('.yml') or class_mapping_filename.endswith('.yaml')):
        class_mapping = read_classes_from_yolo_dataset_file(class_mapping_filename)
        # Convert from ints to int-strings
        class_mapping = {str(k):v for k,v in class_mapping.items()}
    else:
        raise ValueError('Unrecognized class mapping file {}'.format(class_mapping_filename))

    print('Loaded custom class mapping:')
    print(class_mapping)
    run_detector.DEFAULT_DETECTOR_LABEL_MAP = class_mapping
    return class_mapping
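
A hypothetical class-mapping file this function would accept, written from Python (keys are int-strings, matching what the YOLO-dataset path produces after its str() conversion):

import json

class_mapping = {'0': 'animal', '1': 'person', '2': 'vehicle'}
with open('custom_classes.json','w') as f:
    json.dump(class_mapping, f, indent=1)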


#%% Main function

def load_and_run_detector_batch(model_file,
                                image_file_names,
                                checkpoint_path=None,
                                confidence_threshold=run_detector.DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD,
                                checkpoint_frequency=-1,
                                results=None,
                                n_cores=1,
                                use_image_queue=False,
                                quiet=False,
                                image_size=None,
                                class_mapping_filename=None,
                                include_image_size=False,
                                include_image_timestamp=False,
                                include_exif_tags=None,
                                augment=False,
                                force_model_download=False,
                                detector_options=None,
                                loader_workers=default_loaders,
                                preprocess_on_image_queue=default_preprocess_on_image_queue,
                                batch_size=1,
                                verbose_output=False):
    """
    Load a model file and run it on a list of images.

    Args:
        model_file (str): path to model file, or supported model string (e.g. "MDV5A")
        image_file_names (list or str): list of strings (image filenames), a single image
            filename, a folder to recursively search for images in, or a .json or .txt file
            containing a list of images
        checkpoint_path (str, optional): path to use for checkpoints (if None, checkpointing
            is disabled)
        confidence_threshold (float, optional): only detections above this threshold are returned
        checkpoint_frequency (int, optional): write results to a JSON checkpoint file every N
            images; -1 disables checkpointing
        results (list, optional): list of dicts, existing results loaded from a checkpoint;
            generally not useful if you're using this function outside of the CLI
        n_cores (int, optional): number of parallel workers to use; ignored if we're running
            on a GPU
        use_image_queue (bool, optional): use a dedicated worker for image loading
        quiet (bool, optional): disable per-image console output
        image_size (int, optional): image size to use for inference; only mess with this
            if (a) you're using a model other than MegaDetector or (b) you know what you're
            doing
        class_mapping_filename (str, optional): use a non-default class mapping supplied in a
            .json file or YOLOv5 dataset.yaml file
        include_image_size (bool, optional): should we include image size in the output for
            each image?
        include_image_timestamp (bool, optional): should we include image timestamps in the
            output for each image?
        include_exif_tags (str, optional): comma-separated list of EXIF tags to include in output
        augment (bool, optional): enable image augmentation
        force_model_download (bool, optional): force downloading the model file if a named
            model (e.g. "MDV5A") is supplied, even if the local file already exists
        detector_options (dict, optional): key/value pairs that are interpreted differently
            by different detectors
        loader_workers (int, optional): number of loaders to use; only relevant when
            use_image_queue is True
        preprocess_on_image_queue (bool, optional): if the image queue is enabled, should it
            handle image loading and preprocessing (True), or just image loading (False)?
        batch_size (int, optional): batch size for GPU processing, automatically set to 1 for
            CPU processing
        verbose_output (bool, optional): enable additional debug output

    Returns:
        results: list of dicts; each dict represents detections on one image
    """

    # Validate input arguments
    if n_cores is None or n_cores <= 0:
        n_cores = 1

    if detector_options is None:
        detector_options = {}

    if confidence_threshold is None:
        confidence_threshold = run_detector.DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD

    # Disable checkpointing if checkpoint_path is None
    if checkpoint_frequency is None or checkpoint_path is None:
        checkpoint_frequency = -1

    if class_mapping_filename is not None:
        _load_custom_class_mapping(class_mapping_filename)

    global verbose
    if verbose_output:
        print('Enabling verbose output')
        verbose = True

    # Handle the case where image_file_names is not yet actually a list
    if isinstance(image_file_names,str):

        # Find the images to score; images can be a directory, may need to recurse
        if os.path.isdir(image_file_names):
            image_dir = image_file_names
            image_file_names = path_utils.find_images(image_dir, True)
            print('{} image files found in folder {}'.format(len(image_file_names),image_dir))

        # A single image file, or a file containing a list of image paths
        elif os.path.isfile(image_file_names):
            list_file = image_file_names
            if image_file_names.endswith('.json'):
                with open(list_file,'r') as f:
                    image_file_names = json.load(f)
                print('Loaded {} image filenames from .json list file {}'.format(
                    len(image_file_names),list_file))
            elif image_file_names.endswith('.txt'):
                with open(list_file,'r') as f:
                    image_file_names = f.readlines()
                image_file_names = [s.strip() for s in image_file_names if len(s.strip()) > 0]
                print('Loaded {} image filenames from .txt list file {}'.format(
                    len(image_file_names),list_file))
            elif path_utils.is_image_file(image_file_names):
                image_file_names = [image_file_names]
                print('Processing image {}'.format(image_file_names[0]))
            else:
                raise ValueError(
                    'File {} supplied as [image_file_names] argument, but extension is neither .json nor .txt'\
                    .format(list_file))
        else:
            raise ValueError(
                '{} supplied as [image_file_names] argument, but it does not appear to be a file or folder'.format(
                    image_file_names))

    if results is None:
        results = []

    already_processed = set([i['file'] for i in results])

    model_file = try_download_known_detector(model_file,
                                             force_download=force_model_download,
                                             verbose=verbose)

    gpu_available = is_gpu_available(model_file)

    print('GPU available: {}'.format(gpu_available))

    if (n_cores > 1) and gpu_available:

        print('Warning: multiple cores requested, but a GPU is available; parallelization across ' + \
              'GPUs is not currently supported, defaulting to one GPU')
        n_cores = 1

    if (n_cores > 1) and use_image_queue:

        print('Warning: multiple cores requested, but the image queue is enabled; parallelization ' + \
              'with the image queue is not currently supported, defaulting to one worker')
        n_cores = 1

    if use_image_queue:

        assert n_cores <= 1

        # Filter out already-processed images
        images_to_process = [im_file for im_file in image_file_names
                             if im_file not in already_processed]

        if len(images_to_process) != len(image_file_names):
+
print('Bypassing {} images that have already been processed'.format(
|
|
1208
|
+
len(image_file_names) - len(images_to_process)))
|
|
1209
|
+
|
|
1210
|
+
new_results = _run_detector_with_image_queue(images_to_process,
|
|
1211
|
+
model_file,
|
|
1212
|
+
confidence_threshold,
|
|
1213
|
+
quiet,
|
|
1214
|
+
image_size=image_size,
|
|
1215
|
+
include_image_size=include_image_size,
|
|
1216
|
+
include_image_timestamp=include_image_timestamp,
|
|
1217
|
+
include_exif_tags=include_exif_tags,
|
|
1218
|
+
augment=augment,
|
|
1219
|
+
detector_options=detector_options,
|
|
1220
|
+
loader_workers=loader_workers,
|
|
1221
|
+
preprocess_on_image_queue=preprocess_on_image_queue,
|
|
1222
|
+
batch_size=batch_size,
|
|
1223
|
+
checkpoint_path=checkpoint_path,
|
|
1224
|
+
checkpoint_frequency=checkpoint_frequency)
|
|
1225
|
+
|
|
1226
|
+
# Merge new results with existing results from checkpoint
|
|
1227
|
+
results.extend(new_results)
|
|
1228
|
+
|
|
1229
|
+
elif n_cores <= 1:
|
|
1230
|
+
|
|
1231
|
+
# Single-threaded processing, no image queue
|
|
1232
|
+
|
|
1233
|
+
# Load the detector
|
|
1234
|
+
start_time = time.time()
|
|
1235
|
+
detector = load_detector(model_file,
|
|
1236
|
+
detector_options=detector_options,
|
|
1237
|
+
verbose=verbose)
|
|
1238
|
+
elapsed = time.time() - start_time
|
|
1239
|
+
print('Loaded model in {}'.format(humanfriendly.format_timespan(elapsed)))
|
|
1240
|
+
|
|
1241
|
+
if (batch_size > 1) and (not gpu_available):
|
|
1242
|
+
print('Batch size of {} requested, but no GPU is available, using batch size 1'.format(
|
|
1243
|
+
batch_size))
|
|
1244
|
+
batch_size = 1
|
|
1245
|
+
|
|
1246
|
+
# Filter out already processed images
|
|
1247
|
+
images_to_process = [im_file for im_file in image_file_names
|
|
1248
|
+
if im_file not in already_processed]
|
|
1249
|
+
|
|
1250
|
+
if len(images_to_process) != len(image_file_names):
|
|
1251
|
+
print('Bypassing {} images that have already been processed'.format(
|
|
1252
|
+
len(image_file_names) - len(images_to_process)))
|
|
1253
|
+
|
|
1254
|
+
image_count = 0
|
|
1255
|
+
|
|
1256
|
+
if (batch_size > 1):
|
|
1257
|
+
|
|
1258
|
+
# During testing, randomize the order of images_to_process to help detect
|
|
1259
|
+
# non-deterministic batching issues
|
|
1260
|
+
if randomize_batch_order_during_testing and ('PYTEST_CURRENT_TEST' in os.environ):
|
|
1261
|
+
print('PyTest detected: randomizing batch order')
|
|
1262
|
+
random.seed(int(time.time()))
|
|
1263
|
+
debug_seed = random.randint(0, 2**31 - 1)
|
|
1264
|
+
print('Debug seed: {}'.format(debug_seed))
|
|
1265
|
+
random.seed(debug_seed)
|
|
1266
|
+
random.shuffle(images_to_process)
|
|
1267
|
+
|
|
1268
|
+
# Use batch processing
|
|
1269
|
+
image_batches = _group_into_batches(images_to_process, batch_size)
|
|
1270
|
+
|
|
1271
|
+
for batch in tqdm(image_batches):
|
|
1272
|
+
batch_results = _process_batch(batch,
|
|
1273
|
+
detector,
|
|
1274
|
+
confidence_threshold,
|
|
1275
|
+
quiet=quiet,
|
|
1276
|
+
image_size=image_size,
|
|
1277
|
+
include_image_size=include_image_size,
|
|
1278
|
+
include_image_timestamp=include_image_timestamp,
|
|
1279
|
+
include_exif_tags=include_exif_tags,
|
|
1280
|
+
augment=augment)
|
|
1281
|
+
|
|
1282
|
+
results.extend(batch_results)
|
|
1283
|
+
image_count += len(batch)
|
|
1284
|
+
|
|
1285
|
+
# Write a checkpoint if necessary
|
|
1286
|
+
if (checkpoint_frequency != -1) and ((image_count % checkpoint_frequency) == 0):
|
|
1287
|
+
print('Writing a new checkpoint after having processed {} images since '
|
|
1288
|
+
'last restart'.format(image_count))
|
|
1289
|
+
write_checkpoint(checkpoint_path, results)
|
|
1290
|
+
|
|
1291
|
+
else:
|
|
1292
|
+
|
|
1293
|
+
# Use non-batch processing
|
|
1294
|
+
for im_file in tqdm(images_to_process):
|
|
1295
|
+
|
|
1296
|
+
image_count += 1
|
|
1297
|
+
|
|
1298
|
+
result = _process_image(im_file,
|
|
1299
|
+
detector,
|
|
1300
|
+
confidence_threshold,
|
|
1301
|
+
quiet=quiet,
|
|
1302
|
+
image_size=image_size,
|
|
1303
|
+
include_image_size=include_image_size,
|
|
1304
|
+
include_image_timestamp=include_image_timestamp,
|
|
1305
|
+
include_exif_tags=include_exif_tags,
|
|
1306
|
+
augment=augment)
|
|
1307
|
+
results.append(result)
|
|
1308
|
+
|
|
1309
|
+
# Write a checkpoint if necessary
|
|
1310
|
+
if (checkpoint_frequency != -1) and ((image_count % checkpoint_frequency) == 0):
|
|
1311
|
+
print('Writing a new checkpoint after having processed {} images since '
|
|
1312
|
+
'last restart'.format(image_count))
|
|
1313
|
+
write_checkpoint(checkpoint_path, results)
|
|
1314
|
+
|
|
1315
|
+
# ...if the batch size is > 1
|
|
1316
|
+
|
|
1317
|
+
else:
|
|
1318
|
+
|
|
1319
|
+
# Multiprocessing is enabled at this point
|
|
1320
|
+
|
|
1321
|
+
# When using multiprocessing, tell the workers to load the model on each
|
|
1322
|
+
# process, by passing the model_file string as the "model" argument to
|
|
1323
|
+
# process_images.
|
|
1324
|
+
detector = model_file
|
|
1325
|
+
|
|
1326
|
+
print('Creating worker pool with {} cores'.format(n_cores))
|
|
1327
|
+
|
|
1328
|
+
if len(already_processed) > 0:
|
|
1329
|
+
n_images_all = len(image_file_names)
|
|
1330
|
+
image_file_names = [fn for fn in image_file_names if fn not in already_processed]
|
|
1331
|
+
print('Loaded {} of {} images from checkpoint'.format(
|
|
1332
|
+
len(already_processed),n_images_all))
|
|
1333
|
+
|
|
1334
|
+
# Divide images into chunks; we'll send one chunk to each worker process
|
|
1335
|
+
image_chunks = list(_chunks_by_number_of_chunks(image_file_names, n_cores))
|
|
1336
|
+
|
|
1337
|
+
pool = None
|
|
1338
|
+
try:
|
|
1339
|
+
pool = workerpool(n_cores)
|
|
1340
|
+
|
|
1341
|
+
if checkpoint_path is not None:
|
|
1342
|
+
|
|
1343
|
+
# Multiprocessing and checkpointing are both enabled at this point
|
|
1344
|
+
|
|
1345
|
+
checkpoint_queue = Manager().Queue()
|
|
1346
|
+
|
|
1347
|
+
# Pass the "results" array (which may already contain images loaded from an
|
|
1348
|
+
# existing checkpoint) to the checkpoint queue handler function, which will
|
|
1349
|
+
# append results to the list as they become available.
|
|
1350
|
+
checkpoint_thread = Thread(target=_checkpoint_queue_handler,
|
|
1351
|
+
args=(checkpoint_path, checkpoint_frequency,
|
|
1352
|
+
checkpoint_queue, results), daemon=True)
|
|
1353
|
+
checkpoint_thread.start()
|
|
1354
|
+
|
|
1355
|
+
pool.map(partial(_process_images,
|
|
1356
|
+
detector=detector,
|
|
1357
|
+
confidence_threshold=confidence_threshold,
|
|
1358
|
+
use_image_queue=False,
|
|
1359
|
+
quiet=quiet,
|
|
1360
|
+
image_size=image_size,
|
|
1361
|
+
checkpoint_queue=checkpoint_queue,
|
|
1362
|
+
include_image_size=include_image_size,
|
|
1363
|
+
include_image_timestamp=include_image_timestamp,
|
|
1364
|
+
include_exif_tags=include_exif_tags,
|
|
1365
|
+
augment=augment,
|
|
1366
|
+
detector_options=detector_options),
|
|
1367
|
+
image_chunks)
|
|
1368
|
+
|
|
1369
|
+
checkpoint_queue.put(None)
|
|
1370
|
+
|
|
1371
|
+
else:
|
|
1372
|
+
|
|
1373
|
+
# Multprocessing is enabled, but checkpointing is not
|
|
1374
|
+
|
|
1375
|
+
new_results = pool.map(partial(_process_images,
|
|
1376
|
+
detector=detector,
|
|
1377
|
+
confidence_threshold=confidence_threshold,
|
|
1378
|
+
use_image_queue=False,
|
|
1379
|
+
quiet=quiet,
|
|
1380
|
+
checkpoint_queue=None,
|
|
1381
|
+
image_size=image_size,
|
|
1382
|
+
include_image_size=include_image_size,
|
|
1383
|
+
include_image_timestamp=include_image_timestamp,
|
|
1384
|
+
include_exif_tags=include_exif_tags,
|
|
1385
|
+
augment=augment,
|
|
1386
|
+
detector_options=detector_options),
|
|
1387
|
+
image_chunks)
|
|
1388
|
+
|
|
1389
|
+
new_results = list(itertools.chain.from_iterable(new_results))
|
|
1390
|
+
|
|
1391
|
+
# Append the results we just computed to "results", which is *usually* empty, but will
|
|
1392
|
+
# be non-empty if we resumed from a checkpoint
|
|
1393
|
+
results.extend(new_results)
|
|
1394
|
+
|
|
1395
|
+
# ...if checkpointing is/isn't enabled
|
|
1396
|
+
|
|
1397
|
+
finally:
|
|
1398
|
+
if pool is not None:
|
|
1399
|
+
pool.close()
|
|
1400
|
+
pool.join()
|
|
1401
|
+
print('Pool closed and joined for multi-core inference')
|
|
1402
|
+
|
|
1403
|
+
# ...if we're running (1) with image queue, (2) on one core, or (3) on multiple cores
|
|
1404
|
+
|
|
1405
|
+
# 'results' may have been modified in place, but we also return it for
|
|
1406
|
+
# backwards-compatibility.
|
|
1407
|
+
return results
|
|
1408
|
+
|
|
1409
|
+
# ...def load_and_run_detector_batch(...)
|
|
1410
|
+
|
|
1411
|
+
|
|
1412
|
+
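# Example (an illustrative sketch; the paths below are hypothetical, and the
# "#%% Interactive driver" cell near the bottom of this file shows a fuller
# version of the same workflow):
#
# results = load_and_run_detector_batch(model_file='MDV5A',
#                                       image_file_names='/data/camera_traps')
# write_results_to_file(results,
#                       '/data/md-results.json',
#                       relative_path_base='/data/camera_traps',
#                       detector_file='MDV5A')
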
def _checkpoint_queue_handler(checkpoint_path, checkpoint_frequency, checkpoint_queue, results):
    """
    Thread function to accumulate results and write checkpoints when checkpointing and
    multiprocessing are both enabled.
    """

    result_count = 0
    while True:
        result = checkpoint_queue.get()
        if result is None:
            break

        result_count += 1
        results.append(result)

        if (checkpoint_frequency != -1) and (result_count % checkpoint_frequency == 0):

            print('Writing a new checkpoint after having processed {} images since '
                  'last restart'.format(result_count))

            write_checkpoint(checkpoint_path, results)

def write_checkpoint(checkpoint_path, results):
    """
    Writes the object in [results] to a json checkpoint file, as a dict with the
    key "checkpoint". First backs up the checkpoint file if it exists, in case we
    crash while writing the file.

    Args:
        checkpoint_path (str): the file to write the checkpoint to
        results (object): the object we should write
    """

    assert checkpoint_path is not None

    # Back up any previous checkpoints, to protect against crashes while we're writing
    # the checkpoint file.
    checkpoint_tmp_path = None
    if os.path.isfile(checkpoint_path):
        checkpoint_tmp_path = checkpoint_path + '_tmp'
        shutil.copyfile(checkpoint_path,checkpoint_tmp_path)

    # Write the new checkpoint
    ct_utils.write_json(checkpoint_path, {'checkpoint': results}, force_str=True)

    # Remove the backup checkpoint if it exists
    if checkpoint_tmp_path is not None:
        try:
            os.remove(checkpoint_tmp_path)
        except Exception as e:
            print('Warning: error removing backup checkpoint file {}:\n{}'.format(
                checkpoint_tmp_path,str(e)))

def load_checkpoint(checkpoint_path):
    """
    Loads results from a checkpoint file. A checkpoint file is always a dict
    with the key "checkpoint".

    Args:
        checkpoint_path (str): the .json file to load

    Returns:
        object: object retrieved from the checkpoint, typically a list of results
    """

    print('Loading previous results from checkpoint file {}'.format(checkpoint_path))

    with open(checkpoint_path, 'r') as f:
        checkpoint_data = json.load(f)

    if 'checkpoint' not in checkpoint_data:
        raise ValueError('Checkpoint file {} is missing "checkpoint" field'.format(checkpoint_path))

    results = checkpoint_data['checkpoint']
    print('Restored {} entries from the checkpoint {}'.format(len(results),checkpoint_path))

    return results

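# Example round trip (a sketch; the filename is hypothetical). Because a
# checkpoint file is just a dict with the single key "checkpoint",
# write_checkpoint() and load_checkpoint() invert each other:
#
# write_checkpoint('/tmp/md_checkpoint_demo.json',
#                  [{'file': 'a.jpg', 'detections': []}])
# restored = load_checkpoint('/tmp/md_checkpoint_demo.json')
# assert restored[0]['file'] == 'a.jpg'
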
def get_image_datetime(image):
    """
    Reads EXIF datetime from a PIL Image object.

    Args:
        image (Image): the PIL Image object from which we should read datetime information

    Returns:
        str: the EXIF datetime from [image] (a PIL Image object), if available, as a string;
        returns None if EXIF datetime is not available.
    """

    exif_tags = read_exif.read_pil_exif(image,exif_options_base)

    try:
        datetime_str = exif_tags['DateTimeOriginal']
        _ = time.strptime(datetime_str, '%Y:%m:%d %H:%M:%S')
        return datetime_str

    except Exception:
        return None

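# Example (a sketch; the image path is hypothetical). EXIF datetimes use colons
# in the date portion, so a successful call returns a string in the format
# validated above:
#
# from PIL import Image
# with Image.open('/data/camera_traps/img0001.jpg') as im:
#     dt = get_image_datetime(im)  # e.g. '2023:06:01 14:22:05', or None
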
def write_results_to_file(results,
                          output_file,
                          relative_path_base=None,
                          detector_file=None,
                          info=None,
                          include_max_conf=False,
                          custom_metadata=None,
                          force_forward_slashes=True):
    """
    Writes a list of detection results to a JSON output file. Format matches:

    https://lila.science/megadetector-output-format

    Args:
        results (list): list of dicts; each dict represents detections on one image
        output_file (str): path to JSON output file, should end in '.json'
        relative_path_base (str, optional): path to a directory as the base for relative paths, can
            be None if the paths in [results] are absolute
        detector_file (str, optional): filename of the detector used to generate these results, only
            used to pull out a version number for the "info" field
        info (dict, optional): dictionary to put in the results file instead of the default "info" field
        include_max_conf (bool, optional): old files (version 1.2 and earlier) included a
            "max_detection_conf" field in each image; this was removed in version 1.3. Set this
            flag to force the inclusion of this field.
        custom_metadata (object, optional): additional data to include as info['custom_metadata']; typically
            a dictionary, but no type/format checks are performed
        force_forward_slashes (bool, optional): convert all slashes in filenames within [results] to
            forward slashes

    Returns:
        dict: the MD-formatted dictionary that was written to [output_file]
    """

    if relative_path_base is not None:
        results_relative = []
        for r in results:
            r_relative = copy.copy(r)
            r_relative['file'] = os.path.relpath(r_relative['file'], start=relative_path_base)
            results_relative.append(r_relative)
        results = results_relative

    if force_forward_slashes:
        results_converted = []
        for r in results:
            r_converted = copy.copy(r)
            r_converted['file'] = r_converted['file'].replace('\\','/')
            results_converted.append(r_converted)
        results = results_converted

    # The typical case: we need to build the 'info' struct
    if info is None:

        info = {
            'detection_completion_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'format_version': '1.5'
        }

        if detector_file is not None:
            detector_filename = os.path.basename(detector_file)
            detector_version = get_detector_version_from_filename(detector_filename,verbose=True)
            detector_metadata = get_detector_metadata_from_version_string(detector_version)
            info['detector'] = detector_filename
            info['detector_metadata'] = detector_metadata
        else:
            info['detector'] = 'unknown'
            info['detector_metadata'] = get_detector_metadata_from_version_string('unknown')

    # If the caller supplied the entire "info" struct
    else:

        if detector_file is not None:
            print('Warning (write_results_to_file): info struct and detector file ' + \
                  'supplied, ignoring detector file')

    if custom_metadata is not None:
        info['custom_metadata'] = custom_metadata

    # The 'max_detection_conf' field used to be included by default, and it caused all kinds
    # of headaches, so it's no longer included unless the user explicitly requests it.
    if not include_max_conf:
        for im in results:
            if 'max_detection_conf' in im:
                del im['max_detection_conf']

    # Sort results by filename; not required by the format, but convenient for consistency
    results = sort_list_of_dicts_by_key(results,'file')

    # Sort detections in descending order by confidence; not required by the format, but
    # convenient for consistency
    for im in results:
        if ('detections' in im) and (im['detections'] is not None):
            im['detections'] = sort_list_of_dicts_by_key(im['detections'], 'conf', reverse=True)

    for im in results:
        if 'failure' in im:
            if 'detections' in im:
                assert im['detections'] is None, 'Illegal failure/detection combination'
            else:
                im['detections'] = None

    final_output = {
        'images': results,
        'detection_categories': run_detector.DEFAULT_DETECTOR_LABEL_MAP,
        'info': info
    }

    # Create the folder where the output file belongs; this will fail if
    # this is a relative path with no folder component
    try:
        os.makedirs(os.path.dirname(output_file),exist_ok=True)
    except Exception:
        pass

    ct_utils.write_json(output_file, final_output, force_str=True)
    print('Output file saved at {}'.format(output_file))

    return final_output

# ...def write_results_to_file(...)

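# For reference, a sketch of the output structure written above (values are
# illustrative; see the URL in the docstring for the authoritative spec):
#
# {
#   "images": [
#     {
#       "file": "camera1/img0001.jpg",
#       "detections": [
#         {"category": "1", "conf": 0.92, "bbox": [0.31, 0.42, 0.18, 0.25]}
#       ]
#     }
#   ],
#   "detection_categories": {"1": "animal", "2": "person", "3": "vehicle"},
#   "info": {"format_version": "1.5", "detector": "md_v5a.0.0.pt"}
# }
#
# ..."bbox" is [x_min, y_min, width, height], normalized to image dimensions.
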
#%% Interactive driver

if False:

    pass

    #%%

    model_file = 'MDV5A'
    image_dir = r'g:\camera_traps\camera_trap_images'
    output_file = r'g:\temp\md-test.json'

    recursive = True
    output_relative_filenames = True
    include_max_conf = False
    quiet = True
    image_size = None
    use_image_queue = False
    confidence_threshold = 0.0001
    checkpoint_frequency = 5
    checkpoint_path = None
    resume_from_checkpoint = 'auto'
    allow_checkpoint_overwrite = False
    ncores = 1
    class_mapping_filename = None
    include_image_size = True
    include_image_timestamp = True
    include_exif_tags = None
    overwrite_handling = None

    # Generate a command line
    cmd = 'python run_detector_batch.py "{}" "{}" "{}"'.format(
        model_file,image_dir,output_file)

    if recursive:
        cmd += ' --recursive'
    if output_relative_filenames:
        cmd += ' --output_relative_filenames'
    if include_max_conf:
        cmd += ' --include_max_conf'
    if image_size is not None:
        cmd += ' --image_size {}'.format(image_size)
    if use_image_queue:
        cmd += ' --use_image_queue'
    if confidence_threshold is not None:
        cmd += ' --threshold {}'.format(confidence_threshold)
    if checkpoint_frequency is not None:
        cmd += ' --checkpoint_frequency {}'.format(checkpoint_frequency)
    if checkpoint_path is not None:
        cmd += ' --checkpoint_path "{}"'.format(checkpoint_path)
    if resume_from_checkpoint is not None:
        cmd += ' --resume_from_checkpoint "{}"'.format(resume_from_checkpoint)
    if allow_checkpoint_overwrite:
        cmd += ' --allow_checkpoint_overwrite'
    if ncores is not None:
        cmd += ' --ncores {}'.format(ncores)
    if class_mapping_filename is not None:
        cmd += ' --class_mapping_filename "{}"'.format(class_mapping_filename)
    if include_image_size:
        cmd += ' --include_image_size'
    if include_image_timestamp:
        cmd += ' --include_image_timestamp'
    if include_exif_tags is not None:
        cmd += ' --include_exif_tags "{}"'.format(include_exif_tags)
    if overwrite_handling is not None:
        cmd += ' --overwrite_handling {}'.format(overwrite_handling)

    print(cmd)
    import clipboard; clipboard.copy(cmd)


    #%% Run inference interactively

    image_file_names = path_utils.find_images(image_dir, recursive=False)
    results = None

    start_time = time.time()

    results = load_and_run_detector_batch(model_file=model_file,
                                          image_file_names=image_file_names,
                                          checkpoint_path=checkpoint_path,
                                          confidence_threshold=confidence_threshold,
                                          checkpoint_frequency=checkpoint_frequency,
                                          results=results,
                                          n_cores=ncores,
                                          use_image_queue=use_image_queue,
                                          quiet=quiet,
                                          image_size=image_size)

    elapsed = time.time() - start_time

    print('Finished inference in {}'.format(humanfriendly.format_timespan(elapsed)))

#%% Command-line driver

def main(): # noqa

    parser = argparse.ArgumentParser(
        description='Module to run a TF/PT animal detection model on lots of images')
    parser.add_argument(
        'detector_file',
        help='Path to detector model file (.pb or .pt). Can also be the strings "MDV4", ' + \
             '"MDV5A", or "MDV5B" to request automatic download.')
    parser.add_argument(
        'image_file',
        help=\
        'Path to a single image file, a .json or .txt file containing a list of paths to images, or a directory')
    parser.add_argument(
        'output_file',
        help='Path to output JSON results file, should end with a .json extension')
    parser.add_argument(
        '--recursive',
        action='store_true',
        help='Recurse into directories, only meaningful if image_file points to a directory')
    parser.add_argument(
        '--output_relative_filenames',
        action='store_true',
        help='Output relative file names, only meaningful if image_file points to a directory')
    parser.add_argument(
        '--include_max_conf',
        action='store_true',
        help='Include the "max_detection_conf" field in the output')
    parser.add_argument(
        '--verbose',
        action='store_true',
        help='Enable additional debug output')
    parser.add_argument(
        '--image_size',
        type=int,
        default=None,
        help=('Force image resizing to a specific integer size on the long axis (not recommended to change this)'))
    parser.add_argument(
        '--augment',
        action='store_true',
        help='Enable image augmentation')
    parser.add_argument(
        '--use_image_queue',
        action='store_true',
        help='Pre-load images, may help keep your GPU busy; does not currently support ' + \
             'checkpointing. Useful if you have a very fast GPU and a very slow disk.')
    parser.add_argument(
        '--preprocess_on_image_queue',
        action='store_true',
        help='Whether to do image resizing on the image queue (PyTorch detectors only)')
    parser.add_argument(
        '--use_threads_for_queue',
        action='store_true',
        help='Use threads (rather than processes) for the image queue; only relevant if --use_image_queue is set')
    parser.add_argument(
        '--threshold',
        type=float,
        default=run_detector.DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD,
        help="Confidence threshold between 0 and 1.0, don't include boxes below this " + \
             "confidence in the output file. Default is {}".format(
             run_detector.DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD))
    parser.add_argument(
        '--checkpoint_frequency',
        type=int,
        default=-1,
        help='Write results to a temporary file every N images; default is -1, which ' + \
             'disables this feature')
    parser.add_argument(
        '--checkpoint_path',
        type=str,
        default=None,
        help='File name to which checkpoints will be written if checkpoint_frequency is > 0, ' + \
             'defaults to md_checkpoint_[date].json in the same folder as the output file')
    parser.add_argument(
        '--resume_from_checkpoint',
        type=str,
        default=None,
        help='Path to a JSON checkpoint file to resume from, or "auto" to ' + \
             'find the most recent checkpoint in the same folder as the output file. "auto" uses ' + \
             'checkpoint_path (rather than searching the output folder) if checkpoint_path is specified.')
    parser.add_argument(
        '--allow_checkpoint_overwrite',
        action='store_true',
        help='By default, this script will bail if the specified checkpoint file ' + \
             'already exists; this option allows it to overwrite existing checkpoints')
    parser.add_argument(
        '--ncores',
        type=int,
        default=1,
        help='Number of cores to use for inference; only applies to CPU-based inference (default 1)')
    parser.add_argument(
        '--loader_workers',
        type=int,
        default=default_loaders,
        help='Number of image loader workers to use; only relevant when --use_image_queue ' + \
             'is set (default {})'.format(default_loaders))
    parser.add_argument(
        '--class_mapping_filename',
        type=str,
        default=None,
        help='Use a non-default class mapping, supplied in a .json file with a dictionary mapping ' + \
             'int-strings to strings. This will also disable the addition of "1" to all category ' + \
             'IDs, so your class mapping should start at zero. Can also be a YOLOv5 dataset.yaml file.')
    parser.add_argument(
        '--include_image_size',
        action='store_true',
        help='Include image dimensions in output file')
    parser.add_argument(
        '--include_image_timestamp',
        action='store_true',
        help='Include image datetime (if available) in output file')
    parser.add_argument(
        '--include_exif_tags',
        type=str,
        default=None,
        help='Comma-separated list of EXIF tags to include in output, or "all" to include all tags')
    parser.add_argument(
        '--overwrite_handling',
        type=str,
        default='overwrite',
        help='What should we do if the output file exists? overwrite/skip/error (default overwrite)')
    parser.add_argument(
        '--force_model_download',
        action='store_true',
        help=('If a named model (e.g. "MDV5A") is supplied, force a download of that model even if the ' + \
              'local file already exists.'))
    parser.add_argument(
        '--previous_results_file',
        type=str,
        default=None,
        help=('If supplied, this should point to a previous .json results file; any results in that ' + \
              'file will be transferred to the output file without reprocessing those images. Useful ' + \
              'for "updating" a set of results when you may have added new images to a folder you\'ve ' + \
              'already processed. Only supported when using relative paths.'))
    parser.add_argument(
        '--detector_options',
        nargs='*',
        metavar='KEY=VALUE',
        default='',
        help='Detector-specific options, as a space-separated list of key-value pairs')
    parser.add_argument(
        '--batch_size',
        type=int,
        default=1,
        help='Batch size for GPU inference (default 1). CPU inference will ignore this and use batch_size=1.')

    # This argument is deprecated, we always use what was formerly "quiet mode"
    parser.add_argument(
        '--quiet',
        action='store_true',
        help=argparse.SUPPRESS)

    # This argument is deprecated in favor of --include_exif_tags
    parser.add_argument(
        '--include_exif_data',
        action='store_true',
        help=argparse.SUPPRESS)
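
    # Example invocation (an illustrative sketch; the paths are hypothetical):
    #
    #   python run_detector_batch.py MDV5A /data/camera_traps /data/md-results.json \
    #       --recursive --output_relative_filenames --checkpoint_frequency 10000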

    if len(sys.argv[1:]) == 0:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    global use_threads_for_queue
    if args.use_threads_for_queue:
        use_threads_for_queue = True

    # Support the legacy --include_exif_data flag
    if args.include_exif_data and (args.include_exif_tags is None):
        args.include_exif_tags = 'all'

    detector_options = parse_kvp_list(args.detector_options)

    # If the specified detector file is really the name of a known model, find
    # (and possibly download) that model
    args.detector_file = try_download_known_detector(args.detector_file,
                                                     force_download=args.force_model_download,
                                                     verbose=verbose)

    assert os.path.exists(args.detector_file), \
        'detector file {} does not exist'.format(args.detector_file)
    assert 0.0 <= args.threshold <= 1.0, 'Confidence threshold needs to be between 0 and 1'
    assert args.output_file.endswith('.json'), 'output_file specified needs to end with .json'
    if args.checkpoint_frequency != -1:
        assert args.checkpoint_frequency > 0, 'Checkpoint_frequency needs to be > 0 or == -1'
    if args.output_relative_filenames:
        assert os.path.isdir(args.image_file), \
            f'Could not find folder {args.image_file}, must supply a folder when ' + \
            '--output_relative_filenames is set'
    if args.previous_results_file is not None:
        assert os.path.isdir(args.image_file) and args.output_relative_filenames, \
            "Can only process previous results when using relative paths"
    if os.path.exists(args.output_file):
        if args.overwrite_handling == 'overwrite':
            print('Warning: output file {} already exists and will be overwritten'.format(
                args.output_file))
        elif args.overwrite_handling == 'skip':
            print('Output file {} exists, returning'.format(
                args.output_file))
            return
        elif args.overwrite_handling == 'error':
            raise Exception('Output file {} exists'.format(args.output_file))
        else:
            raise ValueError('Illegal overwrite handling string {}'.format(args.overwrite_handling))

    output_dir = os.path.dirname(args.output_file)

    if len(output_dir) > 0:
        os.makedirs(output_dir,exist_ok=True)

    assert not os.path.isdir(args.output_file), 'Specified output file is a directory'

    if args.class_mapping_filename is not None:
        _load_custom_class_mapping(args.class_mapping_filename)

    # Load the checkpoint if available
    #
    # File paths in the checkpoint are always absolute paths; conversion to relative paths
    # (if requested) happens at the time results are exported at the end of a job.
    if args.resume_from_checkpoint is not None:
        if args.resume_from_checkpoint == 'auto':
            checkpoint_files = os.listdir(output_dir)
            checkpoint_files = [fn for fn in checkpoint_files if \
                                (fn.startswith('md_checkpoint') and fn.endswith('.json'))]
            if len(checkpoint_files) == 0:
                raise ValueError('resume_from_checkpoint set to "auto", but no checkpoints found in {}'.format(
                    output_dir))
            else:
                if len(checkpoint_files) > 1:
                    print('Warning: found {} checkpoints in {}, using the latest'.format(
                        len(checkpoint_files),output_dir))
                checkpoint_files = sorted(checkpoint_files)
                checkpoint_file_relative = checkpoint_files[-1]
                checkpoint_file = os.path.join(output_dir,checkpoint_file_relative)
        else:
            checkpoint_file = args.resume_from_checkpoint
        results = load_checkpoint(checkpoint_file)
    else:
        results = []

    # Find the images to process; images can be a directory, may need to recurse
    if os.path.isdir(args.image_file):
        image_file_names = path_utils.find_images(args.image_file, args.recursive)
        if len(image_file_names) > 0:
            print('{} image files found in the input directory'.format(len(image_file_names)))
        else:
            if (args.recursive):
                print('No image files found in directory {}, exiting'.format(args.image_file))
            else:
                print('No image files found in directory {}, did you mean to specify '
                      '--recursive?'.format(
                      args.image_file))
            return

    # A json list of image paths
    elif os.path.isfile(args.image_file) and args.image_file.endswith('.json'):
        with open(args.image_file) as f:
            image_file_names = json.load(f)
        print('Loaded {} image filenames from .json list file {}'.format(
            len(image_file_names),args.image_file))

    # A text list of image paths
    elif os.path.isfile(args.image_file) and args.image_file.endswith('.txt'):
        with open(args.image_file) as f:
            image_file_names = f.readlines()
        image_file_names = [fn.strip() for fn in image_file_names if len(fn.strip()) > 0]
        print('Loaded {} image filenames from .txt list file {}'.format(
            len(image_file_names),args.image_file))

    # A single image file
    elif os.path.isfile(args.image_file) and path_utils.is_image_file(args.image_file):
        image_file_names = [args.image_file]
        print('Processing image {}'.format(args.image_file))

    else:
        raise ValueError('image_file specified is not a directory, a json list, or an image file '
                         '(or does not have a recognizable extension).')

    # At this point, regardless of how they were specified, [image_file_names] is a list of
    # absolute image paths.
    assert len(image_file_names) > 0, 'Specified image_file does not point to valid image files'

    # Convert to forward slashes to facilitate comparison with previous results
    image_file_names = [fn.replace('\\','/') for fn in image_file_names]

    # We can head off many problems related to incorrect command line formulation if we confirm
    # that one image exists before proceeding. The use of the first image for this test is
    # arbitrary.
    assert os.path.exists(image_file_names[0]), \
        'The first image to be processed does not exist at {}'.format(image_file_names[0])

    # Possibly load results from a previous pass
    previous_results = None

    if args.previous_results_file is not None:

        assert os.path.isfile(args.previous_results_file), \
            'Could not find previous results file {}'.format(args.previous_results_file)
        with open(args.previous_results_file,'r') as f:
            previous_results = json.load(f)

        assert previous_results['detection_categories'] == run_detector.DEFAULT_DETECTOR_LABEL_MAP, \
            "Can't merge previous results when those results use a different set of detection categories"

        print('Loaded previous results for {} images from {}'.format(
            len(previous_results['images']), args.previous_results_file))

        # Convert previous result filenames to absolute paths if necessary
        #
        # We asserted above to make sure that we are using relative paths and processing a
        # folder, but just to be super-clear...
        assert os.path.isdir(args.image_file)

        previous_image_files_set = set()
        for im in previous_results['images']:
            assert not os.path.isabs(im['file']), \
                "When processing previous results, relative paths are required"
            fn_abs = os.path.join(args.image_file,im['file']).replace('\\','/')
            # Absolute paths are expected at the final output stage below
            im['file'] = fn_abs
            previous_image_files_set.add(fn_abs)

        image_file_names_to_keep = []
        for fn_abs in image_file_names:
            if fn_abs not in previous_image_files_set:
                image_file_names_to_keep.append(fn_abs)

        print('Based on previous results file, processing {} of {} images'.format(
            len(image_file_names_to_keep), len(image_file_names)))

        image_file_names = image_file_names_to_keep

    # ...if we're handling previous results

    # Test that we can write to the output_file's dir if checkpointing is requested
    if args.checkpoint_frequency != -1:

        if args.checkpoint_path is not None:
            checkpoint_path = args.checkpoint_path
        else:
            checkpoint_path = os.path.join(output_dir,
                                           'md_checkpoint_{}.json'.format(
                                               datetime.now().strftime("%Y%m%d%H%M%S")))

        # Don't overwrite existing checkpoint files, this is a sure-fire way to eventually
        # erase someone's checkpoint.
        if (checkpoint_path is not None) and (not args.allow_checkpoint_overwrite) \
            and (args.resume_from_checkpoint is None):

            assert not os.path.isfile(checkpoint_path), \
                f'Checkpoint path {checkpoint_path} already exists, delete or move it before ' + \
                're-using the same checkpoint path, or specify --allow_checkpoint_overwrite'

        print('The checkpoint file will be written to {}'.format(checkpoint_path))

    else:

        if args.checkpoint_path is not None:
            print('Warning: checkpointing disabled because checkpoint_frequency is -1, ' + \
                  'but a checkpoint path was specified')
        checkpoint_path = None

    start_time = time.time()

    results = load_and_run_detector_batch(model_file=args.detector_file,
                                          image_file_names=image_file_names,
                                          checkpoint_path=checkpoint_path,
                                          confidence_threshold=args.threshold,
                                          checkpoint_frequency=args.checkpoint_frequency,
                                          results=results,
                                          n_cores=args.ncores,
                                          use_image_queue=args.use_image_queue,
                                          quiet=True,
                                          image_size=args.image_size,
                                          class_mapping_filename=args.class_mapping_filename,
                                          include_image_size=args.include_image_size,
                                          include_image_timestamp=args.include_image_timestamp,
                                          include_exif_tags=args.include_exif_tags,
                                          augment=args.augment,
                                          # Don't download the model *again*
                                          force_model_download=False,
                                          detector_options=detector_options,
                                          loader_workers=args.loader_workers,
                                          preprocess_on_image_queue=args.preprocess_on_image_queue,
                                          batch_size=args.batch_size,
                                          verbose_output=args.verbose)

    elapsed = time.time() - start_time
    images_per_second = len(results) / elapsed
    print('Finished inference for {} images in {} ({:.2f} images per second)'.format(
        len(results),humanfriendly.format_timespan(elapsed),images_per_second))

    relative_path_base = None

    # We asserted above to make sure that if output_relative_filenames is set,
    # args.image_file is a folder, but we'll double-check for clarity.
    if args.output_relative_filenames:
        assert os.path.isdir(args.image_file)
        relative_path_base = args.image_file

    # Merge results from a previous file if necessary
    if previous_results is not None:
        previous_filenames_set = set([im['file'] for im in previous_results['images']])
        new_filenames_set = set([im['file'] for im in results])
        assert len(previous_filenames_set.intersection(new_filenames_set)) == 0, \
            'Previous results handling error: redundant image filenames'
        results.extend(previous_results['images'])

    write_results_to_file(results,
                          args.output_file,
                          relative_path_base=relative_path_base,
                          detector_file=args.detector_file,
                          include_max_conf=args.include_max_conf)

    if checkpoint_path and os.path.isfile(checkpoint_path):
        os.remove(checkpoint_path)
        print('Deleted checkpoint file {}'.format(checkpoint_path))

    print('Done, thanks for MegaDetect\'ing!')

if __name__ == '__main__':
    main()