megadetector-5.0.8-py3-none-any.whl → megadetector-5.0.10-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of megadetector might be problematic.
- api/__init__.py +0 -0
- api/batch_processing/__init__.py +0 -0
- api/batch_processing/api_core/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/score.py +0 -1
- api/batch_processing/api_core/server_job_status_table.py +0 -1
- api/batch_processing/api_core_support/__init__.py +0 -0
- api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
- api/batch_processing/api_support/__init__.py +0 -0
- api/batch_processing/api_support/summarize_daily_activity.py +0 -1
- api/batch_processing/data_preparation/__init__.py +0 -0
- api/batch_processing/data_preparation/manage_local_batch.py +65 -65
- api/batch_processing/data_preparation/manage_video_batch.py +8 -8
- api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
- api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
- api/batch_processing/postprocessing/__init__.py +0 -0
- api/batch_processing/postprocessing/add_max_conf.py +12 -12
- api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
- api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
- api/batch_processing/postprocessing/compare_batch_results.py +113 -43
- api/batch_processing/postprocessing/convert_output_format.py +41 -16
- api/batch_processing/postprocessing/load_api_results.py +16 -17
- api/batch_processing/postprocessing/md_to_coco.py +31 -21
- api/batch_processing/postprocessing/md_to_labelme.py +52 -22
- api/batch_processing/postprocessing/merge_detections.py +14 -14
- api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
- api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
- api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
- api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
- api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
- api/synchronous/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
- api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
- api/synchronous/api_core/animal_detection_api/config.py +35 -35
- api/synchronous/api_core/tests/__init__.py +0 -0
- api/synchronous/api_core/tests/load_test.py +109 -109
- classification/__init__.py +0 -0
- classification/aggregate_classifier_probs.py +21 -24
- classification/analyze_failed_images.py +11 -13
- classification/cache_batchapi_outputs.py +51 -51
- classification/create_classification_dataset.py +69 -68
- classification/crop_detections.py +54 -53
- classification/csv_to_json.py +97 -100
- classification/detect_and_crop.py +105 -105
- classification/evaluate_model.py +43 -42
- classification/identify_mislabeled_candidates.py +47 -46
- classification/json_to_azcopy_list.py +10 -10
- classification/json_validator.py +72 -71
- classification/map_classification_categories.py +44 -43
- classification/merge_classification_detection_output.py +68 -68
- classification/prepare_classification_script.py +157 -154
- classification/prepare_classification_script_mc.py +228 -228
- classification/run_classifier.py +27 -26
- classification/save_mislabeled.py +30 -30
- classification/train_classifier.py +20 -20
- classification/train_classifier_tf.py +21 -22
- classification/train_utils.py +10 -10
- data_management/__init__.py +0 -0
- data_management/annotations/__init__.py +0 -0
- data_management/annotations/annotation_constants.py +18 -31
- data_management/camtrap_dp_to_coco.py +238 -0
- data_management/cct_json_utils.py +102 -59
- data_management/cct_to_md.py +176 -158
- data_management/cct_to_wi.py +247 -219
- data_management/coco_to_labelme.py +272 -263
- data_management/coco_to_yolo.py +79 -58
- data_management/databases/__init__.py +0 -0
- data_management/databases/add_width_and_height_to_db.py +20 -16
- data_management/databases/combine_coco_camera_traps_files.py +35 -31
- data_management/databases/integrity_check_json_db.py +62 -24
- data_management/databases/subset_json_db.py +24 -15
- data_management/generate_crops_from_cct.py +27 -45
- data_management/get_image_sizes.py +188 -162
- data_management/importers/add_nacti_sizes.py +8 -8
- data_management/importers/add_timestamps_to_icct.py +78 -78
- data_management/importers/animl_results_to_md_results.py +158 -158
- data_management/importers/auckland_doc_test_to_json.py +9 -9
- data_management/importers/auckland_doc_to_json.py +8 -8
- data_management/importers/awc_to_json.py +7 -7
- data_management/importers/bellevue_to_json.py +15 -15
- data_management/importers/cacophony-thermal-importer.py +13 -13
- data_management/importers/carrizo_shrubfree_2018.py +8 -8
- data_management/importers/carrizo_trail_cam_2017.py +8 -8
- data_management/importers/cct_field_adjustments.py +9 -9
- data_management/importers/channel_islands_to_cct.py +10 -10
- data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
- data_management/importers/ena24_to_json.py +7 -7
- data_management/importers/filenames_to_json.py +8 -8
- data_management/importers/helena_to_cct.py +7 -7
- data_management/importers/idaho-camera-traps.py +7 -7
- data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
- data_management/importers/jb_csv_to_json.py +9 -9
- data_management/importers/mcgill_to_json.py +8 -8
- data_management/importers/missouri_to_json.py +18 -18
- data_management/importers/nacti_fieldname_adjustments.py +10 -10
- data_management/importers/noaa_seals_2019.py +7 -7
- data_management/importers/pc_to_json.py +7 -7
- data_management/importers/plot_wni_giraffes.py +7 -7
- data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
- data_management/importers/prepare_zsl_imerit.py +7 -7
- data_management/importers/rspb_to_json.py +8 -8
- data_management/importers/save_the_elephants_survey_A.py +8 -8
- data_management/importers/save_the_elephants_survey_B.py +9 -9
- data_management/importers/snapshot_safari_importer.py +26 -26
- data_management/importers/snapshot_safari_importer_reprise.py +665 -665
- data_management/importers/snapshot_serengeti_lila.py +14 -14
- data_management/importers/sulross_get_exif.py +8 -9
- data_management/importers/timelapse_csv_set_to_json.py +11 -11
- data_management/importers/ubc_to_json.py +13 -13
- data_management/importers/umn_to_json.py +7 -7
- data_management/importers/wellington_to_json.py +8 -8
- data_management/importers/wi_to_json.py +9 -9
- data_management/importers/zamba_results_to_md_results.py +181 -181
- data_management/labelme_to_coco.py +65 -24
- data_management/labelme_to_yolo.py +8 -8
- data_management/lila/__init__.py +0 -0
- data_management/lila/add_locations_to_island_camera_traps.py +9 -9
- data_management/lila/add_locations_to_nacti.py +147 -147
- data_management/lila/create_lila_blank_set.py +13 -13
- data_management/lila/create_lila_test_set.py +8 -8
- data_management/lila/create_links_to_md_results_files.py +106 -106
- data_management/lila/download_lila_subset.py +44 -110
- data_management/lila/generate_lila_per_image_labels.py +55 -42
- data_management/lila/get_lila_annotation_counts.py +18 -15
- data_management/lila/get_lila_image_counts.py +11 -11
- data_management/lila/lila_common.py +96 -33
- data_management/lila/test_lila_metadata_urls.py +132 -116
- data_management/ocr_tools.py +173 -128
- data_management/read_exif.py +110 -97
- data_management/remap_coco_categories.py +83 -83
- data_management/remove_exif.py +58 -62
- data_management/resize_coco_dataset.py +30 -23
- data_management/wi_download_csv_to_coco.py +246 -239
- data_management/yolo_output_to_md_output.py +86 -73
- data_management/yolo_to_coco.py +300 -60
- detection/__init__.py +0 -0
- detection/detector_training/__init__.py +0 -0
- detection/process_video.py +85 -33
- detection/pytorch_detector.py +43 -25
- detection/run_detector.py +157 -72
- detection/run_detector_batch.py +179 -113
- detection/run_inference_with_yolov5_val.py +108 -48
- detection/run_tiled_inference.py +111 -40
- detection/tf_detector.py +51 -29
- detection/video_utils.py +606 -521
- docs/source/conf.py +43 -0
- md_utils/__init__.py +0 -0
- md_utils/azure_utils.py +9 -9
- md_utils/ct_utils.py +228 -68
- md_utils/directory_listing.py +59 -64
- md_utils/md_tests.py +968 -871
- md_utils/path_utils.py +460 -134
- md_utils/process_utils.py +157 -133
- md_utils/sas_blob_utils.py +20 -20
- md_utils/split_locations_into_train_val.py +45 -32
- md_utils/string_utils.py +33 -10
- md_utils/url_utils.py +176 -60
- md_utils/write_html_image_list.py +40 -33
- md_visualization/__init__.py +0 -0
- md_visualization/plot_utils.py +102 -109
- md_visualization/render_images_with_thumbnails.py +34 -34
- md_visualization/visualization_utils.py +597 -291
- md_visualization/visualize_db.py +76 -48
- md_visualization/visualize_detector_output.py +61 -42
- {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/METADATA +13 -7
- megadetector-5.0.10.dist-info/RECORD +224 -0
- {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/top_level.txt +1 -0
- taxonomy_mapping/__init__.py +0 -0
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
- taxonomy_mapping/map_new_lila_datasets.py +154 -154
- taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
- taxonomy_mapping/preview_lila_taxonomy.py +591 -591
- taxonomy_mapping/retrieve_sample_image.py +12 -12
- taxonomy_mapping/simple_image_download.py +11 -11
- taxonomy_mapping/species_lookup.py +10 -10
- taxonomy_mapping/taxonomy_csv_checker.py +18 -18
- taxonomy_mapping/taxonomy_graph.py +47 -47
- taxonomy_mapping/validate_lila_category_mappings.py +83 -76
- data_management/cct_json_to_filename_json.py +0 -89
- data_management/cct_to_csv.py +0 -140
- data_management/databases/remove_corrupted_images_from_db.py +0 -191
- detection/detector_training/copy_checkpoints.py +0 -43
- megadetector-5.0.8.dist-info/RECORD +0 -205
- {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/LICENSE +0 -0
- {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/WHEEL +0 -0
detection/run_detector_batch.py
CHANGED
@@ -1,38 +1,43 @@
-[35 old lines (the previous module docstring) not captured in this view]
+"""
+
+run_detector_batch.py
+
+Module to run MegaDetector on lots of images, writing the results
+to a file in the MegaDetector results format.
+
+https://github.com/agentmorris/MegaDetector/tree/main/api/batch_processing#megadetector-batch-output-format
+
+This enables the results to be used in our post-processing pipeline; see
+api/batch_processing/postprocessing/postprocess_batch_results.py .
+
+This script can save results to checkpoints intermittently, in case disaster
+strikes. To enable this, set --checkpoint_frequency to n > 0, and results
+will be saved as a checkpoint every n images. Checkpoints will be written
+to a file in the same directory as the output_file, and after all images
+are processed and final results file written to output_file, the temporary
+checkpoint file will be deleted. If you want to resume from a checkpoint, set
+the checkpoint file's path using --resume_from_checkpoint.
+
+The `threshold` you can provide as an argument is the confidence threshold above
+which detections will be included in the output file.
+
+Has multiprocessing support for CPUs only; if a GPU is available, it will
+use the GPU instead of CPUs, and the --ncores option will be ignored. Checkpointing
+is not supported when using a GPU.
+
+The lack of GPU multiprocessing support might sound annoying, but in practice we
+run a gazillion MegaDetector images on multiple GPUs using this script, we just only use
+one GPU *per invocation of this script*. Dividing a big batch of images into one chunk
+per GPU happens outside of this script.
+
+Does not have a command-line option to bind the process to a particular GPU, but you can
+prepend with "CUDA_VISIBLE_DEVICES=0 ", for example, to bind to GPU 0, e.g.:
+
+CUDA_VISIBLE_DEVICES=0 python detection/run_detector_batch.py md_v4.1.0.pb ~/data ~/mdv4test.json
+
+You can disable GPU processing entirely by setting CUDA_VISIBLE_DEVICES=''.
+
+"""
 
 #%% Constants, imports, environment
 
@@ -91,7 +96,7 @@ exif_options.byte_handling = 'convert_to_string'
 
 #%% Support functions for multiprocessing
 
-def producer_func(q,image_files):
+def _producer_func(q,image_files):
     """
     Producer function; only used when using the (optional) image queue.
 
@@ -120,7 +125,7 @@ def producer_func(q,image_files):
     print('Finished image loading'); sys.stdout.flush()
 
 
-def consumer_func(q,return_queue,model_file,confidence_threshold,image_size=None):
+def _consumer_func(q,return_queue,model_file,confidence_threshold,image_size=None):
     """
     Consumer function; only used when using the (optional) image queue.
 
@@ -177,15 +182,28 @@ def run_detector_with_image_queue(image_files,model_file,confidence_threshold,
    when --use_image_queue is specified. Starts a reader process to read images from disk, but
    processes images in the process from which this function is called (i.e., does not currently
    spawn a separate consumer process).
+
+    Args:
+        image_files (str): list of absolute paths to images
+        model_file (str): filename or model identifier (e.g. "MDV5A")
+        confidence_threshold (float): minimum confidence detection to include in
+            output
+        quiet (bool, optional): suppress per-image console printouts
+        image_size (tuple, optional): image size to use for inference, only mess with this
+            if (a) you're using a model other than MegaDetector or (b) you know what you're
+            doing
+
+    Returns:
+        list: list of dicts in the format returned by process_image()
    """
 
    q = multiprocessing.JoinableQueue(max_queue_size)
    return_queue = multiprocessing.Queue(1)
 
    if use_threads_for_queue:
-        producer = Thread(target=producer_func,args=(q,image_files,))
+        producer = Thread(target=_producer_func,args=(q,image_files,))
    else:
-        producer = Process(target=producer_func,args=(q,image_files,))
+        producer = Process(target=_producer_func,args=(q,image_files,))
    producer.daemon = False
    producer.start()
 
@@ -199,15 +217,15 @@ def run_detector_with_image_queue(image_files,model_file,confidence_threshold,
 
    if run_separate_consumer_process:
        if use_threads_for_queue:
-            consumer = Thread(target=consumer_func,args=(q,return_queue,model_file,
+            consumer = Thread(target=_consumer_func,args=(q,return_queue,model_file,
                                                          confidence_threshold,image_size,))
        else:
-            consumer = Process(target=consumer_func,args=(q,return_queue,model_file,
+            consumer = Process(target=_consumer_func,args=(q,return_queue,model_file,
                                                           confidence_threshold,image_size,))
        consumer.daemon = True
        consumer.start()
    else:
-        consumer_func(q,return_queue,model_file,confidence_threshold,image_size)
+        _consumer_func(q,return_queue,model_file,confidence_threshold,image_size)
 
    producer.join()
    print('Producer finished')
@@ -226,13 +244,15 @@ def run_detector_with_image_queue(image_files,model_file,confidence_threshold,
 
 #%% Other support functions
 
-def chunks_by_number_of_chunks(ls, n):
+def _chunks_by_number_of_chunks(ls, n):
     """
     Splits a list into n even chunks.
+
+    External callers should use ct_utils.split_list_into_n_chunks().
 
-    Args
-    [2 old lines not captured in this view]
+    Args:
+        ls (list): list to break up into chunks
+        n (int): number of chunks
     """
 
     for i in range(0, n):
@@ -242,19 +262,31 @@ def chunks_by_number_of_chunks(ls, n):
 #%% Image processing functions
 
 def process_images(im_files, detector, confidence_threshold, use_image_queue=False,
-                   quiet=False, image_size=None, checkpoint_queue=None,
-                   [1 old line not captured in this view]
+                   quiet=False, image_size=None, checkpoint_queue=None,
+                   include_image_size=False, include_image_timestamp=False,
+                   include_exif_data=False):
    """
-    Runs MegaDetector over a list of image files.
-    [...] image queue is enabled, but not in the [...]
-    [5 old lines not captured in this view]
+    Runs a detector (typically MegaDetector) over a list of image files.
+    As of 3/2024, this entry point is used when the image queue is enabled, but not in the
+    standard inference path (which instead loops over process_image()).
+
+    Args:
+        im_files (list: paths to image files
+        detector (str or detector object): loaded model or str; if this is a string, it can be a
+            path to a .pb/.pt model file or a known model identifier (e.g. "MDV5A")
+        confidence_threshold (float): only detections above this threshold are returned
+        use_image_queue (bool, optional): separate image loading onto a dedicated worker process
+        quiet (bool, optional): suppress per-image printouts
+        image_size (tuple, optional): image size to use for inference, only mess with this
+            if (a) you're using a model other than MegaDetector or (b) you know what you're
+            doing
+        checkpoint_queue (Queue, optional): internal parameter used to pass image queues around
+        include_image_size (bool, optional): should we include image size in the output for each image?
+        include_image_timestamp (bool, optional): should we include image timestamps in the output for each image?
+        include_exif_data (bool, optional): should we include EXIF data in the output for each image?
 
-    Returns
-    [1 old line not captured in this view]
+    Returns:
+        list: list of dicts, in which each dict represents detections on one image,
        see the 'images' key in https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
    """
 
@@ -293,17 +325,26 @@ def process_image(im_file, detector, confidence_threshold, image=None,
                  include_image_timestamp=False, include_exif_data=False,
                  skip_image_resizing=False):
    """
-    Runs MegaDetector on a single image file.
-    [1 old line not captured in this view]
-    Args
-    [5 old lines not captured in this view]
+    Runs a detector (typically MegaDetector) on a single image file.
+
+    Args:
+        im_file (str): path to image file
+        detector (detector object): loaded model, this can no longer be a string by the time
+            you get this far down the pipeline
+        confidence_threshold (float): only detections above this threshold are returned
+        image (Image, optional): previously-loaded image, if available, used when a worker
+            thread is handling image loads
+        quiet (bool, optional): suppress per-image printouts
+        image_size (tuple, optional): image size to use for inference, only mess with this
+            if (a) you're using a model other than MegaDetector or (b) you know what you're
+            doing
+        include_image_size (bool, optional): should we include image size in the output for each image?
+        include_image_timestamp (bool, optional): should we include image timestamps in the output for each image?
+        include_exif_data (bool, optional): should we include EXIF data in the output for each image?
+        skip_image_resizing (bool, optional): whether to skip internal image resizing and rely on external resizing
 
    Returns:
-        [1 old line not captured in this view]
+        dict: dict representing detections on one image,
        see the 'images' key in
        https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
    """
@@ -351,7 +392,7 @@ def process_image(im_file, detector, confidence_threshold, image=None,
 # ...def process_image(...)
 
 
-def load_custom_class_mapping(class_mapping_filename):
+def _load_custom_class_mapping(class_mapping_filename):
     """
     This is an experimental hack to allow the use of non-MD YOLOv5 models through
     the same infrastructure; it disables the code that enforces MDv5-like class lists.
@@ -389,34 +430,50 @@ def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=None,
                                class_mapping_filename=None, include_image_size=False,
                                include_image_timestamp=False, include_exif_data=False):
    """
-    [15 old lines not captured in this view]
+    Load a model file and run it on a list of images.
+
+    Args:
+
+        model_file (str): path to model file, or supported model string (e.g. "MDV5A")
+        image_file_names (list or str): list of strings (image filenames), a single image filename,
+            a folder to recursively search for images in, or a .json or .txt file containing a list
+            of images.
+        checkpoint_path (str, optional), path to use for checkpoints (if None, checkpointing
+            is disabled)
+        confidence_threshold (float, optional): only detections above this threshold are returned
+        checkpoint_frequency (int, optional): int, write results to JSON checkpoint file every N
+            images, -1 disabled checkpointing
+        results (list, optional): list of dicts, existing results loaded from checkpoint; generally
+            not useful if you're using this function outside of the CLI
+        n_cores (int, optional): number of parallel worker to use, ignored if we're running on a GPU
+        use_image_queue (bool, optional): use a dedicated worker for image loading
+        quiet (bool, optional): disable per-image console output
+        image_size (tuple, optional): image size to use for inference, only mess with this
+            if (a) you're using a model other than MegaDetector or (b) you know what you're
+            doing
+        class_mapping_filename (str, optional), use a non-default class mapping supplied in a .json
+            file or YOLOv5 dataset.yaml file
+        include_image_size (bool, optional): should we include image size in the output for each image?
+        include_image_timestamp (bool, optional): should we include image timestamps in the output for each image?
+        include_exif_data (bool, optional): should we include EXIF data in the output for each image?
+
+    Returns:
+        results: list of dicts; each dict represents detections on one image
    """
 
+    # Validate input arguments
    if n_cores is None:
        n_cores = 1
 
    if confidence_threshold is None:
        confidence_threshold=run_detector.DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD
-
-    if checkpoint_frequency is None:
+
+    # Disable checkpointing if checkpoint_path is None
+    if checkpoint_frequency is None or checkpoint_path is None:
        checkpoint_frequency = -1
 
    if class_mapping_filename is not None:
-        load_custom_class_mapping(class_mapping_filename)
+        _load_custom_class_mapping(class_mapping_filename)
 
    # Handle the case where image_file_names is not yet actually a list
    if isinstance(image_file_names,str):
@@ -451,7 +508,8 @@ def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=None,
                    list_file))
        else:
            raise ValueError(
-                '{} supplied as [image_file_names] argument, but it does not appear to be a file or folder'
+                '{} supplied as [image_file_names] argument, but it does not appear to be a file or folder'.format(
+                    image_file_names))
 
    if results is None:
        results = []
@@ -515,12 +573,12 @@ def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=None,
            results.append(result)
 
            # Write a checkpoint if necessary
-            if checkpoint_frequency != -1 and count % checkpoint_frequency == 0:
+            if (checkpoint_frequency != -1) and ((count % checkpoint_frequency) == 0):
 
                print('Writing a new checkpoint after having processed {} images since '
                      'last restart'.format(count))
 
-                write_checkpoint(checkpoint_path, results)
+                _write_checkpoint(checkpoint_path, results)
 
    else:
 
@@ -540,7 +598,7 @@ def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=None,
              len(already_processed),n_images_all))
 
        # Divide images into chunks; we'll send one chunk to each worker process
-        image_batches = list(chunks_by_number_of_chunks(image_file_names, n_cores))
+        image_batches = list(_chunks_by_number_of_chunks(image_file_names, n_cores))
 
        pool = workerpool(n_cores)
 
@@ -553,7 +611,7 @@ def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=None,
        # Pass the "results" array (which may already contain images loaded from an existing
        # checkpoint) to the checkpoint queue handler function, which will append results to
        # the list as they become available.
-        checkpoint_thread = Thread(target=checkpoint_queue_handler,
+        checkpoint_thread = Thread(target=_checkpoint_queue_handler,
                                   args=(checkpoint_path, checkpoint_frequency,
                                         checkpoint_queue, results), daemon=True)
        checkpoint_thread.start()
@@ -597,7 +655,7 @@ def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=None,
 # ...def load_and_run_detector_batch(...)
 
 
-def checkpoint_queue_handler(checkpoint_path, checkpoint_frequency, checkpoint_queue, results):
+def _checkpoint_queue_handler(checkpoint_path, checkpoint_frequency, checkpoint_queue, results):
     """
     Thread function to accumulate results and write checkpoints when checkpointing and
     multiprocessing are both enabled.
@@ -617,15 +675,15 @@ def checkpoint_queue_handler(checkpoint_path, checkpoint_frequency, checkpoint_queue, results):
            print('Writing a new checkpoint after having processed {} images since '
                  'last restart'.format(result_count))
 
-            write_checkpoint(checkpoint_path, results)
+            _write_checkpoint(checkpoint_path, results)
 
 
-def write_checkpoint(checkpoint_path, results):
+def _write_checkpoint(checkpoint_path, results):
    """
    Writes the 'images' field in the dict 'results' to a json checkpoint file.
    """
 
-    assert checkpoint_path is not None
+    assert checkpoint_path is not None
 
    # Back up any previous checkpoints, to protect against crashes while we're writing
    # the checkpoint file.
@@ -645,9 +703,14 @@ def write_checkpoint(checkpoint_path, results):
 
 def get_image_datetime(image):
     """
-    [1 old line not captured in this view]
+    Reads EXIF datetime from a PIL Image object.
 
-    [1 old line not captured in this view]
+    Args:
+        image (Image): the PIL Image object from which we should read datetime information
+
+    Returns:
+        str: the EXIF datetime from [image] (a PIL Image object), if available, as a string;
+            returns None if EXIF datetime is not available.
     """
 
     exif_tags = read_exif.read_pil_exif(image,exif_options)
@@ -669,20 +732,24 @@ def write_results_to_file(results, output_file, relative_path_base=None,
 
    https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
 
-    Args
-    [13 old lines not captured in this view]
+    Args:
+        results (list): list of dict, each dict represents detections on one image
+        output_file (str): path to JSON output file, should end in '.json'
+        relative_path_base (str, optional): path to a directory as the base for relative paths, can
+            be None if the paths in [results] are absolute
+        detector_file (str, optional): filename of the detector used to generate these results, only
+            used to pull out a version number for the "info" field
+        info (dict, optional): dictionary to put in the results file instead of the default "info" field
+        include_max_conf (bool, optional): old files (version 1.2 and earlier) included a "max_conf" field
+            in each image; this was removed in version 1.3. Set this flag to force the inclusion
+            of this field.
+        custom_metadata (object, optional): additional data to include as info['custom_metadata']; typically
+            a dictionary, but no type/format checks are performed
+        force_forward_slashes (bool, optional): convert all slashes in filenames within [results] to
+            forward slashes
+
+    Returns:
+        dict: the MD-formatted dictionary that was written to [output_file]
    """
 
    if relative_path_base is not None:
@@ -997,7 +1064,7 @@ def main():
    assert not os.path.isdir(args.output_file), 'Specified output file is a directory'
 
    if args.class_mapping_filename is not None:
-        load_custom_class_mapping(args.class_mapping_filename)
+        _load_custom_class_mapping(args.class_mapping_filename)
 
    # Load the checkpoint if available
    #
@@ -1146,8 +1213,7 @@ def main():
        os.remove(checkpoint_path)
        print('Deleted checkpoint file {}'.format(checkpoint_path))
 
-    print('Done!')
-
+    print('Done, thanks for MegaDetect\'ing!')
 
 if __name__ == '__main__':
    main()
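
Across these hunks the pattern is consistent: the queue workers, chunking, checkpoint, and class-mapping helpers become private (underscore-prefixed), and the public surface that remains is load_and_run_detector_batch() plus write_results_to_file(). A minimal sketch of how those two entry points fit together, based only on the signatures and docstrings shown above (the model string, paths, and parameter values are illustrative assumptions, not taken from the package's tests):

    from detection.run_detector_batch import load_and_run_detector_batch, write_results_to_file

    # Run the detector over a folder, writing a JSON checkpoint every 100 images;
    # per the docstrings above, checkpointing is disabled when checkpoint_path is
    # None, and an interrupted CLI run can be resumed via --resume_from_checkpoint.
    results = load_and_run_detector_batch(
        model_file='MDV5A',                       # model file path or supported model string
        image_file_names='/data/camera-traps',    # list, single image, folder, or .json/.txt list
        checkpoint_path='/data/checkpoint.json',
        checkpoint_frequency=100)

    # Serialize [results] in the MegaDetector batch output format, with image
    # paths written relative to the input folder.
    write_results_to_file(results,
                          output_file='/data/md-results.json',
                          relative_path_base='/data/camera-traps')

The equivalent command line follows the pattern in the new module docstring, e.g. python detection/run_detector_batch.py MDV5A /data/camera-traps /data/md-results.json --checkpoint_frequency 100.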