megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- api/__init__.py +0 -0
- api/batch_processing/__init__.py +0 -0
- api/batch_processing/api_core/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/score.py +0 -1
- api/batch_processing/api_core/server_job_status_table.py +0 -1
- api/batch_processing/api_core_support/__init__.py +0 -0
- api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
- api/batch_processing/api_support/__init__.py +0 -0
- api/batch_processing/api_support/summarize_daily_activity.py +0 -1
- api/batch_processing/data_preparation/__init__.py +0 -0
- api/batch_processing/data_preparation/manage_local_batch.py +93 -79
- api/batch_processing/data_preparation/manage_video_batch.py +8 -8
- api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
- api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
- api/batch_processing/postprocessing/__init__.py +0 -0
- api/batch_processing/postprocessing/add_max_conf.py +12 -12
- api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
- api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
- api/batch_processing/postprocessing/compare_batch_results.py +114 -44
- api/batch_processing/postprocessing/convert_output_format.py +62 -19
- api/batch_processing/postprocessing/load_api_results.py +17 -20
- api/batch_processing/postprocessing/md_to_coco.py +31 -21
- api/batch_processing/postprocessing/md_to_labelme.py +165 -68
- api/batch_processing/postprocessing/merge_detections.py +40 -15
- api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
- api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
- api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
- api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
- api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
- api/synchronous/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
- api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
- api/synchronous/api_core/animal_detection_api/config.py +35 -35
- api/synchronous/api_core/tests/__init__.py +0 -0
- api/synchronous/api_core/tests/load_test.py +109 -109
- classification/__init__.py +0 -0
- classification/aggregate_classifier_probs.py +21 -24
- classification/analyze_failed_images.py +11 -13
- classification/cache_batchapi_outputs.py +51 -51
- classification/create_classification_dataset.py +69 -68
- classification/crop_detections.py +54 -53
- classification/csv_to_json.py +97 -100
- classification/detect_and_crop.py +105 -105
- classification/evaluate_model.py +43 -42
- classification/identify_mislabeled_candidates.py +47 -46
- classification/json_to_azcopy_list.py +10 -10
- classification/json_validator.py +72 -71
- classification/map_classification_categories.py +44 -43
- classification/merge_classification_detection_output.py +68 -68
- classification/prepare_classification_script.py +157 -154
- classification/prepare_classification_script_mc.py +228 -228
- classification/run_classifier.py +27 -26
- classification/save_mislabeled.py +30 -30
- classification/train_classifier.py +20 -20
- classification/train_classifier_tf.py +21 -22
- classification/train_utils.py +10 -10
- data_management/__init__.py +0 -0
- data_management/annotations/__init__.py +0 -0
- data_management/annotations/annotation_constants.py +18 -31
- data_management/camtrap_dp_to_coco.py +238 -0
- data_management/cct_json_utils.py +107 -59
- data_management/cct_to_md.py +176 -158
- data_management/cct_to_wi.py +247 -219
- data_management/coco_to_labelme.py +272 -0
- data_management/coco_to_yolo.py +86 -62
- data_management/databases/__init__.py +0 -0
- data_management/databases/add_width_and_height_to_db.py +20 -16
- data_management/databases/combine_coco_camera_traps_files.py +35 -31
- data_management/databases/integrity_check_json_db.py +130 -83
- data_management/databases/subset_json_db.py +25 -16
- data_management/generate_crops_from_cct.py +27 -45
- data_management/get_image_sizes.py +188 -144
- data_management/importers/add_nacti_sizes.py +8 -8
- data_management/importers/add_timestamps_to_icct.py +78 -78
- data_management/importers/animl_results_to_md_results.py +158 -160
- data_management/importers/auckland_doc_test_to_json.py +9 -9
- data_management/importers/auckland_doc_to_json.py +8 -8
- data_management/importers/awc_to_json.py +7 -7
- data_management/importers/bellevue_to_json.py +15 -15
- data_management/importers/cacophony-thermal-importer.py +13 -13
- data_management/importers/carrizo_shrubfree_2018.py +8 -8
- data_management/importers/carrizo_trail_cam_2017.py +8 -8
- data_management/importers/cct_field_adjustments.py +9 -9
- data_management/importers/channel_islands_to_cct.py +10 -10
- data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
- data_management/importers/ena24_to_json.py +7 -7
- data_management/importers/filenames_to_json.py +8 -8
- data_management/importers/helena_to_cct.py +7 -7
- data_management/importers/idaho-camera-traps.py +7 -7
- data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
- data_management/importers/jb_csv_to_json.py +9 -9
- data_management/importers/mcgill_to_json.py +8 -8
- data_management/importers/missouri_to_json.py +18 -18
- data_management/importers/nacti_fieldname_adjustments.py +10 -10
- data_management/importers/noaa_seals_2019.py +8 -8
- data_management/importers/pc_to_json.py +7 -7
- data_management/importers/plot_wni_giraffes.py +7 -7
- data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
- data_management/importers/prepare_zsl_imerit.py +7 -7
- data_management/importers/rspb_to_json.py +8 -8
- data_management/importers/save_the_elephants_survey_A.py +8 -8
- data_management/importers/save_the_elephants_survey_B.py +9 -9
- data_management/importers/snapshot_safari_importer.py +26 -26
- data_management/importers/snapshot_safari_importer_reprise.py +665 -665
- data_management/importers/snapshot_serengeti_lila.py +14 -14
- data_management/importers/sulross_get_exif.py +8 -9
- data_management/importers/timelapse_csv_set_to_json.py +11 -11
- data_management/importers/ubc_to_json.py +13 -13
- data_management/importers/umn_to_json.py +7 -7
- data_management/importers/wellington_to_json.py +8 -8
- data_management/importers/wi_to_json.py +9 -9
- data_management/importers/zamba_results_to_md_results.py +181 -181
- data_management/labelme_to_coco.py +309 -159
- data_management/labelme_to_yolo.py +103 -60
- data_management/lila/__init__.py +0 -0
- data_management/lila/add_locations_to_island_camera_traps.py +9 -9
- data_management/lila/add_locations_to_nacti.py +147 -147
- data_management/lila/create_lila_blank_set.py +114 -31
- data_management/lila/create_lila_test_set.py +8 -8
- data_management/lila/create_links_to_md_results_files.py +106 -106
- data_management/lila/download_lila_subset.py +92 -90
- data_management/lila/generate_lila_per_image_labels.py +56 -43
- data_management/lila/get_lila_annotation_counts.py +18 -15
- data_management/lila/get_lila_image_counts.py +11 -11
- data_management/lila/lila_common.py +103 -70
- data_management/lila/test_lila_metadata_urls.py +132 -116
- data_management/ocr_tools.py +173 -128
- data_management/read_exif.py +161 -99
- data_management/remap_coco_categories.py +84 -0
- data_management/remove_exif.py +58 -62
- data_management/resize_coco_dataset.py +32 -44
- data_management/wi_download_csv_to_coco.py +246 -0
- data_management/yolo_output_to_md_output.py +86 -73
- data_management/yolo_to_coco.py +535 -95
- detection/__init__.py +0 -0
- detection/detector_training/__init__.py +0 -0
- detection/process_video.py +85 -33
- detection/pytorch_detector.py +43 -25
- detection/run_detector.py +157 -72
- detection/run_detector_batch.py +189 -114
- detection/run_inference_with_yolov5_val.py +118 -51
- detection/run_tiled_inference.py +113 -42
- detection/tf_detector.py +51 -28
- detection/video_utils.py +606 -521
- docs/source/conf.py +43 -0
- md_utils/__init__.py +0 -0
- md_utils/azure_utils.py +9 -9
- md_utils/ct_utils.py +249 -70
- md_utils/directory_listing.py +59 -64
- md_utils/md_tests.py +968 -862
- md_utils/path_utils.py +655 -155
- md_utils/process_utils.py +157 -133
- md_utils/sas_blob_utils.py +20 -20
- md_utils/split_locations_into_train_val.py +45 -32
- md_utils/string_utils.py +33 -10
- md_utils/url_utils.py +208 -27
- md_utils/write_html_image_list.py +51 -35
- md_visualization/__init__.py +0 -0
- md_visualization/plot_utils.py +102 -109
- md_visualization/render_images_with_thumbnails.py +34 -34
- md_visualization/visualization_utils.py +908 -311
- md_visualization/visualize_db.py +109 -58
- md_visualization/visualize_detector_output.py +61 -42
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
- megadetector-5.0.9.dist-info/RECORD +224 -0
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
- taxonomy_mapping/__init__.py +0 -0
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
- taxonomy_mapping/map_new_lila_datasets.py +154 -154
- taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
- taxonomy_mapping/preview_lila_taxonomy.py +591 -591
- taxonomy_mapping/retrieve_sample_image.py +12 -12
- taxonomy_mapping/simple_image_download.py +11 -11
- taxonomy_mapping/species_lookup.py +10 -10
- taxonomy_mapping/taxonomy_csv_checker.py +18 -18
- taxonomy_mapping/taxonomy_graph.py +47 -47
- taxonomy_mapping/validate_lila_category_mappings.py +83 -76
- data_management/cct_json_to_filename_json.py +0 -89
- data_management/cct_to_csv.py +0 -140
- data_management/databases/remove_corrupted_images_from_db.py +0 -191
- detection/detector_training/copy_checkpoints.py +0 -43
- md_visualization/visualize_megadb.py +0 -183
- megadetector-5.0.7.dist-info/RECORD +0 -202
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
detection/run_detector_batch.py
CHANGED
```diff
@@ -1,38 +1,43 @@
-(old module header, lines 1-35: not rendered in this diff view)
+"""
+
+run_detector_batch.py
+
+Module to run MegaDetector on lots of images, writing the results
+to a file in the MegaDetector results format.
+
+https://github.com/agentmorris/MegaDetector/tree/main/api/batch_processing#megadetector-batch-output-format
+
+This enables the results to be used in our post-processing pipeline; see
+api/batch_processing/postprocessing/postprocess_batch_results.py .
+
+This script can save results to checkpoints intermittently, in case disaster
+strikes. To enable this, set --checkpoint_frequency to n > 0, and results
+will be saved as a checkpoint every n images. Checkpoints will be written
+to a file in the same directory as the output_file, and after all images
+are processed and final results file written to output_file, the temporary
+checkpoint file will be deleted. If you want to resume from a checkpoint, set
+the checkpoint file's path using --resume_from_checkpoint.
+
+The `threshold` you can provide as an argument is the confidence threshold above
+which detections will be included in the output file.
+
+Has multiprocessing support for CPUs only; if a GPU is available, it will
+use the GPU instead of CPUs, and the --ncores option will be ignored. Checkpointing
+is not supported when using a GPU.
+
+The lack of GPU multiprocessing support might sound annoying, but in practice we
+run a gazillion MegaDetector images on multiple GPUs using this script, we just only use
+one GPU *per invocation of this script*. Dividing a big batch of images into one chunk
+per GPU happens outside of this script.
+
+Does not have a command-line option to bind the process to a particular GPU, but you can
+prepend with "CUDA_VISIBLE_DEVICES=0 ", for example, to bind to GPU 0, e.g.:
+
+CUDA_VISIBLE_DEVICES=0 python detection/run_detector_batch.py md_v4.1.0.pb ~/data ~/mdv4test.json
+
+You can disable GPU processing entirely by setting CUDA_VISIBLE_DEVICES=''.
+
+"""
 
 #%% Constants, imports, environment
 
```
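The new header spells out the intended multi-GPU workflow: one invocation of this script per GPU, with chunking handled externally. As a minimal sketch of that dispatch pattern (the chunk-list filenames here are hypothetical; the `MDV5A` model string and `.json` image-list inputs are documented in later hunks):

```python
# Hypothetical driver: one run_detector_batch.py process per GPU, each bound to a
# single device via CUDA_VISIBLE_DEVICES, as the module header recommends.
import os
import subprocess

chunk_files = ['chunk_gpu0.json', 'chunk_gpu1.json']  # pre-split image lists (hypothetical)

processes = []
for gpu_id, chunk_file in enumerate(chunk_files):
    env = os.environ.copy()
    env['CUDA_VISIBLE_DEVICES'] = str(gpu_id)  # bind this invocation to one GPU
    processes.append(subprocess.Popen(
        ['python', 'detection/run_detector_batch.py', 'MDV5A',
         chunk_file, 'results_gpu_{}.json'.format(gpu_id)],
        env=env))

for p in processes:
    p.wait()
```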
```diff
@@ -91,7 +96,7 @@ exif_options.byte_handling = 'convert_to_string'
 
 #%% Support functions for multiprocessing
 
-def producer_func(q,image_files):
+def _producer_func(q,image_files):
     """
     Producer function; only used when using the (optional) image queue.
 
@@ -120,7 +125,7 @@ def producer_func(q,image_files):
     print('Finished image loading'); sys.stdout.flush()
 
 
-def consumer_func(q,return_queue,model_file,confidence_threshold,image_size=None):
+def _consumer_func(q,return_queue,model_file,confidence_threshold,image_size=None):
     """
     Consumer function; only used when using the (optional) image queue.
 
```
```diff
@@ -177,15 +182,28 @@ def run_detector_with_image_queue(image_files,model_file,confidence_threshold,
     when --use_image_queue is specified. Starts a reader process to read images from disk, but
     processes images in the process from which this function is called (i.e., does not currently
     spawn a separate consumer process).
+
+    Args:
+        image_files (str): list of absolute paths to images
+        model_file (str): filename or model identifier (e.g. "MDV5A")
+        confidence_threshold (float): minimum confidence detection to include in
+            output
+        quiet (bool, optional): suppress per-image console printouts
+        image_size (tuple, optional): image size to use for inference, only mess with this
+            if (a) you're using a model other than MegaDetector or (b) you know what you're
+            doing
+
+    Returns:
+        list: list of dicts in the format returned by process_image()
     """
 
     q = multiprocessing.JoinableQueue(max_queue_size)
     return_queue = multiprocessing.Queue(1)
 
     if use_threads_for_queue:
-        producer = Thread(target=producer_func,args=(q,image_files,))
+        producer = Thread(target=_producer_func,args=(q,image_files,))
     else:
-        producer = Process(target=producer_func,args=(q,image_files,))
+        producer = Process(target=_producer_func,args=(q,image_files,))
     producer.daemon = False
     producer.start()
 
@@ -199,15 +217,15 @@ def run_detector_with_image_queue(image_files,model_file,confidence_threshold,
 
     if run_separate_consumer_process:
         if use_threads_for_queue:
-            consumer = Thread(target=consumer_func,args=(q,return_queue,model_file,
+            consumer = Thread(target=_consumer_func,args=(q,return_queue,model_file,
                                                          confidence_threshold,image_size,))
         else:
-            consumer = Process(target=consumer_func,args=(q,return_queue,model_file,
+            consumer = Process(target=_consumer_func,args=(q,return_queue,model_file,
                                                           confidence_threshold,image_size,))
         consumer.daemon = True
         consumer.start()
     else:
-        consumer_func(q,return_queue,model_file,confidence_threshold,image_size)
+        _consumer_func(q,return_queue,model_file,confidence_threshold,image_size)
 
     producer.join()
     print('Producer finished')
```
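The two renamed helpers above implement a bounded producer/consumer pipeline: a loader fills a `JoinableQueue` while inference drains it. A self-contained sketch of the same pattern, with a stand-in computation in place of image loading and inference:

```python
# Minimal producer/consumer queue sketch, mirroring the structure above.
import multiprocessing
from threading import Thread

def _producer(q, items):
    for item in items:
        q.put(item)          # blocks when the queue is full (bounded memory)
    q.put(None)              # sentinel: no more work

def _consumer(q, return_queue):
    results = []
    while True:
        item = q.get()
        if item is None:
            q.task_done()
            break
        results.append(item * 2)   # stand-in for inference on one image
        q.task_done()
    return_queue.put(results)

q = multiprocessing.JoinableQueue(10)
return_queue = multiprocessing.Queue(1)
producer = Thread(target=_producer, args=(q, list(range(5))))
producer.start()
_consumer(q, return_queue)   # consume in the calling thread, as in the default path
producer.join()
print(return_queue.get())    # [0, 2, 4, 6, 8]
```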
```diff
@@ -226,13 +244,15 @@ def run_detector_with_image_queue(image_files,model_file,confidence_threshold,
 
 #%% Other support functions
 
-def chunks_by_number_of_chunks(ls, n):
+def _chunks_by_number_of_chunks(ls, n):
     """
     Splits a list into n even chunks.
+
+    External callers should use ct_utils.split_list_into_n_chunks().
 
-    Args
-
-
+    Args:
+        ls (list): list to break up into chunks
+        n (int): number of chunks
     """
 
     for i in range(0, n):
```
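The loop body of `_chunks_by_number_of_chunks()` is truncated in this view; a common striding implementation consistent with the `for i in range(0, n):` context line would be (an assumption, not the package's verbatim code):

```python
def chunks_by_number_of_chunks(ls, n):
    # Split [ls] into n roughly-even chunks by striding; chunk i takes every
    # n-th element starting at offset i.
    for i in range(0, n):
        yield ls[i::n]

print(list(chunks_by_number_of_chunks([1, 2, 3, 4, 5, 6, 7], 3)))
# [[1, 4, 7], [2, 5], [3, 6]]
```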
```diff
@@ -242,18 +262,31 @@ def chunks_by_number_of_chunks(ls, n):
 #%% Image processing functions
 
 def process_images(im_files, detector, confidence_threshold, use_image_queue=False,
-                   quiet=False, image_size=None, checkpoint_queue=None,
-
+                   quiet=False, image_size=None, checkpoint_queue=None,
+                   include_image_size=False, include_image_timestamp=False,
+                   include_exif_data=False):
     """
-    Runs MegaDetector over a list of image files.
-    (old docstring not rendered in this diff view)
+    Runs a detector (typically MegaDetector) over a list of image files.
+    As of 3/2024, this entry point is used when the image queue is enabled, but not in the
+    standard inference path (which instead loops over process_image()).
+
+    Args:
+        im_files (list: paths to image files
+        detector (str or detector object): loaded model or str; if this is a string, it can be a
+            path to a .pb/.pt model file or a known model identifier (e.g. "MDV5A")
+        confidence_threshold (float): only detections above this threshold are returned
+        use_image_queue (bool, optional): separate image loading onto a dedicated worker process
+        quiet (bool, optional): suppress per-image printouts
+        image_size (tuple, optional): image size to use for inference, only mess with this
+            if (a) you're using a model other than MegaDetector or (b) you know what you're
+            doing
+        checkpoint_queue (Queue, optional): internal parameter used to pass image queues around
+        include_image_size (bool, optional): should we include image size in the output for each image?
+        include_image_timestamp (bool, optional): should we include image timestamps in the output for each image?
+        include_exif_data (bool, optional): should we include EXIF data in the output for each image?
 
-    Returns
-
+    Returns:
+        list: list of dicts, in which each dict represents detections on one image,
     see the 'images' key in https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
     """
 
@@ -269,7 +302,7 @@ def process_images(im_files, detector, confidence_threshold, use_image_queue=Fal
                                include_image_size=include_image_size,
                                include_image_timestamp=include_image_timestamp,
                                include_exif_data=include_exif_data)
-    else:
+    else:
         results = []
         for im_file in im_files:
             result = process_image(im_file, detector, confidence_threshold,
```
```diff
@@ -292,17 +325,26 @@ def process_image(im_file, detector, confidence_threshold, image=None,
                   include_image_timestamp=False, include_exif_data=False,
                   skip_image_resizing=False):
     """
-    Runs MegaDetector on a single image file.
-
-    Args
-    (old argument list not rendered in this diff view)
+    Runs a detector (typically MegaDetector) on a single image file.
+
+    Args:
+        im_file (str): path to image file
+        detector (detector object): loaded model, this can no longer be a string by the time
+            you get this far down the pipeline
+        confidence_threshold (float): only detections above this threshold are returned
+        image (Image, optional): previously-loaded image, if available, used when a worker
+            thread is handling image loads
+        quiet (bool, optional): suppress per-image printouts
+        image_size (tuple, optional): image size to use for inference, only mess with this
+            if (a) you're using a model other than MegaDetector or (b) you know what you're
+            doing
+        include_image_size (bool, optional): should we include image size in the output for each image?
+        include_image_timestamp (bool, optional): should we include image timestamps in the output for each image?
+        include_exif_data (bool, optional): should we include EXIF data in the output for each image?
+        skip_image_resizing (bool, optional): whether to skip internal image resizing and rely on external resizing
 
     Returns:
-
+        dict: dict representing detections on one image,
     see the 'images' key in
     https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
     """
```
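Per the updated docstring, `process_image()` now expects an already-loaded detector object. A hedged usage sketch; only `process_image()`'s signature is taken from this diff, and the `load_detector` import path is an assumption about the rest of the package:

```python
# Sketch: run one image through a pre-loaded detector (paths are hypothetical).
from detection.run_detector import load_detector  # assumed loader location
from detection.run_detector_batch import process_image

detector = load_detector('MDV5A')  # no longer a string by the time process_image sees it
result = process_image('camera/img_0001.jpg',
                       detector,
                       confidence_threshold=0.2,
                       include_image_timestamp=True)
print(result['file'], len(result['detections']))
```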
```diff
@@ -350,7 +392,7 @@ def process_image(im_file, detector, confidence_threshold, image=None,
 # ...def process_image(...)
 
 
-def load_custom_class_mapping(class_mapping_filename):
+def _load_custom_class_mapping(class_mapping_filename):
     """
     This is an experimental hack to allow the use of non-MD YOLOv5 models through
     the same infrastructure; it disables the code that enforces MDv5-like class lists.
```
```diff
@@ -388,34 +430,50 @@ def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=No
                                 class_mapping_filename=None, include_image_size=False,
                                 include_image_timestamp=False, include_exif_data=False):
     """
-    (old docstring, 15 lines: not rendered in this diff view)
+    Load a model file and run it on a list of images.
+
+    Args:
+
+        model_file (str): path to model file, or supported model string (e.g. "MDV5A")
+        image_file_names (list or str): list of strings (image filenames), a single image filename,
+            a folder to recursively search for images in, or a .json or .txt file containing a list
+            of images.
+        checkpoint_path (str, optional), path to use for checkpoints (if None, checkpointing
+            is disabled)
+        confidence_threshold (float, optional): only detections above this threshold are returned
+        checkpoint_frequency (int, optional): int, write results to JSON checkpoint file every N
+            images, -1 disabled checkpointing
+        results (list, optional): list of dicts, existing results loaded from checkpoint; generally
+            not useful if you're using this function outside of the CLI
+        n_cores (int, optional): number of parallel worker to use, ignored if we're running on a GPU
+        use_image_queue (bool, optional): use a dedicated worker for image loading
+        quiet (bool, optional): disable per-image console output
+        image_size (tuple, optional): image size to use for inference, only mess with this
+            if (a) you're using a model other than MegaDetector or (b) you know what you're
+            doing
+        class_mapping_filename (str, optional), use a non-default class mapping supplied in a .json
+            file or YOLOv5 dataset.yaml file
+        include_image_size (bool, optional): should we include image size in the output for each image?
+        include_image_timestamp (bool, optional): should we include image timestamps in the output for each image?
+        include_exif_data (bool, optional): should we include EXIF data in the output for each image?
+
+    Returns:
+        results: list of dicts; each dict represents detections on one image
     """
 
+    # Validate input arguments
     if n_cores is None:
         n_cores = 1
 
     if confidence_threshold is None:
         confidence_threshold=run_detector.DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD
-
-    if checkpoint_frequency is None:
+
+    # Disable checkpointing if checkpoint_path is None
+    if checkpoint_frequency is None or checkpoint_path is None:
         checkpoint_frequency = -1
 
     if class_mapping_filename is not None:
-        load_custom_class_mapping(class_mapping_filename)
+        _load_custom_class_mapping(class_mapping_filename)
 
     # Handle the case where image_file_names is not yet actually a list
     if isinstance(image_file_names,str):
```
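Taken together with `write_results_to_file()` (further down), the documented signature supports a short programmatic run. A sketch using only names and parameters visible in this diff; paths are hypothetical:

```python
from detection.run_detector_batch import (load_and_run_detector_batch,
                                          write_results_to_file)

# image_file_names can be a folder (searched recursively), per the new docstring
results = load_and_run_detector_batch(model_file='MDV5A',
                                      image_file_names='/data/camera_traps',
                                      checkpoint_path='/data/md_checkpoint.json',
                                      checkpoint_frequency=1000,
                                      quiet=True)

# Writes MD-format JSON; force_forward_slashes defaults to True in 5.0.9
write_results_to_file(results,
                      '/data/md_results.json',
                      relative_path_base='/data/camera_traps',
                      detector_file='MDV5A')
```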
```diff
@@ -450,7 +508,8 @@ def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=No
                 list_file))
     else:
         raise ValueError(
-            '{} supplied as [image_file_names] argument, but it does not appear to be a file or folder'
+            '{} supplied as [image_file_names] argument, but it does not appear to be a file or folder'.format(
+                image_file_names))
 
     if results is None:
         results = []
```
```diff
@@ -514,12 +573,12 @@ def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=No
             results.append(result)
 
             # Write a checkpoint if necessary
-            if checkpoint_frequency != -1 and count % checkpoint_frequency == 0:
+            if (checkpoint_frequency != -1) and ((count % checkpoint_frequency) == 0):
 
                 print('Writing a new checkpoint after having processed {} images since '
                       'last restart'.format(count))
 
-                write_checkpoint(checkpoint_path, results)
+                _write_checkpoint(checkpoint_path, results)
 
     else:
@@ -539,7 +598,7 @@ def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=No
                   len(already_processed),n_images_all))
 
         # Divide images into chunks; we'll send one chunk to each worker process
-        image_batches = list(chunks_by_number_of_chunks(image_file_names, n_cores))
+        image_batches = list(_chunks_by_number_of_chunks(image_file_names, n_cores))
 
         pool = workerpool(n_cores)
 
```
```diff
@@ -552,7 +611,7 @@ def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=No
         # Pass the "results" array (which may already contain images loaded from an existing
         # checkpoint) to the checkpoint queue handler function, which will append results to
         # the list as they become available.
-        checkpoint_thread = Thread(target=checkpoint_queue_handler,
+        checkpoint_thread = Thread(target=_checkpoint_queue_handler,
                                    args=(checkpoint_path, checkpoint_frequency,
                                          checkpoint_queue, results), daemon=True)
         checkpoint_thread.start()
@@ -596,7 +655,7 @@ def load_and_run_detector_batch(model_file, image_file_names, checkpoint_path=No
 # ...def load_and_run_detector_batch(...)
 
 
-def checkpoint_queue_handler(checkpoint_path, checkpoint_frequency, checkpoint_queue, results):
+def _checkpoint_queue_handler(checkpoint_path, checkpoint_frequency, checkpoint_queue, results):
     """
     Thread function to accumulate results and write checkpoints when checkpointing and
     multiprocessing are both enabled.
```
```diff
@@ -616,15 +675,15 @@ def checkpoint_queue_handler(checkpoint_path, checkpoint_frequency, checkpoint_q
             print('Writing a new checkpoint after having processed {} images since '
                   'last restart'.format(result_count))
 
-            write_checkpoint(checkpoint_path, results)
+            _write_checkpoint(checkpoint_path, results)
 
 
-def write_checkpoint(checkpoint_path, results):
+def _write_checkpoint(checkpoint_path, results):
     """
     Writes the 'images' field in the dict 'results' to a json checkpoint file.
     """
 
-    assert checkpoint_path is not None
+    assert checkpoint_path is not None
 
     # Back up any previous checkpoints, to protect against crashes while we're writing
     # the checkpoint file.
```
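The context lines mention backing up the previous checkpoint before overwriting it. A generic sketch of that crash-safe write pattern (the backup suffix is an assumption; only the `'images'` field name comes from this diff):

```python
import json
import os
import shutil

def write_checkpoint_safely(checkpoint_path, results):
    assert checkpoint_path is not None
    # Keep the old checkpoint until the new one is fully written, so a crash
    # mid-write can't destroy the only copy.
    backup_path = checkpoint_path + '.bak'  # hypothetical suffix
    if os.path.isfile(checkpoint_path):
        shutil.copyfile(checkpoint_path, backup_path)
    with open(checkpoint_path, 'w') as f:
        json.dump({'images': results}, f, indent=1)
```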
```diff
@@ -644,9 +703,14 @@ def write_checkpoint(checkpoint_path, results):
 
 def get_image_datetime(image):
     """
-
+    Reads EXIF datetime from a PIL Image object.
 
-
+    Args:
+        image (Image): the PIL Image object from which we should read datetime information
+
+    Returns:
+        str: the EXIF datetime from [image] (a PIL Image object), if available, as a string;
+            returns None if EXIF datetime is not available.
     """
 
     exif_tags = read_exif.read_pil_exif(image,exif_options)
```
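`get_image_datetime()` delegates to `read_exif.read_pil_exif()`; for orientation, a rough standalone equivalent using only Pillow (tag 306 is the standard EXIF DateTime tag; this is an illustration, not the package's implementation):

```python
from PIL import Image

def get_exif_datetime(image_path):
    # Returns the EXIF DateTime string (e.g. '2023:07:14 06:32:11'), or None.
    with Image.open(image_path) as im:
        return im.getexif().get(306)

print(get_exif_datetime('camera/img_0001.jpg'))  # hypothetical path
```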
```diff
@@ -662,26 +726,30 @@ def get_image_datetime(image):
 
 def write_results_to_file(results, output_file, relative_path_base=None,
                           detector_file=None, info=None, include_max_conf=False,
-                          custom_metadata=None):
+                          custom_metadata=None, force_forward_slashes=True):
     """
     Writes list of detection results to JSON output file. Format matches:
 
     https://github.com/agentmorris/MegaDetector/tree/master/api/batch_processing#batch-processing-api-output-format
 
-    Args
-    (old argument list not rendered in this diff view)
+    Args:
+        results (list): list of dict, each dict represents detections on one image
+        output_file (str): path to JSON output file, should end in '.json'
+        relative_path_base (str, optional): path to a directory as the base for relative paths, can
+            be None if the paths in [results] are absolute
+        detector_file (str, optional): filename of the detector used to generate these results, only
+            used to pull out a version number for the "info" field
+        info (dict, optional): dictionary to put in the results file instead of the default "info" field
+        include_max_conf (bool, optional): old files (version 1.2 and earlier) included a "max_conf" field
+            in each image; this was removed in version 1.3. Set this flag to force the inclusion
+            of this field.
+        custom_metadata (object, optional): additional data to include as info['custom_metadata']; typically
+            a dictionary, but no type/format checks are performed
+        force_forward_slashes (bool, optional): convert all slashes in filenames within [results] to
+            forward slashes
+
+    Returns:
+        dict: the MD-formatted dictionary that was written to [output_file]
     """
 
     if relative_path_base is not None:
```
```diff
@@ -692,6 +760,14 @@ def write_results_to_file(results, output_file, relative_path_base=None,
             results_relative.append(r_relative)
         results = results_relative
 
+    if force_forward_slashes:
+        results_converted = []
+        for r in results:
+            r_converted = copy.copy(r)
+            r_converted['file'] = r_converted['file'].replace('\\','/')
+            results_converted.append(r_converted)
+        results = results_converted
+
     # The typical case: we need to build the 'info' struct
     if info is None:
```
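For illustration, the effect of the new `force_forward_slashes` default on a Windows-style result entry:

```python
r = {'file': 'camera_A\\2023\\img_0001.jpg', 'detections': []}
r['file'] = r['file'].replace('\\', '/')
print(r['file'])  # camera_A/2023/img_0001.jpg
```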
```diff
@@ -988,7 +1064,7 @@ def main():
     assert not os.path.isdir(args.output_file), 'Specified output file is a directory'
 
     if args.class_mapping_filename is not None:
-        load_custom_class_mapping(args.class_mapping_filename)
+        _load_custom_class_mapping(args.class_mapping_filename)
 
     # Load the checkpoint if available
     #
```
```diff
@@ -1137,8 +1213,7 @@ def main():
         os.remove(checkpoint_path)
         print('Deleted checkpoint file {}'.format(checkpoint_path))
 
-    print('Done!')
-
+    print('Done, thanks for MegaDetect\'ing!')
 
 if __name__ == '__main__':
     main()
```