megadetector 5.0.5__py3-none-any.whl → 5.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- api/batch_processing/data_preparation/manage_local_batch.py +302 -263
- api/batch_processing/data_preparation/manage_video_batch.py +81 -2
- api/batch_processing/postprocessing/add_max_conf.py +1 -0
- api/batch_processing/postprocessing/categorize_detections_by_size.py +50 -19
- api/batch_processing/postprocessing/compare_batch_results.py +110 -60
- api/batch_processing/postprocessing/load_api_results.py +56 -70
- api/batch_processing/postprocessing/md_to_coco.py +1 -1
- api/batch_processing/postprocessing/md_to_labelme.py +2 -1
- api/batch_processing/postprocessing/postprocess_batch_results.py +240 -81
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +625 -0
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +227 -75
- api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
- api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
- api/synchronous/api_core/animal_detection_api/detection/run_detector_batch.py +2 -2
- classification/prepare_classification_script.py +191 -191
- data_management/coco_to_yolo.py +68 -45
- data_management/databases/integrity_check_json_db.py +7 -5
- data_management/generate_crops_from_cct.py +3 -3
- data_management/get_image_sizes.py +8 -6
- data_management/importers/add_timestamps_to_icct.py +79 -0
- data_management/importers/animl_results_to_md_results.py +160 -0
- data_management/importers/auckland_doc_test_to_json.py +4 -4
- data_management/importers/auckland_doc_to_json.py +1 -1
- data_management/importers/awc_to_json.py +5 -5
- data_management/importers/bellevue_to_json.py +5 -5
- data_management/importers/carrizo_shrubfree_2018.py +5 -5
- data_management/importers/carrizo_trail_cam_2017.py +5 -5
- data_management/importers/cct_field_adjustments.py +2 -3
- data_management/importers/channel_islands_to_cct.py +4 -4
- data_management/importers/ena24_to_json.py +5 -5
- data_management/importers/helena_to_cct.py +10 -10
- data_management/importers/idaho-camera-traps.py +12 -12
- data_management/importers/idfg_iwildcam_lila_prep.py +8 -8
- data_management/importers/jb_csv_to_json.py +4 -4
- data_management/importers/missouri_to_json.py +1 -1
- data_management/importers/noaa_seals_2019.py +1 -1
- data_management/importers/pc_to_json.py +5 -5
- data_management/importers/prepare-noaa-fish-data-for-lila.py +4 -4
- data_management/importers/prepare_zsl_imerit.py +5 -5
- data_management/importers/rspb_to_json.py +4 -4
- data_management/importers/save_the_elephants_survey_A.py +5 -5
- data_management/importers/save_the_elephants_survey_B.py +6 -6
- data_management/importers/snapshot_safari_importer.py +9 -9
- data_management/importers/snapshot_serengeti_lila.py +9 -9
- data_management/importers/timelapse_csv_set_to_json.py +5 -7
- data_management/importers/ubc_to_json.py +4 -4
- data_management/importers/umn_to_json.py +4 -4
- data_management/importers/wellington_to_json.py +1 -1
- data_management/importers/wi_to_json.py +2 -2
- data_management/importers/zamba_results_to_md_results.py +181 -0
- data_management/labelme_to_coco.py +35 -7
- data_management/labelme_to_yolo.py +229 -0
- data_management/lila/add_locations_to_island_camera_traps.py +1 -1
- data_management/lila/add_locations_to_nacti.py +147 -0
- data_management/lila/create_lila_blank_set.py +474 -0
- data_management/lila/create_lila_test_set.py +2 -1
- data_management/lila/create_links_to_md_results_files.py +106 -0
- data_management/lila/download_lila_subset.py +46 -21
- data_management/lila/generate_lila_per_image_labels.py +23 -14
- data_management/lila/get_lila_annotation_counts.py +17 -11
- data_management/lila/lila_common.py +14 -11
- data_management/lila/test_lila_metadata_urls.py +116 -0
- data_management/ocr_tools.py +829 -0
- data_management/resize_coco_dataset.py +13 -11
- data_management/yolo_output_to_md_output.py +84 -12
- data_management/yolo_to_coco.py +38 -20
- detection/process_video.py +36 -14
- detection/pytorch_detector.py +23 -8
- detection/run_detector.py +76 -19
- detection/run_detector_batch.py +178 -63
- detection/run_inference_with_yolov5_val.py +326 -57
- detection/run_tiled_inference.py +153 -43
- detection/video_utils.py +34 -8
- md_utils/ct_utils.py +172 -1
- md_utils/md_tests.py +372 -51
- md_utils/path_utils.py +167 -39
- md_utils/process_utils.py +26 -7
- md_utils/split_locations_into_train_val.py +215 -0
- md_utils/string_utils.py +10 -0
- md_utils/url_utils.py +0 -2
- md_utils/write_html_image_list.py +9 -26
- md_visualization/plot_utils.py +12 -8
- md_visualization/visualization_utils.py +106 -7
- md_visualization/visualize_db.py +16 -8
- md_visualization/visualize_detector_output.py +208 -97
- {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/METADATA +3 -6
- {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/RECORD +98 -121
- {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/WHEEL +1 -1
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
- taxonomy_mapping/map_new_lila_datasets.py +43 -39
- taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
- taxonomy_mapping/preview_lila_taxonomy.py +27 -27
- taxonomy_mapping/species_lookup.py +33 -13
- taxonomy_mapping/taxonomy_csv_checker.py +7 -5
- api/synchronous/api_core/yolov5/detect.py +0 -252
- api/synchronous/api_core/yolov5/export.py +0 -607
- api/synchronous/api_core/yolov5/hubconf.py +0 -146
- api/synchronous/api_core/yolov5/models/__init__.py +0 -0
- api/synchronous/api_core/yolov5/models/common.py +0 -738
- api/synchronous/api_core/yolov5/models/experimental.py +0 -104
- api/synchronous/api_core/yolov5/models/tf.py +0 -574
- api/synchronous/api_core/yolov5/models/yolo.py +0 -338
- api/synchronous/api_core/yolov5/train.py +0 -670
- api/synchronous/api_core/yolov5/utils/__init__.py +0 -36
- api/synchronous/api_core/yolov5/utils/activations.py +0 -103
- api/synchronous/api_core/yolov5/utils/augmentations.py +0 -284
- api/synchronous/api_core/yolov5/utils/autoanchor.py +0 -170
- api/synchronous/api_core/yolov5/utils/autobatch.py +0 -66
- api/synchronous/api_core/yolov5/utils/aws/__init__.py +0 -0
- api/synchronous/api_core/yolov5/utils/aws/resume.py +0 -40
- api/synchronous/api_core/yolov5/utils/benchmarks.py +0 -148
- api/synchronous/api_core/yolov5/utils/callbacks.py +0 -71
- api/synchronous/api_core/yolov5/utils/dataloaders.py +0 -1087
- api/synchronous/api_core/yolov5/utils/downloads.py +0 -178
- api/synchronous/api_core/yolov5/utils/flask_rest_api/example_request.py +0 -19
- api/synchronous/api_core/yolov5/utils/flask_rest_api/restapi.py +0 -46
- api/synchronous/api_core/yolov5/utils/general.py +0 -1018
- api/synchronous/api_core/yolov5/utils/loggers/__init__.py +0 -187
- api/synchronous/api_core/yolov5/utils/loggers/wandb/__init__.py +0 -0
- api/synchronous/api_core/yolov5/utils/loggers/wandb/log_dataset.py +0 -27
- api/synchronous/api_core/yolov5/utils/loggers/wandb/sweep.py +0 -41
- api/synchronous/api_core/yolov5/utils/loggers/wandb/wandb_utils.py +0 -577
- api/synchronous/api_core/yolov5/utils/loss.py +0 -234
- api/synchronous/api_core/yolov5/utils/metrics.py +0 -355
- api/synchronous/api_core/yolov5/utils/plots.py +0 -489
- api/synchronous/api_core/yolov5/utils/torch_utils.py +0 -314
- api/synchronous/api_core/yolov5/val.py +0 -394
- md_utils/matlab_porting_tools.py +0 -97
- {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/LICENSE +0 -0
- {megadetector-5.0.5.dist-info → megadetector-5.0.7.dist-info}/top_level.txt +0 -0
api/batch_processing/data_preparation/manage_local_batch.py +302 -263

@@ -14,18 +14,6 @@
 # the same if you are reading this in Jupyter Notebook (using the .ipynb version of the
 # script):
 #
-# * You can specify the MegaDetector location, but you may find it useful to use the same paths
-# I use; on all the machines where I run MD, I keep all versions of MegaDetector handy at these
-# paths:
-#
-# ~/models/camera_traps/megadetector/md_v5.0.0/md_v5a.0.0.pt
-# ~/models/camera_traps/megadetector/md_v5.0.0/md_v5b.0.0.pt
-# ~/models/camera_traps/megadetector/md_v4.1.0/md_v4.1.0.pb
-#
-# On Windows, this translates to, for example:
-#
-# c:\users\dmorr\models\camera_traps\megadetector\md_v5.0.0\md_v5a.0.0.pt
-#
 # * Typically when I have a MegaDetector job to run, I make a copy of this script. Let's
 # say I'm running a job for an organization called "bibblebop"; I have a big folder of
 # job-specific copies of this script, and I might save a new one called "bibblebop-2023-07-26.py"
@@ -78,6 +66,7 @@ import json
 import os
 import stat
 import time
+import re
 
 import humanfriendly
 
@@ -86,15 +75,16 @@ from collections import defaultdict
 
 from md_utils import path_utils
 from md_utils.ct_utils import is_list_sorted
+from md_utils.ct_utils import split_list_into_n_chunks
 
 from detection.run_detector_batch import load_and_run_detector_batch, write_results_to_file
 from detection.run_detector import DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD
+from detection.run_detector import estimate_md_images_per_second
 
 from api.batch_processing.postprocessing.postprocess_batch_results import (
     PostProcessingOptions, process_batch_results)
 from detection.run_detector import get_detector_version_from_filename
-
-max_task_name_length = 92
+from md_utils.ct_utils import image_file_to_camera_folder
 
 # To specify a non-default confidence threshold for including detections in the .json file
 json_threshold = None
@@ -102,61 +92,108 @@ json_threshold = None
 # Turn warnings into errors if more than this many images are missing
 max_tolerable_failed_images = 100
 
+# Should we supply the --image_queue_option to run_detector_batch.py? I only set this
+# when I have a very slow drive and a comparably fast GPU. When this is enabled, checkpointing
+# is not supported within a job, so I set n_jobs to a large number (typically 100).
 use_image_queue = False
 
 # Only relevant when we're using a single GPU
 default_gpu_number = 0
 
+# Should we supply --quiet to run_detector_batch.py?
 quiet_mode = True
 
 # Specify a target image size when running MD... strongly recommended to leave this at "None"
+#
+# When using augmented inference, if you leave this at "None", run_inference_with_yolov5_val.py
+# will use its default size, which is 1280 * 1.3, which is almost always what you want.
 image_size = None
 
 # Only relevant when running on CPU
 ncores = 1
 
-# OS-specific script line continuation character
+# OS-specific script line continuation character (modified later if we're running on Windows)
 slcc = '\\'
 
-# OS-specific script comment character
+# OS-specific script comment character (modified later if we're running on Windows)
 scc = '#'
 
+# # OS-specific script extension (modified later if we're running on Windows)
 script_extension = '.sh'
 
+# If False, we'll load chunk files with file lists if they exist
+force_enumeration = False
+
 # Prefer threads on Windows, processes on Linux
 parallelization_defaults_to_threads = False
 
 # This is for things like image rendering, not for MegaDetector
 default_workers_for_parallel_tasks = 30
 
+overwrite_handling = 'skip' # 'skip', 'error', or 'overwrite'
+
+# Only relevant to repeat detection elimination; try to identify EK113/RCNX101-style
+# overflow folders and treat them as the same camera
+overflow_folder_handling_enabled = True
+
+# The function used to get camera names from image paths; can also replace this
+# with a custom function.
+relative_path_to_location = image_file_to_camera_folder
+
+# This will be the .json results file after RDE; if this is still None when
+# we get to classification stuff, that will indicate that we didn't do RDE.
+filtered_output_filename = None
+
+if os.name == 'nt':
+
+    slcc = '^'
+    scc = 'REM'
+    script_extension = '.bat'
+
+    # My experience has been that Python multiprocessing is flaky on Windows, so
+    # default to threads on Windows
+    parallelization_defaults_to_threads = True
+    default_workers_for_parallel_tasks = 10
+
+
+## Constants related to using YOLOv5's val.py
+
 # Should we use YOLOv5's val.py instead of run_detector_batch.py?
 use_yolo_inference_scripts = False
 
-# Directory in which to run val.py
+# Directory in which to run val.py (relevant for YOLOv5, not for YOLOv8)
 yolo_working_dir = os.path.expanduser('~/git/yolov5')
 
+# Only used for loading the mapping from class indices to names
+yolo_dataset_file = None
+
+# 'yolov5' or 'yolov8'; assumes YOLOv5 if this is None
+yolo_model_type = None
+
+# inference batch size
+yolo_batch_size = 1
+
 # Should we remove intermediate files used for running YOLOv5's val.py?
 #
 # Only relevant if use_yolo_inference_scripts is True.
-remove_yolo_intermediate_results =
-remove_yolo_symlink_folder =
+remove_yolo_intermediate_results = True
+remove_yolo_symlink_folder = True
 use_symlinks_for_yolo_inference = True
+write_yolo_debug_output = False
 
-
+# Should we apply YOLOv5's test-time augmentation?
+augment = False
 
-# Set later if EK113/RCNX101-style overflow folders are being handled in this dataset
-overflow_folder_handling_enabled = False
 
-
-# is True.
-augment = False
+## Constants related to tiled inference
 
-
-
-
-
-
-
+use_tiled_inference = True
+
+# Should we delete tiles after each job? Only set this to False for debugging;
+# large jobs will take up a lot of space if you keep tiles around after each task.
+remove_tiles = True
+tile_size = (1280,1280)
+tile_overlap = 0.2
 
 
 #%% Constants I set per script
@@ -164,9 +201,11 @@ if os.name == 'nt':
 input_path = '/drive/organization'
 
 assert not (input_path.endswith('/') or input_path.endswith('\\'))
+assert os.path.isdir(input_path), 'Could not find input folder {}'.format(input_path)
+input_path = input_path.replace('\\','/')
 
 organization_name_short = 'organization'
-job_date = None # '
+job_date = None # '2024-01-01'
 assert job_date is not None and organization_name_short != 'organization'
 
 # Optional descriptor
@@ -177,9 +216,7 @@ if job_tag is None:
 else:
     job_description_string = '-' + job_tag
 
-model_file =
-# model_file = os.path.expanduser('~/models/camera_traps/megadetector/md_v5.0.0/md_v5b.0.0.pt')
-# model_file = os.path.expanduser('~/models/camera_traps/megadetector/md_v4.1.0/md_v4.1.0.pb')
+model_file = 'MDV5A' # 'MDV5A', 'MDV5B', 'MDV4'
 
 postprocessing_base = os.path.expanduser('~/postprocessing')
 
@@ -193,16 +230,12 @@ n_gpus = 2
 # checkpointing. Don't worry, this will be assert()'d in the next cell.
 checkpoint_frequency = 10000
 
-#
-
-if ('v5') in model_file:
-    gpu_images_per_second = 10
-else:
-    gpu_images_per_second = 2.9
+# Estimate inference speed for the current GPU
+approx_images_per_second = estimate_md_images_per_second(model_file)
 
-# Rough estimate for
+# Rough estimate for the inference time cost of augmentation
 if augment:
-
+    approx_images_per_second = approx_images_per_second * 0.7
 
 base_task_name = organization_name_short + '-' + job_date + job_description_string + '-' + \
     get_detector_version_from_filename(model_file)
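The new estimate_md_images_per_second() call replaces the hard-coded guess removed above (10 images/second for MDv5 models, 2.9 otherwise). A minimal sketch of the arithmetic this value feeds, using the old MDv5 figure as an assumed throughput (real throughput is GPU-dependent):

n_images = 1_000_000
approx_images_per_second = 10      # assumption: the old hard-coded MDv5 guess
n_gpus = 2
execution_seconds = n_images / approx_images_per_second   # 100,000 s of GPU time
wallclock_seconds = execution_seconds / n_gpus             # 50,000 s, roughly 14 hours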
@@ -223,6 +256,14 @@ if augment:
     assert use_yolo_inference_scripts,\
         'Augmentation is only supported when running with the YOLO inference scripts'
 
+if use_tiled_inference:
+    assert not augment, \
+        'Augmentation is not supported when using tiled inference'
+    assert not use_yolo_inference_scripts, \
+        'Using the YOLO inference script is not supported when using tiled inference'
+    assert checkpoint_frequency is None, \
+        'Checkpointing is not supported when using tiled inference'
+
 filename_base = os.path.join(base_output_folder_name, base_task_name)
 combined_api_output_folder = os.path.join(filename_base, 'combined_api_outputs')
 postprocessing_output_folder = os.path.join(filename_base, 'preview')
@@ -239,20 +280,17 @@ print('Output folder:\n{}'.format(filename_base))
 
 #%% Enumerate files
 
-
-
-
-
-if False:
+# Have we already listed files for this job?
+chunk_files = os.listdir(filename_base)
+pattern = re.compile('chunk\d+.json')
+chunk_files = [fn for fn in chunk_files if pattern.match(fn)]
 
-
+if (not force_enumeration) and (len(chunk_files) > 0):
 
-
+    print('Found {} chunk files in folder {}, bypassing enumeration'.format(
+        len(chunk_files),
+        filename_base))
 
-    import re
-    chunk_files = os.listdir(filename_base)
-    pattern = re.compile('chunk\d+.json')
-    chunk_files = [fn for fn in chunk_files if pattern.match(fn)]
     all_images = []
     for fn in chunk_files:
         with open(os.path.join(filename_base,fn),'r') as f:
@@ -260,27 +298,45 @@ if False:
             assert isinstance(chunk,list)
             all_images.extend(chunk)
     all_images = sorted(all_images)
-    print('Loaded {} image files from chunks in {}'.format(len(all_images),filename_base))
 
+    print('Loaded {} image files from {} chunks in {}'.format(
+        len(all_images),len(chunk_files),filename_base))
 
-
+else:
+
+    print('Enumerating image files in {}'.format(input_path))
+
+    all_images = sorted(path_utils.find_images(input_path,recursive=True,convert_slashes=True))
+
+    # It's common to run this notebook on an external drive with the main folders in the drive root
+    all_images = [fn for fn in all_images if not \
+        (fn.startswith('$RECYCLE') or fn.startswith('System Volume Information'))]
+
+    print('')
+
+    print('Enumerated {} image files in {}'.format(len(all_images),input_path))
+
 
-
-    k, m = divmod(len(L), n)
-    return list(L[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n))
+#%% Divide images into chunks
 
-folder_chunks =
+folder_chunks = split_list_into_n_chunks(all_images,n_jobs)
 
 
 #%% Estimate total time
 
-
-
-
-
-
-
-
+if approx_images_per_second is None:
+
+    print("Can't estimate inference time for the current environment")
+
+else:
+
+    n_images = len(all_images)
+    execution_seconds = n_images / approx_images_per_second
+    wallclock_seconds = execution_seconds / n_gpus
+    print('Expected time: {}'.format(humanfriendly.format_timespan(wallclock_seconds)))
+
+    seconds_per_chunk = len(folder_chunks[0]) / approx_images_per_second
+    print('Expected time per chunk: {}'.format(humanfriendly.format_timespan(seconds_per_chunk)))
 
 
 #%% Write file lists
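The split_list_into_n_chunks() helper imported earlier encapsulates the divmod expression that used to live inline here (visible in the removed lines above). A sketch of an equivalent function, reconstructed from those removed lines; the actual md_utils.ct_utils implementation may differ in details:

def split_list_into_n_chunks(L, n):
    # Split L into n chunks whose lengths differ by at most one element,
    # using the same divmod arithmetic as the removed inline code
    k, m = divmod(len(L), n)
    return [L[i*k + min(i, m):(i+1)*k + min(i+1, m)] for i in range(n)]

# Example: split_list_into_n_chunks(list(range(10)), 3) -> [[0, 1, 2, 3], [4, 5, 6], [7, 8, 9]]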
@@ -297,19 +353,20 @@ for i_chunk,chunk_list in enumerate(folder_chunks):
 #%% Generate commands
 
 # A list of the scripts tied to each GPU, as absolute paths. We'll write this out at
-# the end so each GPU's list of commands can be run at once
-# running lots of small batches via YOLOv5's val.py, which doesn't support checkpointing.
+# the end so each GPU's list of commands can be run at once
 gpu_to_scripts = defaultdict(list)
 
 # i_task = 0; task = task_info[i_task]
 for i_task,task in enumerate(task_info):
 
     chunk_file = task['input_file']
+    checkpoint_filename = chunk_file.replace('.json','_checkpoint.json')
+
     output_fn = chunk_file.replace('.json','_results.json')
 
     task['output_file'] = output_fn
 
-    if
+    if n_gpus > 1:
         gpu_number = i_task % n_gpus
     else:
         gpu_number = default_gpu_number
@@ -325,6 +382,10 @@ for i_task,task in enumerate(task_info):
         augment_string = ''
         if augment:
             augment_string = '--augment_enabled 1'
+        else:
+            augment_string = '--augment_enabled 0'
+
+        batch_string = '--batch_size {}'.format(yolo_batch_size)
 
         symlink_folder = os.path.join(filename_base,'symlinks','symlinks_{}'.format(
             str(i_task).zfill(3)))
@@ -338,6 +399,10 @@ for i_task,task in enumerate(task_info):
         if not remove_yolo_symlink_folder:
             remove_symlink_folder_string = '--no_remove_symlink_folder'
 
+        write_yolo_debug_output_string = ''
+        if write_yolo_debug_output:
+            write_yolo_debug_output = '--write_yolo_debug_output'
+
         remove_yolo_results_string = ''
         if not remove_yolo_intermediate_results:
             remove_yolo_results_string = '--no_remove_yolo_results_folder'
@@ -354,12 +419,48 @@ for i_task,task in enumerate(task_info):
 
         overwrite_handling_string = '--overwrite_handling {}'.format(overwrite_handling)
 
-        cmd += f'python run_inference_with_yolov5_val.py "{model_file}" "{chunk_file}" "{output_fn}"
-
+        cmd += f'python run_inference_with_yolov5_val.py "{model_file}" "{chunk_file}" "{output_fn}" '
+        cmd += f'{image_size_string} {augment_string} '
+        cmd += f'{symlink_folder_string} {yolo_results_folder_string} {remove_yolo_results_string} '
+        cmd += f'{remove_symlink_folder_string} {confidence_threshold_string} {device_string} '
+        cmd += f'{overwrite_handling_string} {batch_string} {write_yolo_debug_output_string}'
+
+        if yolo_working_dir is not None:
+            cmd += f' --yolo_working_folder "{yolo_working_dir}"'
+        if yolo_dataset_file is not None:
+            cmd += ' --yolo_dataset_file "{}"'.format(yolo_dataset_file)
+        if yolo_model_type is not None:
+            cmd += ' --model_type {}'.format(yolo_model_type)
+
         if not use_symlinks_for_yolo_inference:
             cmd += ' --no_use_symlinks'
 
         cmd += '\n'
+
+    elif use_tiled_inference:
+
+        tiling_folder = os.path.join(filename_base,'tile_cache','tile_cache_{}'.format(
+            str(i_task).zfill(3)))
+
+        if os.name == 'nt':
+            cuda_string = f'set CUDA_VISIBLE_DEVICES={gpu_number} & '
+        else:
+            cuda_string = f'CUDA_VISIBLE_DEVICES={gpu_number} '
+
+        cmd = f'{cuda_string} python run_tiled_inference.py "{model_file}" "{input_path}" "{tiling_folder}" "{output_fn}"'
+
+        cmd += f' --image_list "{chunk_file}"'
+        cmd += f' --overwrite_handling {overwrite_handling}'
+
+        if not remove_tiles:
+            cmd += ' --no_remove_tiles'
+
+        # If we're using non-default tile sizes
+        if tile_size is not None and (tile_size[0] > 0 or tile_size[1] > 0):
+            cmd += ' --tile_size_x {} --tile_size_y {}'.format(tile_size[0],tile_size[1])
+
+        if tile_overlap is not None:
+            cmd += f' --tile_overlap {tile_overlap}'
 
     else:
 
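Putting the tiled-inference branch above together, each task emits one command string per chunk. With the defaults set earlier (tile_size (1280,1280), tile_overlap 0.2, overwrite_handling 'skip'), the generated string looks roughly like the following; the paths are illustrative placeholders, not values from the diff:

CUDA_VISIBLE_DEVICES=0 python run_tiled_inference.py "MDV5A" "/drive/organization" \
    "<filename_base>/tile_cache/tile_cache_000" "<filename_base>/chunk000_results.json" \
    --image_list "<filename_base>/chunk000.json" --overwrite_handling skip \
    --tile_size_x 1280 --tile_size_y 1280 --tile_overlap 0.2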
@@ -370,7 +471,6 @@ for i_task,task in enumerate(task_info):
 
         checkpoint_frequency_string = ''
         checkpoint_path_string = ''
-        checkpoint_filename = chunk_file.replace('.json','_checkpoint.json')
 
         if checkpoint_frequency is not None and checkpoint_frequency > 0:
             checkpoint_frequency_string = f'--checkpoint_frequency {checkpoint_frequency}'
@@ -479,12 +579,10 @@ multiple processes, so the tasks will run serially. This only matters if you ha
 GPUs.
 """
 
-
-
-pass
-
-#%%% Run the tasks (commented out)
+run_tasks_in_notebook = False
 
+if run_tasks_in_notebook:
+
     assert not use_yolo_inference_scripts, \
         'If you want to use the YOLOv5 inference scripts, you can\'t run the model interactively (yet)'
 
@@ -532,15 +630,32 @@ if False:
 
     # ...for each chunk
 
-# ...if
+# ...if we're running tasks in this notebook
 
 
 #%% Load results, look for failed or missing images in each task
 
+# Check that all task output files exist
+
+missing_output_files = []
+
+# i_task = 0; task = task_info[i_task]
+for i_task,task in tqdm(enumerate(task_info),total=len(task_info)):
+    output_file = task['output_file']
+    if not os.path.isfile(output_file):
+        missing_output_files.append(output_file)
+
+if len(missing_output_files) > 0:
+    print('Missing {} output files:'.format(len(missing_output_files)))
+    for s in missing_output_files:
+        print(s)
+    raise Exception('Missing output files')
+
+
 n_total_failures = 0
 
 # i_task = 0; task = task_info[i_task]
-for i_task,task in enumerate(task_info):
+for i_task,task in tqdm(enumerate(task_info),total=len(task_info)):
 
     chunk_file = task['input_file']
     output_file = task['output_file']
@@ -557,6 +672,13 @@ for i_task,task in enumerate(task_info):
 
     # im = task_results['images'][0]
     for im in task_results['images']:
+
+        # Most of the time, inference result files use absolute paths, but it's
+        # getting annoying to make sure that's *always* true, so handle both here.
+        # E.g., when using tiled inference, paths will be relative.
+        if not os.path.isabs(im['file']):
+            fn = os.path.join(input_path,im['file']).replace('\\','/')
+            im['file'] = fn
         assert im['file'].startswith(input_path)
         assert im['file'] in task_images_set
         filename_to_results[im['file']] = im
@@ -568,7 +690,8 @@ for i_task,task in enumerate(task_info):
     task['results'] = task_results
 
     for fn in task_images:
-        assert fn in filename_to_results
+        assert fn in filename_to_results, \
+            'File {} not found in results for task {}'.format(fn,i_task)
 
     n_total_failures += n_task_failures
 
@@ -582,13 +705,13 @@ print('Processed all {} images with {} failures'.format(
     len(all_images),n_total_failures))
 
 
-
+##%% Merge results files and make filenames relative
 
 combined_results = {}
 combined_results['images'] = []
 images_processed = set()
 
-for i_task,task in enumerate(task_info):
+for i_task,task in tqdm(enumerate(task_info),total=len(task_info)):
 
     task_results = task['results']
 
@@ -615,10 +738,14 @@ assert len(combined_results['images']) == len(all_images), \
 result_filenames = [im['file'] for im in combined_results['images']]
 assert len(combined_results['images']) == len(set(result_filenames))
 
-#
+# Convert to relative paths, preserving '/' as the path separator, regardless of OS
 for im in combined_results['images']:
-    assert im['file']
-
+    assert '\\' not in im['file']
+    assert im['file'].startswith(input_path)
+    if input_path.endswith(':'):
+        im['file'] = im['file'].replace(input_path,'',1)
+    else:
+        im['file'] = im['file'].replace(input_path + '/','',1)
 
 combined_api_output_file = os.path.join(
     combined_api_output_folder,
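A quick illustration of the relativization above, with made-up paths:

input_path = '/drive/organization'
abs_fn = '/drive/organization/cam01/IMG0001.JPG'
rel_fn = abs_fn.replace(input_path + '/', '', 1)
assert rel_fn == 'cam01/IMG0001.JPG'
# For a bare drive root like input_path = 'e:' there is no separator to double
# up, which is why the endswith(':') branch strips input_path alone.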
@@ -642,10 +769,8 @@ options.confidence_threshold = 0.2
 options.almost_detection_confidence_threshold = options.confidence_threshold - 0.05
 options.ground_truth_json_file = None
 options.separate_detections_by_category = True
-options.
-
-# options.sample_seed = 0
-# options.max_figures_per_html_file = 5000
+options.sample_seed = 0
+options.max_figures_per_html_file = 2500
 
 options.parallelize_rendering = True
 options.parallelize_rendering_n_cores = default_workers_for_parallel_tasks
@@ -668,89 +793,8 @@ options.api_output_file = combined_api_output_file
 options.output_dir = output_base
 ppresults = process_batch_results(options)
 html_output_file = ppresults.output_html_file
-path_utils.open_file(html_output_file)
-
-
-
-#%% RDE (sample directory collapsing)
-
-#
-# The next few cells are about repeat detection elimination; if you want to skip this,
-# and still do other stuff in this notebook (e.g. running classifiers), that's fine, but
-# the rest of the notebook weakly assumes you've done this. Specifically, it looks for
-# the variable "filtered_api_output_file" (a file produced by the RDE process). If you
-# don't run the RDE cells, just change "filtered_api_output_file" to "combined_api_output_file"
-# (the raw output from MegaDetector). Then it will be like all this RDE stuff doesn't exist.
-#
-# Though FWIW, once you're sufficiently power-user-ish to use this notebook, RDE is almost
-# always worth it.
-#
-
-def remove_overflow_folders(relative_path):
-    """
-    This is a sample function that returns a camera name given an image path. By
-    default in the RDE process, leaf-node folders are equivalent to cameras. To map
-    something other than leaf-node folders to cameras, fill in this function, and un-comment the
-    line below containing "remove_overflow_folders".
-
-    Sample regular expressions are included here for common patterns, particularly the
-    overflow folders created by Reconyx and Bushnell camera traps. So if one of those
-    fits your scenario, you don't have to modify this function, just un-comment the line
-    below that enables this feature.
-
-    Nothing bad happens if you have overflow folders like this and you don't
-    enable this mapping, you are just taking a more conservative approach to RDE in that
-    scenario.
-    """
-
-    import re
-
-    # 100RECNX is the overflow folder style for Reconyx cameras
-    # 100EK113 is (for some reason) the overflow folder style for Bushnell cameras
-    # 100_BTCF is the overflow folder style for Browning cameras
-    patterns = ['\/\d+RECNX\/','\/\d+EK\d+\/','\/\d+_BTCF\/']
-
-    relative_path = relative_path.replace('\\','/')
-    for pat in patterns:
-        relative_path = re.sub(pat,'/',relative_path)
-    dir_name = os.path.dirname(relative_path)
-
-    return dir_name
-
-
-#%% Test cells for remove_overflow_folders
-
-if False:
-
-    pass
-
-    #%% Test the generic cases
-
-    relative_path = 'a/b/c/d/100EK113/blah.jpg'
-    print(remove_overflow_folders(relative_path))
-
-    relative_path = 'a/b/c/d/100RECNX/blah.jpg'
-    print(remove_overflow_folders(relative_path))
-
-
-    #%% Test remove_overflow_folders on the current dataset
-
-    with open(combined_api_output_file,'r') as f:
-        d = json.load(f)
-    image_filenames = [im['file'] for im in d['images']]
-
-    dir_names = set()
-
-    # relative_path = image_filenames[0]
-    for relative_path in tqdm(image_filenames):
-        dir_name = remove_overflow_folders(relative_path)
-        dir_names.add(dir_name)
-
-    dir_names = list(dir_names)
-    dir_names.sort()
-
-    for s in dir_names:
-        print(s)
+path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True)
+# import clipboard; clipboard.copy(html_output_file)
 
 
 #%% Repeat detection elimination, phase 1
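The sample remove_overflow_folders() function deleted above lives on as md_utils.ct_utils.image_file_to_camera_folder, which the new configuration assigns to relative_path_to_location. A sketch of the behavior, based on the regexes in the removed code (the library implementation may differ in details):

import os, re

# Overflow-folder styles from the removed comments:
# 100RECNX (Reconyx), 100EK113 (Bushnell), 100_BTCF (Browning)
patterns = [r'/\d+RECNX/', r'/\d+EK\d+/', r'/\d+_BTCF/']

def camera_folder(relative_path):
    # Collapse overflow folders, then treat the containing folder as the camera
    relative_path = relative_path.replace('\\', '/')
    for pat in patterns:
        relative_path = re.sub(pat, '/', relative_path)
    return os.path.dirname(relative_path)

assert camera_folder('a/b/c/d/100EK113/blah.jpg') == 'a/b/c/d'
assert camera_folder('a/b/c/d/100RECNX/blah.jpg') == 'a/b/c/d'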
@@ -762,7 +806,7 @@ task_index = 0
 
 options = repeat_detections_core.RepeatDetectionOptions()
 
-options.confidenceMin = 0.
+options.confidenceMin = 0.1
 options.confidenceMax = 1.01
 options.iouThreshold = 0.85
 options.occurrenceThreshold = 15
@@ -779,13 +823,13 @@ options.otherDetectionsThreshold = options.confidenceMin
 
 options.bRenderDetectionTiles = True
 options.maxOutputImageWidth = 2000
-options.detectionTilesMaxCrops =
+options.detectionTilesMaxCrops = 300
 
 # options.lineThickness = 5
 # options.boxExpansion = 8
 
 # To invoke custom collapsing of folders for a particular manufacturer's naming scheme
-
+options.customDirNameFunction = relative_path_to_location
 
 options.bRenderHtml = False
 options.imageBase = input_path
@@ -810,9 +854,9 @@ options.debugMaxRenderInstance = -1
 # Can be None, 'xsort', or 'clustersort'
 options.smartSort = 'xsort'
 
-
-
-
+suspicious_detection_results = repeat_detections_core.find_repeat_detections(combined_api_output_file,
+                                                                             outputFilename=None,
+                                                                             options=options)
 
 
 #%% Manual RDE step
@@ -820,7 +864,8 @@ suspiciousDetectionResults = repeat_detections_core.find_repeat_detections(combi
 ## DELETE THE VALID DETECTIONS ##
 
 # If you run this line, it will open the folder up in your file browser
-path_utils.open_file(os.path.dirname(
+path_utils.open_file(os.path.dirname(suspicious_detection_results.filterFile),
+                     attempt_to_open_in_wsl_host=True)
 
 #
 # If you ran the previous cell, but then you change your mind and you don't want to do
@@ -828,7 +873,7 @@ path_utils.open_file(os.path.dirname(suspiciousDetectionResults.filterFile))
 # previous cell. If you do that, you're implicitly telling the notebook that you looked
 # at everything in that folder, and confirmed there were no red boxes on animals.
 #
-# Instead, either change "
+# Instead, either change "filtered_output_filename" below to "combined_api_output_file",
 # or delete *all* the images in the filtering folder.
 #
 
@@ -837,12 +882,13 @@ path_utils.open_file(os.path.dirname(suspiciousDetectionResults.filterFile))
 
 from api.batch_processing.postprocessing.repeat_detection_elimination import remove_repeat_detections
 
-filtered_output_filename = path_utils.insert_before_extension(combined_api_output_file,
+filtered_output_filename = path_utils.insert_before_extension(combined_api_output_file,
+                                                              'filtered_{}'.format(rde_string))
 
 remove_repeat_detections.remove_repeat_detections(
     inputFile=combined_api_output_file,
     outputFile=filtered_output_filename,
-    filteringDir=os.path.dirname(
+    filteringDir=os.path.dirname(suspicious_detection_results.filterFile)
     )
 
 
@@ -858,8 +904,8 @@ options.confidence_threshold = 0.2
 options.almost_detection_confidence_threshold = options.confidence_threshold - 0.05
 options.ground_truth_json_file = None
 options.separate_detections_by_category = True
-
-
+options.sample_seed = 0
+options.max_figures_per_html_file = 5000
 
 options.parallelize_rendering = True
 options.parallelize_rendering_n_cores = default_workers_for_parallel_tasks
@@ -884,7 +930,8 @@ options.output_dir = output_base
 ppresults = process_batch_results(options)
 html_output_file = ppresults.output_html_file
 
-path_utils.open_file(html_output_file)
+path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True)
+# import clipboard; clipboard.copy(html_output_file)
 
 
 #%% Run MegaClassifier (actually, write out a script that runs MegaClassifier)
@@ -893,6 +940,11 @@ path_utils.open_file(html_output_file)
 final_output_path_mc = None
 final_output_path_ic = None
 
+# If we didn't do RDE
+if filtered_output_filename is None:
+    print("Warning: it looks like you didn't do RDE, using the raw output file")
+    filtered_output_filename = combined_api_output_file
+
 classifier_name_short = 'megaclassifier'
 threshold_str = '0.15' # 0.6
 classifier_name = 'megaclassifier_v0.1_efficientnet-b3'
@@ -1080,7 +1132,6 @@ with open(output_file,'w') as f:
     for s in commands:
         f.write('{}'.format(s))
 
-import stat
 st = os.stat(output_file)
 os.chmod(output_file, st.st_mode | stat.S_IEXEC)
 
@@ -1250,8 +1301,6 @@ os.chmod(output_file, st.st_mode | stat.S_IEXEC)
 
 #%% Within-image classification smoothing
 
-from collections import defaultdict
-
 #
 # Only count detections with a classification confidence threshold above
 # *classification_confidence_threshold*, which in practice means we're only
@@ -1510,7 +1559,7 @@ else:
 import datetime
 from data_management.read_exif import parse_exif_datetime_string
 
-min_valid_timestamp_year =
+min_valid_timestamp_year = 2001
 
 now = datetime.datetime.now()
 
@@ -1528,12 +1577,13 @@ for exif_result in tqdm(exif_results):
 
     # By default we assume that each leaf-node folder is a location
     if overflow_folder_handling_enabled:
-        im['location'] =
+        im['location'] = relative_path_to_location(os.path.dirname(exif_result['file_name']))
     else:
         im['location'] = os.path.dirname(exif_result['file_name'])
 
     im['file_name'] = exif_result['file_name']
     im['id'] = im['file_name']
+
     if ('exif_tags' not in exif_result) or (exif_result['exif_tags'] is None) or \
        (exif_datetime_tag not in exif_result['exif_tags']):
         exif_dt = None
@@ -1567,7 +1617,7 @@ for exif_result in tqdm(exif_results):
 
 # ...for each exif image result
 
-print('Parsed EXIF datetime information, unable to parse EXIF
+print('Parsed EXIF datetime information, unable to parse EXIF date from {} of {} images'.format(
     len(images_without_datetime),len(exif_results)))
 
 
@@ -1633,7 +1683,7 @@ min_dominant_class_classifications_above_threshold_for_class_smoothing = 5 # 2
 max_secondary_class_classifications_above_threshold_for_class_smoothing = 5
 
 # If the ratio between a dominant class and a secondary class count is greater than this,
-# regardless of the secondary class count, switch those
+# regardless of the secondary class count, switch those classifications (i.e., ignore
 # max_secondary_class_classifications_above_threshold_for_class_smoothing).
 #
 # This may be different for different dominant classes, e.g. if we see lots of cows, they really
@@ -1935,7 +1985,7 @@ options.classification_confidence_threshold = 0.7
 options.almost_detection_confidence_threshold = options.confidence_threshold - 0.05
 options.ground_truth_json_file = None
 options.separate_detections_by_category = True
-
+options.max_figures_per_html_file = 2500
 
 options.parallelize_rendering = True
 options.parallelize_rendering_n_cores = default_workers_for_parallel_tasks
@@ -1953,8 +2003,8 @@ print('Processing {} to {}'.format(base_task_name, output_base))
 options.api_output_file = sequence_smoothed_classification_file
 options.output_dir = output_base
 ppresults = process_batch_results(options)
-path_utils.open_file(ppresults.output_html_file)
-
+path_utils.open_file(ppresults.output_html_file,attempt_to_open_in_wsl_host=True)
+# import clipboard; clipboard.copy(ppresults.output_html_file)
 
 #% Zip .json files
 
@@ -2021,7 +2071,7 @@ for i, j in itertools.combinations(list(range(0,len(filenames))),2):
 results = compare_batch_results(options)
 
 from md_utils.path_utils import open_file
-open_file(results.html_output_file)
+open_file(results.html_output_file,attempt_to_open_in_wsl_host=True)
 
 
 #%% Merge in high-confidence detections from another results file
@@ -2048,15 +2098,17 @@ from api.batch_processing.postprocessing import categorize_detections_by_size
 
 size_options = categorize_detections_by_size.SizeCategorizationOptions()
 
-
-size_options.
-
-
+size_options.size_thresholds = [0.9]
+size_options.size_category_names = ['large_detections']
+
+size_options.categories_to_separate = [1]
 size_options.measurement = 'size' # 'width'
 
+threshold_string = '-'.join([str(x) for x in size_options.size_thresholds])
+
 input_file = filtered_output_filename
 size_separated_file = input_file.replace('.json','-size-separated-{}.json'.format(
-
+    threshold_string))
 d = categorize_detections_by_size.categorize_detections_by_size(input_file,size_separated_file,
                                                                 size_options)
 
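For the single threshold configured above the joined string is trivial, but the pattern generalizes to multiple thresholds; illustrative values:

'-'.join([str(x) for x in [0.9]])        # -> '0.9'
'-'.join([str(x) for x in [0.5, 0.9]])   # -> '0.5-0.9'
# ...so the output file ends in, e.g., '-size-separated-0.9.json'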
@@ -2064,7 +2116,7 @@ d = categorize_detections_by_size.categorize_detections_by_size(input_file,size_
 #%% Preview large boxes
 
 output_base_large_boxes = os.path.join(postprocessing_output_folder,
-    base_task_name + '_{}_{:.3f}
+    base_task_name + '_{}_{:.3f}_size_separated_boxes'.format(rde_string, options.confidence_threshold))
 os.makedirs(output_base_large_boxes, exist_ok=True)
 print('Processing post-RDE, post-size-separation to {}'.format(output_base_large_boxes))
 
@@ -2073,7 +2125,7 @@ options.output_dir = output_base_large_boxes
 
 ppresults = process_batch_results(options)
 html_output_file = ppresults.output_html_file
-path_utils.open_file(html_output_file)
+path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True)
 
 
 #%% .json splitting
@@ -2086,12 +2138,6 @@ from api.batch_processing.postprocessing.subset_json_detector_output import (
 input_filename = filtered_output_filename
 output_base = os.path.join(combined_api_output_folder,base_task_name + '_json_subsets')
 
-if False:
-    if data is None:
-        with open(input_filename) as f:
-            data = json.load(f)
-    print('Data set contains {} images'.format(len(data['images'])))
-
 print('Processing file {} to {}'.format(input_filename,output_base))
 
 options = SubsetJsonDetectorOutputOptions()
@@ -2185,65 +2231,45 @@ options.allow_existing_directory = False
 separate_detections_into_folders(options)
 
 
-#%%
+#%% Convert frame-level results to video-level results
 
-
-
+# This cell is only useful if the files submitted to this job were generated via
+# video_folder_to_frames().
 
-
-for i_task in task_set:
-
-    if i_task == task_set[0]:
-        commands.append('sleep {}'.format(str(sleep_time_before_tasks)))
-
-    task = task_info[i_task]
-    chunk_file = task['input_file']
-    output_fn = chunk_file.replace('.json','_results.json')
-
-    task['output_file'] = output_fn
+from detection.video_utils import frame_results_to_video_results
 
-
-
-    checkpoint_frequency_string = ''
-    checkpoint_path_string = ''
-    if checkpoint_frequency is not None and checkpoint_frequency > 0:
-        checkpoint_frequency_string = f'--checkpoint_frequency {checkpoint_frequency}'
-        checkpoint_path_string = '--checkpoint_path {}'.format(chunk_file.replace(
-            '.json','_checkpoint.json'))
-
-    use_image_queue_string = ''
-    if (use_image_queue):
-        use_image_queue_string = '--use_image_queue'
+video_output_filename = filtered_output_filename.replace('.json','_aggregated.json')
+frame_results_to_video_results(filtered_output_filename,video_output_filename)
 
-
-
-
-
-    quiet_string = ''
-    if quiet_mode:
-        quiet_string = '--quiet'
-
-    cmd = f'{cuda_string} python run_detector_batch.py {model_file} {chunk_file} {output_fn} {checkpoint_frequency_string} {checkpoint_path_string} {use_image_queue_string} {ncores_string} {quiet_string}'
-
-    task['command'] = cmd
-    commands.append(cmd)
-    if i_task != task_set[-1]:
-        commands.append('sleep {}'.format(str(sleep_time_between_tasks)))
+
+#%% Sample custom path replacement function
+
+def custom_relative_path_to_location(relative_path):
 
-
+    relative_path = relative_path.replace('\\','/')
+    tokens = relative_path.split('/')
+    location_name = '/'.join(tokens[0:2])
+    return location_name
 
-task_strings = [str(k).zfill(3) for k in task_set]
-task_set_string = '_'.join(task_strings)
-cmd_file = os.path.join(filename_base,'run_chunk_{}_gpu_{}.sh'.format(task_set_string,
-    str(gpu_number).zfill(2)))
 
-
-
-
+#%% Test relative_path_to_location on the current dataset
+
+with open(combined_api_output_file,'r') as f:
+    d = json.load(f)
+image_filenames = [im['file'] for im in d['images']]
+
+location_names = set()
+
+# relative_path = image_filenames[0]
+for relative_path in tqdm(image_filenames):
+    location_name = relative_path_to_location(relative_path)
+    location_names.add(location_name)
 
-
-
-
+location_names = list(location_names)
+location_names.sort()
+
+for s in location_names:
+    print(s)
 
 
 #%% End notebook: turn this script into a notebook (how meta!)
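The custom_relative_path_to_location() sample added in this hunk keeps the first two path components as the location name; with an illustrative path:

custom_relative_path_to_location('site01/cam03/2024/IMG0001.JPG')   # -> 'site01/cam03'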
@@ -2251,8 +2277,12 @@ os.chmod(cmd_file, st.st_mode | stat.S_IEXEC)
 import os
 import nbformat as nbf
 
-
-'
+if os.name == 'nt':
+    git_base = r'c:\git'
+else:
+    git_base = os.path.expanduer('~/git')
+
+input_py_file = git_base + '/MegaDetector/api/batch_processing/data_preparation/manage_local_batch.py'
 assert os.path.isfile(input_py_file)
 output_ipynb_file = input_py_file.replace('.py','.ipynb')
 
@@ -2275,14 +2305,23 @@ i_line = 0
 
 header_comment = ''
 
+# Delete a few lines from the top that don't belong in the NB version, e.g. the name
+# of the .py file
 lines_to_ignore = 7
+expected_first_token = '# This script'
+found_first_token = False
 
 # Everything before the first cell is the header comment
 while(not lines[i_line].startswith('#%%')):
+
     if i_line < lines_to_ignore:
         i_line += 1
         continue
 
+    if not found_first_token:
+        assert lines[i_line].startswith(expected_first_token)
+        found_first_token = True
+
     s = lines[i_line].replace('#','').strip()
     if len(s) == 0:
         header_comment += '\n\n'