megadetector 5.0.6__py3-none-any.whl → 5.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- api/batch_processing/data_preparation/manage_local_batch.py +297 -202
- api/batch_processing/data_preparation/manage_video_batch.py +7 -2
- api/batch_processing/postprocessing/add_max_conf.py +1 -0
- api/batch_processing/postprocessing/combine_api_outputs.py +2 -2
- api/batch_processing/postprocessing/compare_batch_results.py +111 -61
- api/batch_processing/postprocessing/convert_output_format.py +24 -6
- api/batch_processing/postprocessing/load_api_results.py +56 -72
- api/batch_processing/postprocessing/md_to_labelme.py +119 -51
- api/batch_processing/postprocessing/merge_detections.py +30 -5
- api/batch_processing/postprocessing/postprocess_batch_results.py +175 -55
- api/batch_processing/postprocessing/remap_detection_categories.py +163 -0
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +628 -0
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +71 -23
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +224 -76
- api/batch_processing/postprocessing/subset_json_detector_output.py +132 -5
- api/batch_processing/postprocessing/top_folders_to_bottom.py +1 -1
- classification/prepare_classification_script.py +191 -191
- data_management/cct_json_utils.py +7 -2
- data_management/coco_to_labelme.py +263 -0
- data_management/coco_to_yolo.py +72 -48
- data_management/databases/integrity_check_json_db.py +75 -64
- data_management/databases/subset_json_db.py +1 -1
- data_management/generate_crops_from_cct.py +1 -1
- data_management/get_image_sizes.py +44 -26
- data_management/importers/animl_results_to_md_results.py +3 -5
- data_management/importers/noaa_seals_2019.py +2 -2
- data_management/importers/zamba_results_to_md_results.py +2 -2
- data_management/labelme_to_coco.py +264 -127
- data_management/labelme_to_yolo.py +96 -53
- data_management/lila/create_lila_blank_set.py +557 -0
- data_management/lila/create_lila_test_set.py +2 -1
- data_management/lila/create_links_to_md_results_files.py +1 -1
- data_management/lila/download_lila_subset.py +138 -45
- data_management/lila/generate_lila_per_image_labels.py +23 -14
- data_management/lila/get_lila_annotation_counts.py +16 -10
- data_management/lila/lila_common.py +15 -42
- data_management/lila/test_lila_metadata_urls.py +116 -0
- data_management/read_exif.py +65 -16
- data_management/remap_coco_categories.py +84 -0
- data_management/resize_coco_dataset.py +14 -31
- data_management/wi_download_csv_to_coco.py +239 -0
- data_management/yolo_output_to_md_output.py +40 -13
- data_management/yolo_to_coco.py +313 -100
- detection/process_video.py +36 -14
- detection/pytorch_detector.py +1 -1
- detection/run_detector.py +73 -18
- detection/run_detector_batch.py +116 -27
- detection/run_inference_with_yolov5_val.py +135 -27
- detection/run_tiled_inference.py +153 -43
- detection/tf_detector.py +2 -1
- detection/video_utils.py +4 -2
- md_utils/ct_utils.py +101 -6
- md_utils/md_tests.py +264 -17
- md_utils/path_utils.py +326 -47
- md_utils/process_utils.py +26 -7
- md_utils/split_locations_into_train_val.py +215 -0
- md_utils/string_utils.py +10 -0
- md_utils/url_utils.py +66 -3
- md_utils/write_html_image_list.py +12 -2
- md_visualization/visualization_utils.py +380 -74
- md_visualization/visualize_db.py +41 -10
- md_visualization/visualize_detector_output.py +185 -104
- {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/METADATA +11 -13
- {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/RECORD +74 -67
- {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/WHEEL +1 -1
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +1 -1
- taxonomy_mapping/map_new_lila_datasets.py +43 -39
- taxonomy_mapping/prepare_lila_taxonomy_release.py +5 -2
- taxonomy_mapping/preview_lila_taxonomy.py +27 -27
- taxonomy_mapping/species_lookup.py +33 -13
- taxonomy_mapping/taxonomy_csv_checker.py +7 -5
- md_visualization/visualize_megadb.py +0 -183
- {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/LICENSE +0 -0
- {megadetector-5.0.6.dist-info → megadetector-5.0.8.dist-info}/top_level.txt +0 -0
api/batch_processing/data_preparation/manage_local_batch.py

@@ -14,18 +14,6 @@
 # the same if you are reading this in Jupyter Notebook (using the .ipynb version of the
 # script):
 #
-# * You can specify the MegaDetector location, but you may find it useful to use the same paths
-# I use; on all the machines where I run MD, I keep all versions of MegaDetector handy at these
-# paths:
-#
-# ~/models/camera_traps/megadetector/md_v5.0.0/md_v5a.0.0.pt
-# ~/models/camera_traps/megadetector/md_v5.0.0/md_v5b.0.0.pt
-# ~/models/camera_traps/megadetector/md_v4.1.0/md_v4.1.0.pb
-#
-# On Windows, this translates to, for example:
-#
-# c:\users\dmorr\models\camera_traps\megadetector\md_v5.0.0\md_v5a.0.0.pt
-#
 # * Typically when I have a MegaDetector job to run, I make a copy of this script. Let's
 # say I'm running a job for an organization called "bibblebop"; I have a big folder of
 # job-specific copies of this script, and I might save a new one called "bibblebop-2023-07-26.py"
@@ -78,6 +66,7 @@ import json
 import os
 import stat
 import time
+import re
 
 import humanfriendly
 
@@ -90,12 +79,14 @@ from md_utils.ct_utils import split_list_into_n_chunks
 
 from detection.run_detector_batch import load_and_run_detector_batch, write_results_to_file
 from detection.run_detector import DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD
+from detection.run_detector import estimate_md_images_per_second
 
 from api.batch_processing.postprocessing.postprocess_batch_results import (
     PostProcessingOptions, process_batch_results)
 from detection.run_detector import get_detector_version_from_filename
+from md_utils.ct_utils import image_file_to_camera_folder
 
-
+## Inference options
 
 # To specify a non-default confidence threshold for including detections in the .json file
 json_threshold = None
@@ -103,61 +94,113 @@ json_threshold = None
 # Turn warnings into errors if more than this many images are missing
 max_tolerable_failed_images = 100
 
+# Should we supply the --image_queue_option to run_detector_batch.py? I only set this
+# when I have a very slow drive and a comparably fast GPU. When this is enabled, checkpointing
+# is not supported within a job, so I set n_jobs to a large number (typically 100).
 use_image_queue = False
 
 # Only relevant when we're using a single GPU
 default_gpu_number = 0
 
+# Should we supply --quiet to run_detector_batch.py?
 quiet_mode = True
 
 # Specify a target image size when running MD... strongly recommended to leave this at "None"
+#
+# When using augmented inference, if you leave this at "None", run_inference_with_yolov5_val.py
+# will use its default size, which is 1280 * 1.3, which is almost always what you want.
 image_size = None
 
+# Should we include image size, timestamp, and/or EXIF data in MD output?
+include_image_size = False
+include_image_timestamp = False
+include_exif_data = False
+
 # Only relevant when running on CPU
 ncores = 1
 
-# OS-specific script line continuation character
+# OS-specific script line continuation character (modified later if we're running on Windows)
 slcc = '\\'
 
-# OS-specific script comment character
+# OS-specific script comment character (modified later if we're running on Windows)
 scc = '#'
 
+# # OS-specific script extension (modified later if we're running on Windows)
 script_extension = '.sh'
 
+# If False, we'll load chunk files with file lists if they exist
+force_enumeration = False
+
 # Prefer threads on Windows, processes on Linux
 parallelization_defaults_to_threads = False
 
 # This is for things like image rendering, not for MegaDetector
 default_workers_for_parallel_tasks = 30
 
+overwrite_handling = 'skip' # 'skip', 'error', or 'overwrite'
+
+# Only relevant to repeat detection elimination; try to identify EK113/RCNX101-style
+# overflow folders and treat them as the same camera
+overflow_folder_handling_enabled = True
+
+# The function used to get camera names from image paths; can also replace this
+# with a custom function.
+relative_path_to_location = image_file_to_camera_folder
+
+# This will be the .json results file after RDE; if this is still None when
+# we get to classification stuff, that will indicate that we didn't do RDE.
+filtered_output_filename = None
+
+if os.name == 'nt':
+
+    slcc = '^'
+    scc = 'REM'
+    script_extension = '.bat'
+
+    # My experience has been that Python multiprocessing is flaky on Windows, so
+    # default to threads on Windows
+    parallelization_defaults_to_threads = True
+    default_workers_for_parallel_tasks = 10
+
+
+## Constants related to using YOLOv5's val.py
+
 # Should we use YOLOv5's val.py instead of run_detector_batch.py?
 use_yolo_inference_scripts = False
 
-# Directory in which to run val.py
+# Directory in which to run val.py (relevant for YOLOv5, not for YOLOv8)
 yolo_working_dir = os.path.expanduser('~/git/yolov5')
 
+# Only used for loading the mapping from class indices to names
+yolo_dataset_file = None
+
+# 'yolov5' or 'yolov8'; assumes YOLOv5 if this is None
+yolo_model_type = None
+
+# inference batch size
+yolo_batch_size = 1
+
 # Should we remove intermediate files used for running YOLOv5's val.py?
 #
 # Only relevant if use_yolo_inference_scripts is True.
-remove_yolo_intermediate_results =
-remove_yolo_symlink_folder =
+remove_yolo_intermediate_results = True
+remove_yolo_symlink_folder = True
 use_symlinks_for_yolo_inference = True
+write_yolo_debug_output = False
 
-
+# Should we apply YOLOv5's test-time augmentation?
+augment = False
 
-# Set later if EK113/RCNX101-style overflow folders are being handled in this dataset
-overflow_folder_handling_enabled = False
 
-
-# is True.
-augment = False
+## Constants related to tiled inference
 
-
-
-
-
-
-
+use_tiled_inference = False
+
+# Should we delete tiles after each job? Only set this to False for debugging;
+# large jobs will take up a lot of space if you keep tiles around after each task.
+remove_tiles = True
+tile_size = (1280,1280)
+tile_overlap = 0.2
 
 
 #%% Constants I set per script
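Note (illustrative, not part of the diff): the new relative_path_to_location hook above defaults to image_file_to_camera_folder, and the comment in 5.0.8 says it can be replaced with a custom function. A minimal sketch of such an override, assuming only the contract implied by the diff (relative image path in, camera/location name out); the function name here is hypothetical:

    def top_folder_to_location(relative_path):
        # Hypothetical example: treat the top-level folder as the camera/location name
        relative_path = relative_path.replace('\\', '/')
        return relative_path.split('/')[0]

    relative_path_to_location = top_folder_to_location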
@@ -165,9 +208,11 @@ if os.name == 'nt':
 input_path = '/drive/organization'
 
 assert not (input_path.endswith('/') or input_path.endswith('\\'))
+assert os.path.isdir(input_path), 'Could not find input folder {}'.format(input_path)
+input_path = input_path.replace('\\','/')
 
 organization_name_short = 'organization'
-job_date = None # '
+job_date = None # '2024-01-01'
 assert job_date is not None and organization_name_short != 'organization'
 
 # Optional descriptor
@@ -178,9 +223,7 @@ if job_tag is None:
 else:
     job_description_string = '-' + job_tag
 
-model_file =
-# model_file = os.path.expanduser('~/models/camera_traps/megadetector/md_v5.0.0/md_v5b.0.0.pt')
-# model_file = os.path.expanduser('~/models/camera_traps/megadetector/md_v4.1.0/md_v4.1.0.pb')
+model_file = 'MDV5A' # 'MDV5A', 'MDV5B', 'MDV4'
 
 postprocessing_base = os.path.expanduser('~/postprocessing')
 
@@ -194,16 +237,12 @@ n_gpus = 2
 # checkpointing. Don't worry, this will be assert()'d in the next cell.
 checkpoint_frequency = 10000
 
-#
-
-if ('v5') in model_file:
-    gpu_images_per_second = 10
-else:
-    gpu_images_per_second = 2.9
+# Estimate inference speed for the current GPU
+approx_images_per_second = estimate_md_images_per_second(model_file)
 
-# Rough estimate for
-if augment:
-
+# Rough estimate for the inference time cost of augmentation
+if augment and (approx_images_per_second is not None):
+    approx_images_per_second = approx_images_per_second * 0.7
 
 base_task_name = organization_name_short + '-' + job_date + job_description_string + '-' + \
     get_detector_version_from_filename(model_file)
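Note (illustrative, not part of the diff): 5.0.8 drops the hard-coded per-model throughput guesses in favor of estimate_md_images_per_second(). A minimal sketch of how that estimate is consumed, assuming only what the diff shows (the helper takes the model file/name and may return None when no estimate is available); the image count below is hypothetical:

    from detection.run_detector import estimate_md_images_per_second

    approx_images_per_second = estimate_md_images_per_second('MDV5A')

    if approx_images_per_second is None:
        print("Can't estimate inference time for the current environment")
    else:
        n_images = 100000  # hypothetical job size
        wallclock_seconds = (n_images / approx_images_per_second) / 2  # e.g. two GPUs
        print('Expected time: {:.0f} seconds'.format(wallclock_seconds))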
@@ -224,10 +263,22 @@ if augment:
     assert use_yolo_inference_scripts,\
         'Augmentation is only supported when running with the YOLO inference scripts'
 
+if use_tiled_inference:
+    assert not augment, \
+        'Augmentation is not supported when using tiled inference'
+    assert not use_yolo_inference_scripts, \
+        'Using the YOLO inference script is not supported when using tiled inference'
+    assert checkpoint_frequency is None, \
+        'Checkpointing is not supported when using tiled inference'
+
 filename_base = os.path.join(base_output_folder_name, base_task_name)
 combined_api_output_folder = os.path.join(filename_base, 'combined_api_outputs')
 postprocessing_output_folder = os.path.join(filename_base, 'preview')
 
+combined_api_output_file = os.path.join(
+    combined_api_output_folder,
+    '{}_detections.json'.format(base_task_name))
+
 os.makedirs(filename_base, exist_ok=True)
 os.makedirs(combined_api_output_folder, exist_ok=True)
 os.makedirs(postprocessing_output_folder, exist_ok=True)
@@ -240,24 +291,17 @@ print('Output folder:\n{}'.format(filename_base))
 
 #%% Enumerate files
 
-
-
-
-
-    (fn.startswith('$RECYCLE') or fn.startswith('System Volume Information'))]
-
-print('Enumerated {} image files in {}'.format(len(all_images),input_path))
-
-if False:
+# Have we already listed files for this job?
+chunk_files = os.listdir(filename_base)
+pattern = re.compile('chunk\d+.json')
+chunk_files = [fn for fn in chunk_files if pattern.match(fn)]
 
-
+if (not force_enumeration) and (len(chunk_files) > 0):
 
-
+    print('Found {} chunk files in folder {}, bypassing enumeration'.format(
+        len(chunk_files),
+        filename_base))
 
-    import re
-    chunk_files = os.listdir(filename_base)
-    pattern = re.compile('chunk\d+.json')
-    chunk_files = [fn for fn in chunk_files if pattern.match(fn)]
     all_images = []
     for fn in chunk_files:
         with open(os.path.join(filename_base,fn),'r') as f:
@@ -265,8 +309,24 @@ if False:
             assert isinstance(chunk,list)
             all_images.extend(chunk)
     all_images = sorted(all_images)
-    print('Loaded {} image files from chunks in {}'.format(len(all_images),filename_base))
 
+    print('Loaded {} image files from {} chunks in {}'.format(
+        len(all_images),len(chunk_files),filename_base))
+
+else:
+
+    print('Enumerating image files in {}'.format(input_path))
+
+    all_images = sorted(path_utils.find_images(input_path,recursive=True,convert_slashes=True))
+
+    # It's common to run this notebook on an external drive with the main folders in the drive root
+    all_images = [fn for fn in all_images if not \
+        (fn.startswith('$RECYCLE') or fn.startswith('System Volume Information'))]
+
+    print('')
+
+    print('Enumerated {} image files in {}'.format(len(all_images),input_path))
+
 
 #%% Divide images into chunks
 
@@ -275,13 +335,19 @@ folder_chunks = split_list_into_n_chunks(all_images,n_jobs)
 
 #%% Estimate total time
 
-
-
-
-
-
-
-
+if approx_images_per_second is None:
+
+    print("Can't estimate inference time for the current environment")
+
+else:
+
+    n_images = len(all_images)
+    execution_seconds = n_images / approx_images_per_second
+    wallclock_seconds = execution_seconds / n_gpus
+    print('Expected time: {}'.format(humanfriendly.format_timespan(wallclock_seconds)))
+
+    seconds_per_chunk = len(folder_chunks[0]) / approx_images_per_second
+    print('Expected time per chunk: {}'.format(humanfriendly.format_timespan(seconds_per_chunk)))
 
 
 #%% Write file lists
@@ -298,19 +364,20 @@ for i_chunk,chunk_list in enumerate(folder_chunks):
 #%% Generate commands
 
 # A list of the scripts tied to each GPU, as absolute paths. We'll write this out at
-# the end so each GPU's list of commands can be run at once
-# running lots of small batches via YOLOv5's val.py, which doesn't support checkpointing.
+# the end so each GPU's list of commands can be run at once
 gpu_to_scripts = defaultdict(list)
 
 # i_task = 0; task = task_info[i_task]
 for i_task,task in enumerate(task_info):
 
     chunk_file = task['input_file']
+    checkpoint_filename = chunk_file.replace('.json','_checkpoint.json')
+
     output_fn = chunk_file.replace('.json','_results.json')
 
     task['output_file'] = output_fn
 
-    if
+    if n_gpus > 1:
         gpu_number = i_task % n_gpus
     else:
         gpu_number = default_gpu_number
@@ -326,6 +393,10 @@ for i_task,task in enumerate(task_info):
         augment_string = ''
         if augment:
             augment_string = '--augment_enabled 1'
+        else:
+            augment_string = '--augment_enabled 0'
+
+        batch_string = '--batch_size {}'.format(yolo_batch_size)
 
         symlink_folder = os.path.join(filename_base,'symlinks','symlinks_{}'.format(
             str(i_task).zfill(3)))
@@ -339,6 +410,10 @@ for i_task,task in enumerate(task_info):
         if not remove_yolo_symlink_folder:
             remove_symlink_folder_string = '--no_remove_symlink_folder'
 
+        write_yolo_debug_output_string = ''
+        if write_yolo_debug_output:
+            write_yolo_debug_output = '--write_yolo_debug_output'
+
         remove_yolo_results_string = ''
         if not remove_yolo_intermediate_results:
             remove_yolo_results_string = '--no_remove_yolo_results_folder'
@@ -356,15 +431,47 @@ for i_task,task in enumerate(task_info):
         overwrite_handling_string = '--overwrite_handling {}'.format(overwrite_handling)
 
         cmd += f'python run_inference_with_yolov5_val.py "{model_file}" "{chunk_file}" "{output_fn}" '
-        cmd += f'
+        cmd += f'{image_size_string} {augment_string} '
         cmd += f'{symlink_folder_string} {yolo_results_folder_string} {remove_yolo_results_string} '
         cmd += f'{remove_symlink_folder_string} {confidence_threshold_string} {device_string} '
-        cmd += f'{overwrite_handling_string}'
-
+        cmd += f'{overwrite_handling_string} {batch_string} {write_yolo_debug_output_string}'
+
+        if yolo_working_dir is not None:
+            cmd += f' --yolo_working_folder "{yolo_working_dir}"'
+        if yolo_dataset_file is not None:
+            cmd += ' --yolo_dataset_file "{}"'.format(yolo_dataset_file)
+        if yolo_model_type is not None:
+            cmd += ' --model_type {}'.format(yolo_model_type)
+
         if not use_symlinks_for_yolo_inference:
             cmd += ' --no_use_symlinks'
 
         cmd += '\n'
+
+    elif use_tiled_inference:
+
+        tiling_folder = os.path.join(filename_base,'tile_cache','tile_cache_{}'.format(
+            str(i_task).zfill(3)))
+
+        if os.name == 'nt':
+            cuda_string = f'set CUDA_VISIBLE_DEVICES={gpu_number} & '
+        else:
+            cuda_string = f'CUDA_VISIBLE_DEVICES={gpu_number} '
+
+        cmd = f'{cuda_string} python run_tiled_inference.py "{model_file}" "{input_path}" "{tiling_folder}" "{output_fn}"'
+
+        cmd += f' --image_list "{chunk_file}"'
+        cmd += f' --overwrite_handling {overwrite_handling}'
+
+        if not remove_tiles:
+            cmd += ' --no_remove_tiles'
+
+        # If we're using non-default tile sizes
+        if tile_size is not None and (tile_size[0] > 0 or tile_size[1] > 0):
+            cmd += ' --tile_size_x {} --tile_size_y {}'.format(tile_size[0],tile_size[1])
+
+        if tile_overlap is not None:
+            cmd += f' --tile_overlap {tile_overlap}'
 
     else:
 
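Note (illustrative, not part of the diff): with the defaults introduced above (tile_size = (1280,1280), tile_overlap = 0.2, overwrite_handling = 'skip'), the new tiled-inference branch assembles one run_tiled_inference.py command per chunk. A sketch of that assembly with hypothetical task-specific paths:

    gpu_number = 0
    model_file = 'MDV5A'
    input_path = '/drive/organization'
    tiling_folder = '/home/user/postprocessing/job/tile_cache/tile_cache_000'  # hypothetical
    chunk_file = '/home/user/postprocessing/job/chunk_000.json'                # hypothetical
    output_fn = chunk_file.replace('.json', '_results.json')

    cuda_string = f'CUDA_VISIBLE_DEVICES={gpu_number} '
    cmd = f'{cuda_string} python run_tiled_inference.py "{model_file}" "{input_path}" ' + \
          f'"{tiling_folder}" "{output_fn}"'
    cmd += f' --image_list "{chunk_file}"'
    cmd += ' --overwrite_handling skip'
    cmd += ' --tile_size_x 1280 --tile_size_y 1280 --tile_overlap 0.2'
    print(cmd)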
@@ -375,7 +482,6 @@ for i_task,task in enumerate(task_info):
 
         checkpoint_frequency_string = ''
         checkpoint_path_string = ''
-        checkpoint_filename = chunk_file.replace('.json','_checkpoint.json')
 
         if checkpoint_frequency is not None and checkpoint_frequency > 0:
             checkpoint_frequency_string = f'--checkpoint_frequency {checkpoint_frequency}'
@@ -399,7 +505,14 @@ for i_task,task in enumerate(task_info):
 
         overwrite_handling_string = '--overwrite_handling {}'.format(overwrite_handling)
         cmd = f'{cuda_string} python run_detector_batch.py "{model_file}" "{chunk_file}" "{output_fn}" {checkpoint_frequency_string} {checkpoint_path_string} {use_image_queue_string} {ncores_string} {quiet_string} {image_size_string} {confidence_threshold_string} {overwrite_handling_string}'
-
+
+        if include_image_size:
+            cmd += ' --include_image_size'
+        if include_image_timestamp:
+            cmd += ' --include_image_timestamp'
+        if include_exif_data:
+            cmd += ' --include_exif_data'
+
     cmd_file = os.path.join(filename_base,'run_chunk_{}_gpu_{}{}'.format(str(i_task).zfill(3),
         str(gpu_number).zfill(2),script_extension))
 
@@ -484,12 +597,10 @@ multiple processes, so the tasks will run serially. This only matters if you ha
 GPUs.
 """
 
-
-
-    pass
-
-    #%%% Run the tasks (commented out)
+run_tasks_in_notebook = False
 
+if run_tasks_in_notebook:
+
     assert not use_yolo_inference_scripts, \
         'If you want to use the YOLOv5 inference scripts, you can\'t run the model interactively (yet)'
 
@@ -537,15 +648,32 @@ if False:
 
     # ...for each chunk
 
-# ...if
+# ...if we're running tasks in this notebook
 
 
 #%% Load results, look for failed or missing images in each task
 
+# Check that all task output files exist
+
+missing_output_files = []
+
+# i_task = 0; task = task_info[i_task]
+for i_task,task in tqdm(enumerate(task_info),total=len(task_info)):
+    output_file = task['output_file']
+    if not os.path.isfile(output_file):
+        missing_output_files.append(output_file)
+
+if len(missing_output_files) > 0:
+    print('Missing {} output files:'.format(len(missing_output_files)))
+    for s in missing_output_files:
+        print(s)
+    raise Exception('Missing output files')
+
+
 n_total_failures = 0
 
 # i_task = 0; task = task_info[i_task]
-for i_task,task in enumerate(task_info):
+for i_task,task in tqdm(enumerate(task_info),total=len(task_info)):
 
     chunk_file = task['input_file']
     output_file = task['output_file']
@@ -562,6 +690,13 @@ for i_task,task in enumerate(task_info):
 
     # im = task_results['images'][0]
     for im in task_results['images']:
+
+        # Most of the time, inference result files use absolute paths, but it's
+        # getting annoying to make sure that's *always* true, so handle both here.
+        # E.g., when using tiled inference, paths will be relative.
+        if not os.path.isabs(im['file']):
+            fn = os.path.join(input_path,im['file']).replace('\\','/')
+            im['file'] = fn
         assert im['file'].startswith(input_path)
         assert im['file'] in task_images_set
         filename_to_results[im['file']] = im
@@ -573,7 +708,8 @@
     task['results'] = task_results
 
     for fn in task_images:
-        assert fn in filename_to_results
+        assert fn in filename_to_results, \
+            'File {} not found in results for task {}'.format(fn,i_task)
 
     n_total_failures += n_task_failures
 
@@ -593,7 +729,7 @@ combined_results = {}
 combined_results['images'] = []
 images_processed = set()
 
-for i_task,task in enumerate(task_info):
+for i_task,task in tqdm(enumerate(task_info),total=len(task_info)):
 
     task_results = task['results']
 
@@ -620,19 +756,15 @@ assert len(combined_results['images']) == len(all_images), \
 result_filenames = [im['file'] for im in combined_results['images']]
 assert len(combined_results['images']) == len(set(result_filenames))
 
-#
+# Convert to relative paths, preserving '/' as the path separator, regardless of OS
 for im in combined_results['images']:
+    assert '\\' not in im['file']
+    assert im['file'].startswith(input_path)
     if input_path.endswith(':'):
-        assert im['file'].startswith(input_path)
         im['file'] = im['file'].replace(input_path,'',1)
     else:
-
-        im['file'] = im['file'].replace(input_path + os.path.sep,'',1)
+        im['file'] = im['file'].replace(input_path + '/','',1)
 
-combined_api_output_file = os.path.join(
-    combined_api_output_folder,
-    '{}_detections.json'.format(base_task_name))
-
 with open(combined_api_output_file,'w') as f:
     json.dump(combined_results,f,indent=1)
 
@@ -675,88 +807,8 @@ options.api_output_file = combined_api_output_file
 options.output_dir = output_base
 ppresults = process_batch_results(options)
 html_output_file = ppresults.output_html_file
-path_utils.open_file(html_output_file)
-
-
-#%% RDE (sample directory collapsing)
-
-#
-# The next few cells are about repeat detection elimination; if you want to skip this,
-# and still do other stuff in this notebook (e.g. running classifiers), that's fine, but
-# the rest of the notebook weakly assumes you've done this. Specifically, it looks for
-# the variable "filtered_api_output_file" (a file produced by the RDE process). If you
-# don't run the RDE cells, just change "filtered_api_output_file" to "combined_api_output_file"
-# (the raw output from MegaDetector). Then it will be like all this RDE stuff doesn't exist.
-#
-# Though FWIW, once you're sufficiently power-user-ish to use this notebook, RDE is almost
-# always worth it.
-#
-
-def relative_path_to_location(relative_path):
-    """
-    This is a sample function that returns a camera name given an image path. By
-    default in the RDE process, leaf-node folders are equivalent to cameras. To map
-    something other than leaf-node folders to cameras, fill in this function, and un-comment the
-    line below containing "relative_path_to_location".
-
-    Sample regular expressions are included here for common patterns, particularly the
-    overflow folders created by Reconyx and Bushnell camera traps. So if one of those
-    fits your scenario, you don't have to modify this function, just un-comment the line
-    below that enables this feature.
-
-    Nothing bad happens if you have overflow folders like this and you don't
-    enable this mapping, you are just taking a more conservative approach to RDE in that
-    scenario.
-    """
-
-    import re
-
-    # 100RECNX is the overflow folder style for Reconyx cameras
-    # 100EK113 is (for some reason) the overflow folder style for Bushnell cameras
-    # 100_BTCF is the overflow folder style for Browning cameras
-    patterns = ['\/\d+RECNX\/','\/\d+EK\d+\/','\/\d+_BTCF\/']
-
-    relative_path = relative_path.replace('\\','/')
-    for pat in patterns:
-        relative_path = re.sub(pat,'/',relative_path)
-    location_name = os.path.dirname(relative_path)
-
-    return location_name
-
-
-#%% Test cells for relative_path_to_location
-
-if False:
-
-    pass
-
-    #%% Test the generic cases
-
-    relative_path = 'a/b/c/d/100EK113/blah.jpg'
-    print(relative_path_to_location(relative_path))
-
-    relative_path = 'a/b/c/d/100RECNX/blah.jpg'
-    print(relative_path_to_location(relative_path))
-
-
-    #%% Test relative_path_to_location on the current dataset
-
-    with open(combined_api_output_file,'r') as f:
-        d = json.load(f)
-    image_filenames = [im['file'] for im in d['images']]
-
-    location_names = set()
-
-    # relative_path = image_filenames[0]
-    for relative_path in tqdm(image_filenames):
-        location_name = relative_path_to_location(relative_path)
-        location_names.add(location_name)
-
-    location_names = list(location_names)
-    location_names.sort()
-
-    for s in location_names:
-        print(s)
+path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
+# import clipboard; clipboard.copy(html_output_file)
 
 
 #%% Repeat detection elimination, phase 1
@@ -768,7 +820,7 @@ task_index = 0
 
 options = repeat_detections_core.RepeatDetectionOptions()
 
-options.confidenceMin = 0.
+options.confidenceMin = 0.1
 options.confidenceMax = 1.01
 options.iouThreshold = 0.85
 options.occurrenceThreshold = 15
@@ -785,13 +837,13 @@ options.otherDetectionsThreshold = options.confidenceMin
 
 options.bRenderDetectionTiles = True
 options.maxOutputImageWidth = 2000
-options.detectionTilesMaxCrops =
+options.detectionTilesMaxCrops = 250
 
 # options.lineThickness = 5
 # options.boxExpansion = 8
 
 # To invoke custom collapsing of folders for a particular manufacturer's naming scheme
-
+options.customDirNameFunction = relative_path_to_location
 
 options.bRenderHtml = False
 options.imageBase = input_path
@@ -816,9 +868,9 @@ options.debugMaxRenderInstance = -1
 # Can be None, 'xsort', or 'clustersort'
 options.smartSort = 'xsort'
 
-
-
-
+suspicious_detection_results = repeat_detections_core.find_repeat_detections(combined_api_output_file,
+    outputFilename=None,
+    options=options)
 
 
 #%% Manual RDE step
@@ -826,7 +878,8 @@ suspiciousDetectionResults = repeat_detections_core.find_repeat_detections(combi
 ## DELETE THE VALID DETECTIONS ##
 
 # If you run this line, it will open the folder up in your file browser
-path_utils.open_file(os.path.dirname(
+path_utils.open_file(os.path.dirname(suspicious_detection_results.filterFile),
+    attempt_to_open_in_wsl_host=True)
 
 #
 # If you ran the previous cell, but then you change your mind and you don't want to do
@@ -834,7 +887,7 @@ path_utils.open_file(os.path.dirname(suspiciousDetectionResults.filterFile))
 # previous cell. If you do that, you're implicitly telling the notebook that you looked
 # at everything in that folder, and confirmed there were no red boxes on animals.
 #
-# Instead, either change "
+# Instead, either change "filtered_output_filename" below to "combined_api_output_file",
 # or delete *all* the images in the filtering folder.
 #
 
@@ -843,12 +896,13 @@ path_utils.open_file(os.path.dirname(suspiciousDetectionResults.filterFile))
 
 from api.batch_processing.postprocessing.repeat_detection_elimination import remove_repeat_detections
 
-filtered_output_filename = path_utils.insert_before_extension(combined_api_output_file,
+filtered_output_filename = path_utils.insert_before_extension(combined_api_output_file,
+    'filtered_{}'.format(rde_string))
 
 remove_repeat_detections.remove_repeat_detections(
     inputFile=combined_api_output_file,
     outputFile=filtered_output_filename,
-    filteringDir=os.path.dirname(
+    filteringDir=os.path.dirname(suspicious_detection_results.filterFile)
     )
 
 
@@ -890,7 +944,8 @@ options.output_dir = output_base
 ppresults = process_batch_results(options)
 html_output_file = ppresults.output_html_file
 
-path_utils.open_file(html_output_file)
+path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
+# import clipboard; clipboard.copy(html_output_file)
 
 
 #%% Run MegaClassifier (actually, write out a script that runs MegaClassifier)
@@ -899,6 +954,11 @@ path_utils.open_file(html_output_file)
 final_output_path_mc = None
 final_output_path_ic = None
 
+# If we didn't do RDE
+if filtered_output_filename is None:
+    print("Warning: it looks like you didn't do RDE, using the raw output file")
+    filtered_output_filename = combined_api_output_file
+
 classifier_name_short = 'megaclassifier'
 threshold_str = '0.15' # 0.6
 classifier_name = 'megaclassifier_v0.1_efficientnet-b3'
@@ -1086,7 +1146,6 @@ with open(output_file,'w') as f:
     for s in commands:
         f.write('{}'.format(s))
 
-import stat
 st = os.stat(output_file)
 os.chmod(output_file, st.st_mode | stat.S_IEXEC)
 
@@ -1256,8 +1315,6 @@ os.chmod(output_file, st.st_mode | stat.S_IEXEC)
 
 #%% Within-image classification smoothing
 
-from collections import defaultdict
-
 #
 # Only count detections with a classification confidence threshold above
 # *classification_confidence_threshold*, which in practice means we're only
@@ -1516,7 +1573,7 @@ else:
 import datetime
 from data_management.read_exif import parse_exif_datetime_string
 
-min_valid_timestamp_year =
+min_valid_timestamp_year = 2001
 
 now = datetime.datetime.now()
 
@@ -1540,6 +1597,7 @@ for exif_result in tqdm(exif_results):
 
     im['file_name'] = exif_result['file_name']
     im['id'] = im['file_name']
+
     if ('exif_tags' not in exif_result) or (exif_result['exif_tags'] is None) or \
         (exif_datetime_tag not in exif_result['exif_tags']):
         exif_dt = None
@@ -1573,7 +1631,7 @@ for exif_result in tqdm(exif_results):
 
 # ...for each exif image result
 
-print('Parsed EXIF datetime information, unable to parse EXIF
+print('Parsed EXIF datetime information, unable to parse EXIF date from {} of {} images'.format(
     len(images_without_datetime),len(exif_results)))
 
 
@@ -1639,7 +1697,7 @@ min_dominant_class_classifications_above_threshold_for_class_smoothing = 5 # 2
 max_secondary_class_classifications_above_threshold_for_class_smoothing = 5
 
 # If the ratio between a dominant class and a secondary class count is greater than this,
-# regardless of the secondary class count, switch those
+# regardless of the secondary class count, switch those classifications (i.e., ignore
 # max_secondary_class_classifications_above_threshold_for_class_smoothing).
 #
 # This may be different for different dominant classes, e.g. if we see lots of cows, they really
@@ -1959,8 +2017,8 @@ print('Processing {} to {}'.format(base_task_name, output_base))
 options.api_output_file = sequence_smoothed_classification_file
 options.output_dir = output_base
 ppresults = process_batch_results(options)
-path_utils.open_file(ppresults.output_html_file)
-
+path_utils.open_file(ppresults.output_html_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
+# import clipboard; clipboard.copy(ppresults.output_html_file)
 
 #% Zip .json files
 
@@ -2027,7 +2085,7 @@ for i, j in itertools.combinations(list(range(0,len(filenames))),2):
 results = compare_batch_results(options)
 
 from md_utils.path_utils import open_file
-open_file(results.html_output_file)
+open_file(results.html_output_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
 
 
 #%% Merge in high-confidence detections from another results file
@@ -2081,7 +2139,7 @@ options.output_dir = output_base_large_boxes
 
 ppresults = process_batch_results(options)
 html_output_file = ppresults.output_html_file
-path_utils.open_file(html_output_file)
+path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
 
 
 #%% .json splitting
@@ -2094,12 +2152,6 @@ from api.batch_processing.postprocessing.subset_json_detector_output import (
 input_filename = filtered_output_filename
 output_base = os.path.join(combined_api_output_folder,base_task_name + '_json_subsets')
 
-if False:
-    if data is None:
-        with open(input_filename) as f:
-            data = json.load(f)
-    print('Data set contains {} images'.format(len(data['images'])))
-
 print('Processing file {} to {}'.format(input_filename,output_base))
 
 options = SubsetJsonDetectorOutputOptions()
@@ -2204,13 +2256,47 @@ video_output_filename = filtered_output_filename.replace('.json','_aggregated.js
 frame_results_to_video_results(filtered_output_filename,video_output_filename)
 
 
+#%% Sample custom path replacement function
+
+def custom_relative_path_to_location(relative_path):
+
+    relative_path = relative_path.replace('\\','/')
+    tokens = relative_path.split('/')
+    location_name = '/'.join(tokens[0:2])
+    return location_name
+
+
+#%% Test relative_path_to_location on the current dataset
+
+with open(combined_api_output_file,'r') as f:
+    d = json.load(f)
+image_filenames = [im['file'] for im in d['images']]
+
+location_names = set()
+
+# relative_path = image_filenames[0]
+for relative_path in tqdm(image_filenames):
+    location_name = relative_path_to_location(relative_path)
+    location_names.add(location_name)
+
+location_names = list(location_names)
+location_names.sort()
+
+for s in location_names:
+    print(s)
+
+
 #%% End notebook: turn this script into a notebook (how meta!)
 
 import os
 import nbformat as nbf
 
-
-'
+if os.name == 'nt':
+    git_base = r'c:\git'
+else:
+    git_base = os.path.expanduser('~/git')
+
+input_py_file = git_base + '/MegaDetector/api/batch_processing/data_preparation/manage_local_batch.py'
 assert os.path.isfile(input_py_file)
 output_ipynb_file = input_py_file.replace('.py','.ipynb')
 
@@ -2233,14 +2319,23 @@ i_line = 0
 
 header_comment = ''
 
+# Delete a few lines from the top that don't belong in the NB version, e.g. the name
+# of the .py file
 lines_to_ignore = 7
+expected_first_token = '# This script'
+found_first_token = False
 
 # Everything before the first cell is the header comment
 while(not lines[i_line].startswith('#%%')):
+
     if i_line < lines_to_ignore:
         i_line += 1
         continue
 
+    if not found_first_token:
+        assert lines[i_line].startswith(expected_first_token)
+        found_first_token = True
+
     s = lines[i_line].replace('#','').strip()
     if len(s) == 0:
         header_comment += '\n\n'
|